Index | Thread | Search

From:
Alexander Bluhm <alexander.bluhm@gmx.net>
Subject:
Re: bnxt(4) tso
To:
Jonathan Matthew <jonathan@d14n.org>
Cc:
Hrvoje Popovski <hrvoje@srce.hr>, Jonathan Matthew <jmatthew@openbsd.org>, tech@openbsd.org
Date:
Wed, 17 Jan 2024 15:33:49 +0100

Download raw body.

Thread
  • Alexander Bluhm:

    bnxt(4) tso

    • Jonathan Matthew:

      bnxt(4) tso

      • Alexander Bluhm:

        bnxt(4) tso

On Mon, Jan 15, 2024 at 07:54:05PM +1000, Jonathan Matthew wrote:
> On Fri, Jan 12, 2024 at 10:15:02PM +0100, Alexander Bluhm wrote:
> > On Fri, Jan 12, 2024 at 11:27:31AM +0100, Hrvoje Popovski wrote:
> > > is it maybe because of mtu. I've tested even that and it's working just
> > > fine ..
> > 
> > I have identified the test that causes problems.
> > 
> > netbench.pl -v -b1000000 -croot@lt40 -sroot@lt43 -A10.10.21.20 -a10.10.22.40 -t10 tcpsplice
> > 
> > It runs a single TCP connection from Linux to OpenBSD bnxt0, splices
> > it to another TCP connection that goes via bnxt1 to a second Linux
> > machine.
> 
> Thanks for tracking this down!  I worked out that TSO was generating packets
> with 32 DMA segments, but after looking at if_bnxtreg.h again I realised
> 31 is the most we can do.  r1.44 of if_bnxt.c fixes the driver so it only
> allows 31 segments, and writes the right value into the tx ring slot when
> this happens.
> 
> With that fixed, I can no longer reproduce this problem with socket splicing.
> Updated diff below, addressing your comments from elsewhere in the thread.

My tests work now.
http://bluhm.genua.de/netlink/results/2024-01-16T10%3A37%3A06Z/netlink.html

The failures with UDP socket splicing also happen elsewere and seem
unrelated.

Tested with vlan and jumbo frames.

OK bluhm@

> Index: if_bnxt.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_bnxt.c,v
> retrieving revision 1.44
> diff -u -p -r1.44 if_bnxt.c
> --- if_bnxt.c	15 Jan 2024 08:56:45 -0000	1.44
> +++ if_bnxt.c	15 Jan 2024 09:21:25 -0000
> @@ -68,6 +68,7 @@
>  
>  #include <net/if.h>
>  #include <net/if_media.h>
> +#include <net/route.h>
>  #include <net/toeplitz.h>
>  
>  #if NBPFILTER > 0
> @@ -75,7 +76,12 @@
>  #endif
>  
>  #include <netinet/in.h>
> +#include <netinet/ip.h>
> +#include <netinet/ip6.h>
>  #include <netinet/if_ether.h>
> +#include <netinet/tcp.h>
> +#include <netinet/tcp_timer.h>
> +#include <netinet/tcp_var.h>
>  
>  #define BNXT_HWRM_BAR		0x10
>  #define BNXT_DOORBELL_BAR	0x18
> @@ -642,6 +649,7 @@ bnxt_attach(struct device *parent, struc
>  	ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_CSUM_IPv4 |
>  	    IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv6 |
>  	    IFCAP_CSUM_TCPv6;
> +	ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
>  #if NVLAN > 0
>  	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
>  #endif
> @@ -929,7 +937,7 @@ bnxt_queue_up(struct bnxt_softc *sc, str
>  
>  	for (i = 0; i < tx->tx_ring.ring_size; i++) {
>  		bs = &tx->tx_slots[i];
> -		if (bus_dmamap_create(sc->sc_dmat, BNXT_MAX_MTU, BNXT_MAX_TX_SEGS,
> +		if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES, BNXT_MAX_TX_SEGS,
>  		    BNXT_MAX_MTU, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
>  		    &bs->bs_map) != 0) {
>  			printf("%s: failed to allocate tx dma maps\n",
> @@ -1337,11 +1345,12 @@ bnxt_start(struct ifqueue *ifq)
>  	struct bnxt_tx_queue *tx = ifq->ifq_softc;
>  	struct bnxt_softc *sc = tx->tx_softc;
>  	struct bnxt_slot *bs;
> +	struct ether_extracted ext;
>  	bus_dmamap_t map;
>  	struct mbuf *m;
>  	u_int idx, free, used, laststart;
> -	uint16_t txflags;
> -	int i;
> +	uint16_t txflags, lflags;
> +	int i, slen;
>  
>  	txring = (struct tx_bd_short *)BNXT_DMA_KVA(tx->tx_ring_mem);
>  
> @@ -1385,12 +1394,16 @@ bnxt_start(struct ifqueue *ifq)
>  		txring[idx].len = htole16(map->dm_segs[0].ds_len);
>  		txring[idx].opaque = tx->tx_prod;
>  		txring[idx].addr = htole64(map->dm_segs[0].ds_addr);
> +		if (m->m_pkthdr.csum_flags & M_TCP_TSO)
> +			slen = m->m_pkthdr.ph_mss;
> +		else
> +			slen = map->dm_mapsize;
>  
> -		if (map->dm_mapsize < 512)
> +		if (slen < 512)
>  			txflags = TX_BD_LONG_FLAGS_LHINT_LT512;
> -		else if (map->dm_mapsize < 1024)
> +		else if (slen < 1024)
>  			txflags = TX_BD_LONG_FLAGS_LHINT_LT1K;
> -		else if (map->dm_mapsize < 2048)
> +		else if (slen < 2048)
>  			txflags = TX_BD_LONG_FLAGS_LHINT_LT2K;
>  		else
>  			txflags = TX_BD_LONG_FLAGS_LHINT_GTE2K;
> @@ -1409,12 +1422,44 @@ bnxt_start(struct ifqueue *ifq)
>  		/* long tx descriptor */
>  		txhi = (struct tx_bd_long_hi *)&txring[idx];
>  		memset(txhi, 0, sizeof(*txhi));
> -		txflags = 0;
> -		if (m->m_pkthdr.csum_flags & (M_UDP_CSUM_OUT | M_TCP_CSUM_OUT))
> -			txflags |= TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM;
> -		if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
> -			txflags |= TX_BD_LONG_LFLAGS_IP_CHKSUM;
> -		txhi->lflags = htole16(txflags);
> +
> +		lflags = 0;
> +		if (m->m_pkthdr.csum_flags & M_TCP_TSO) {
> +			int hdrsize;
> +			int outlen;
> +			int paylen;
> +
> +			ether_extract_headers(m, &ext);
> +			if (ext.tcp) {
> +				lflags |= TX_BD_LONG_LFLAGS_LSO;
> +				hdrsize = sizeof(*ext.eh);
> +				if (ext.ip4)
> +					hdrsize += ext.ip4->ip_hl << 2;
> +				else if (ext.ip6)
> +					hdrsize += sizeof(*ext.ip6);
> +				else
> +					tcpstat_inc(tcps_outbadtso);
> +
> +				hdrsize += ext.tcp->th_off << 2;
> +				txhi->hdr_size = htole16(hdrsize / 2);
> +
> +				outlen = m->m_pkthdr.ph_mss;
> +				txhi->mss = htole32(outlen);
> +
> +				paylen = m->m_pkthdr.len - hdrsize;
> +				tcpstat_add(tcps_outpkttso,
> +				    (paylen + outlen + 1) / outlen);
> +			} else {
> +				tcpstat_inc(tcps_outbadtso);
> +			}
> +		} else {
> +			if (m->m_pkthdr.csum_flags & (M_UDP_CSUM_OUT |
> +			    M_TCP_CSUM_OUT))
> +				lflags |= TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM;
> +			if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
> +				lflags |= TX_BD_LONG_LFLAGS_IP_CHKSUM;
> +		}
> +		txhi->lflags = htole16(lflags);
>  
>  #if NVLAN > 0
>  		if (m->m_flags & M_VLANTAG) {