Index | Thread | Search

From:
Jonathan Matthew <jonathan@d14n.org>
Subject:
Re: bnxt(4) tso
To:
Jan Klemkow <j.klemkow@wemelug.de>
Cc:
tech@openbsd.org
Date:
Thu, 11 Jan 2024 10:35:12 +1000

Download raw body.

Thread
On Tue, Jan 09, 2024 at 09:30:41PM +0100, Jan Klemkow wrote:
> On Tue, Jan 09, 2024 at 03:01:23PM +1000, Jonathan Matthew wrote:
> > This implements TSO for bnxt(4).  To do TSO, the nic needs the
> > MSS and the size of all packet headers in 16-bit units.  The
> > bits in the tx descriptor that indicate the packet size also need
> > to be set based on the MSS.
> > 
> > Some testing shows that it makes tcpbench print much bigger numbers.
> 
> Your diff also lets tcpbench print bigger numbers in my setup :-)
> 
> See my comment below.

> > +
> > +				txhi->hdr_size = htole16(hdrsize / 2);
> > +				txhi->mss = htole32(m->m_pkthdr.ph_mss);
> 
> We something like this from ix(4) and ixl(4) here:
> 				tcpstat_add(tcps_outpkttso,
> 				    (paylen + outlen - 1) / outlen);
> 
> Thus, the stats behaves similar over all drivers.
>

Good point.  ok with that fixed?


Index: if_bnxt.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_bnxt.c,v
retrieving revision 1.43
diff -u -p -r1.43 if_bnxt.c
--- if_bnxt.c	10 Jan 2024 05:06:00 -0000	1.43
+++ if_bnxt.c	11 Jan 2024 00:28:53 -0000
@@ -68,6 +68,7 @@
 
 #include <net/if.h>
 #include <net/if_media.h>
+#include <net/route.h>
 #include <net/toeplitz.h>
 
 #if NBPFILTER > 0
@@ -75,7 +76,12 @@
 #endif
 
 #include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
 #include <netinet/if_ether.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
 
 #define BNXT_HWRM_BAR		0x10
 #define BNXT_DOORBELL_BAR	0x18
@@ -642,6 +649,7 @@ bnxt_attach(struct device *parent, struc
 	ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_CSUM_IPv4 |
 	    IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv6 |
 	    IFCAP_CSUM_TCPv6;
+	ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
 #if NVLAN > 0
 	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
 #endif
@@ -929,7 +937,7 @@ bnxt_queue_up(struct bnxt_softc *sc, str
 
 	for (i = 0; i < tx->tx_ring.ring_size; i++) {
 		bs = &tx->tx_slots[i];
-		if (bus_dmamap_create(sc->sc_dmat, BNXT_MAX_MTU, BNXT_MAX_TX_SEGS,
+		if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES, BNXT_MAX_TX_SEGS,
 		    BNXT_MAX_MTU, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
 		    &bs->bs_map) != 0) {
 			printf("%s: failed to allocate tx dma maps\n",
@@ -1337,11 +1345,12 @@ bnxt_start(struct ifqueue *ifq)
 	struct bnxt_tx_queue *tx = ifq->ifq_softc;
 	struct bnxt_softc *sc = tx->tx_softc;
 	struct bnxt_slot *bs;
+	struct ether_extracted ext;
 	bus_dmamap_t map;
 	struct mbuf *m;
 	u_int idx, free, used, laststart;
-	uint16_t txflags;
-	int i;
+	uint16_t txflags, lflags;
+	int i, slen;
 
 	txring = (struct tx_bd_short *)BNXT_DMA_KVA(tx->tx_ring_mem);
 
@@ -1385,12 +1394,16 @@ bnxt_start(struct ifqueue *ifq)
 		txring[idx].len = htole16(map->dm_segs[0].ds_len);
 		txring[idx].opaque = tx->tx_prod;
 		txring[idx].addr = htole64(map->dm_segs[0].ds_addr);
+		if (m->m_pkthdr.csum_flags & M_TCP_TSO)
+			slen = m->m_pkthdr.ph_mss;
+		else
+			slen = map->dm_mapsize;
 
-		if (map->dm_mapsize < 512)
+		if (slen < 512)
 			txflags = TX_BD_LONG_FLAGS_LHINT_LT512;
-		else if (map->dm_mapsize < 1024)
+		else if (slen < 1024)
 			txflags = TX_BD_LONG_FLAGS_LHINT_LT1K;
-		else if (map->dm_mapsize < 2048)
+		else if (slen < 2048)
 			txflags = TX_BD_LONG_FLAGS_LHINT_LT2K;
 		else
 			txflags = TX_BD_LONG_FLAGS_LHINT_GTE2K;
@@ -1408,12 +1421,43 @@ bnxt_start(struct ifqueue *ifq)
 		/* long tx descriptor */
 		txhi = (struct tx_bd_long_hi *)&txring[idx];
 		memset(txhi, 0, sizeof(*txhi));
-		txflags = 0;
-		if (m->m_pkthdr.csum_flags & (M_UDP_CSUM_OUT | M_TCP_CSUM_OUT))
-			txflags |= TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM;
-		if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
-			txflags |= TX_BD_LONG_LFLAGS_IP_CHKSUM;
-		txhi->lflags = htole16(txflags);
+
+		lflags = 0;
+		if (m->m_pkthdr.csum_flags & M_TCP_TSO) {
+			int hdrsize;
+			int outlen;
+			int paylen;
+
+			ether_extract_headers(m, &ext);
+			if (ext.tcp) {
+				lflags |= TX_BD_LONG_LFLAGS_LSO;
+				hdrsize = sizeof(*ext.eh);
+				if (ext.ip4)
+					hdrsize += ext.ip4->ip_hl << 2;
+				else if (ext.ip6)
+					hdrsize += sizeof(*ext.ip6);
+				else
+					panic("TSO set for non-IP packet");
+				hdrsize += ext.tcp->th_off << 2;
+				txhi->hdr_size = htole16(hdrsize / 2);
+
+				outlen = m->m_pkthdr.ph_mss;
+				txhi->mss = htole32(outlen);
+
+				paylen = m->m_pkthdr.len - hdrsize;
+				tcpstat_add(tcps_outpkttso,
+				    (paylen + outlen + 1) / outlen);
+			} else {
+				tcpstat_inc(tcps_outbadtso);
+			}
+		} else {
+			if (m->m_pkthdr.csum_flags & (M_UDP_CSUM_OUT |
+			    M_TCP_CSUM_OUT))
+				lflags |= TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM;
+			if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
+				lflags |= TX_BD_LONG_LFLAGS_IP_CHKSUM;
+		}
+		txhi->lflags = htole16(lflags);
 
 #if NVLAN > 0
 		if (m->m_flags & M_VLANTAG) {