From: Jonathan Matthew Subject: Re: bnxt(4) tso To: Jan Klemkow Cc: tech@openbsd.org Date: Thu, 11 Jan 2024 10:35:12 +1000 On Tue, Jan 09, 2024 at 09:30:41PM +0100, Jan Klemkow wrote: > On Tue, Jan 09, 2024 at 03:01:23PM +1000, Jonathan Matthew wrote: > > This implements TSO for bnxt(4). To do TSO, the nic needs the > > MSS and the size of all packet headers in 16-bit units. The > > bits in the tx descriptor that indicate the packet size also need > > to be set based on the MSS. > > > > Some testing shows that it makes tcpbench print much bigger numbers. > > Your diff also lets tcpbench print bigger numbers in my setup :-) > > See my comment below. > > + > > + txhi->hdr_size = htole16(hdrsize / 2); > > + txhi->mss = htole32(m->m_pkthdr.ph_mss); > > We something like this from ix(4) and ixl(4) here: > tcpstat_add(tcps_outpkttso, > (paylen + outlen - 1) / outlen); > > Thus, the stats behaves similar over all drivers. > Good point. ok with that fixed? Index: if_bnxt.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_bnxt.c,v retrieving revision 1.43 diff -u -p -r1.43 if_bnxt.c --- if_bnxt.c 10 Jan 2024 05:06:00 -0000 1.43 +++ if_bnxt.c 11 Jan 2024 00:28:53 -0000 @@ -68,6 +68,7 @@ #include #include +#include #include #if NBPFILTER > 0 @@ -75,7 +76,12 @@ #endif #include +#include +#include #include +#include +#include +#include #define BNXT_HWRM_BAR 0x10 #define BNXT_DOORBELL_BAR 0x18 @@ -642,6 +649,7 @@ bnxt_attach(struct device *parent, struc ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_CSUM_IPv4 | IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv6 | IFCAP_CSUM_TCPv6; + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; #if NVLAN > 0 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; #endif @@ -929,7 +937,7 @@ bnxt_queue_up(struct bnxt_softc *sc, str for (i = 0; i < tx->tx_ring.ring_size; i++) { bs = &tx->tx_slots[i]; - if (bus_dmamap_create(sc->sc_dmat, BNXT_MAX_MTU, BNXT_MAX_TX_SEGS, + if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES, BNXT_MAX_TX_SEGS, BNXT_MAX_MTU, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &bs->bs_map) != 0) { printf("%s: failed to allocate tx dma maps\n", @@ -1337,11 +1345,12 @@ bnxt_start(struct ifqueue *ifq) struct bnxt_tx_queue *tx = ifq->ifq_softc; struct bnxt_softc *sc = tx->tx_softc; struct bnxt_slot *bs; + struct ether_extracted ext; bus_dmamap_t map; struct mbuf *m; u_int idx, free, used, laststart; - uint16_t txflags; - int i; + uint16_t txflags, lflags; + int i, slen; txring = (struct tx_bd_short *)BNXT_DMA_KVA(tx->tx_ring_mem); @@ -1385,12 +1394,16 @@ bnxt_start(struct ifqueue *ifq) txring[idx].len = htole16(map->dm_segs[0].ds_len); txring[idx].opaque = tx->tx_prod; txring[idx].addr = htole64(map->dm_segs[0].ds_addr); + if (m->m_pkthdr.csum_flags & M_TCP_TSO) + slen = m->m_pkthdr.ph_mss; + else + slen = map->dm_mapsize; - if (map->dm_mapsize < 512) + if (slen < 512) txflags = TX_BD_LONG_FLAGS_LHINT_LT512; - else if (map->dm_mapsize < 1024) + else if (slen < 1024) txflags = TX_BD_LONG_FLAGS_LHINT_LT1K; - else if (map->dm_mapsize < 2048) + else if (slen < 2048) txflags = TX_BD_LONG_FLAGS_LHINT_LT2K; else txflags = TX_BD_LONG_FLAGS_LHINT_GTE2K; @@ -1408,12 +1421,43 @@ bnxt_start(struct ifqueue *ifq) /* long tx descriptor */ txhi = (struct tx_bd_long_hi *)&txring[idx]; memset(txhi, 0, sizeof(*txhi)); - txflags = 0; - if (m->m_pkthdr.csum_flags & (M_UDP_CSUM_OUT | M_TCP_CSUM_OUT)) - txflags |= TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM; - if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT) - txflags |= TX_BD_LONG_LFLAGS_IP_CHKSUM; - txhi->lflags = htole16(txflags); + + lflags = 0; + if (m->m_pkthdr.csum_flags & M_TCP_TSO) { + int hdrsize; + int outlen; + int paylen; + + ether_extract_headers(m, &ext); + if (ext.tcp) { + lflags |= TX_BD_LONG_LFLAGS_LSO; + hdrsize = sizeof(*ext.eh); + if (ext.ip4) + hdrsize += ext.ip4->ip_hl << 2; + else if (ext.ip6) + hdrsize += sizeof(*ext.ip6); + else + panic("TSO set for non-IP packet"); + hdrsize += ext.tcp->th_off << 2; + txhi->hdr_size = htole16(hdrsize / 2); + + outlen = m->m_pkthdr.ph_mss; + txhi->mss = htole32(outlen); + + paylen = m->m_pkthdr.len - hdrsize; + tcpstat_add(tcps_outpkttso, + (paylen + outlen + 1) / outlen); + } else { + tcpstat_inc(tcps_outbadtso); + } + } else { + if (m->m_pkthdr.csum_flags & (M_UDP_CSUM_OUT | + M_TCP_CSUM_OUT)) + lflags |= TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM; + if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT) + lflags |= TX_BD_LONG_LFLAGS_IP_CHKSUM; + } + txhi->lflags = htole16(lflags); #if NVLAN > 0 if (m->m_flags & M_VLANTAG) {