Download raw body.
bnxt(4) tso
On Thu, Jan 11, 2024 at 10:35:12AM +1000, Jonathan Matthew wrote:
> On Tue, Jan 09, 2024 at 09:30:41PM +0100, Jan Klemkow wrote:
> > On Tue, Jan 09, 2024 at 03:01:23PM +1000, Jonathan Matthew wrote:
> > > This implements TSO for bnxt(4). To do TSO, the nic needs the
> > > MSS and the size of all packet headers in 16-bit units. The
> > > bits in the tx descriptor that indicate the packet size also need
> > > to be set based on the MSS.
> > >
> > > Some testing shows that it makes tcpbench print much bigger numbers.
> >
> > Your diff also lets tcpbench print bigger numbers in my setup :-)
> >
> > See my comment below.
>
> > > +
> > > + txhi->hdr_size = htole16(hdrsize / 2);
> > > + txhi->mss = htole32(m->m_pkthdr.ph_mss);
> >
> > We something like this from ix(4) and ixl(4) here:
> > tcpstat_add(tcps_outpkttso,
> > (paylen + outlen - 1) / outlen);
> >
> > Thus, the stats behaves similar over all drivers.
> >
>
> Good point. ok with that fixed?
Usually I try to avoid a TSO panic in a driver and just increment
tcpstat_inc(tcps_outbadtso). Note that ether_extract_headers()
cannot trigger your panic anyway.
Should hdrsize, outlen, paylen habe type uint32_t? They are all
unsigned and uint32_t is their type in ix.
I have thrown your diff on my test setup.
bluhm
> Index: if_bnxt.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_bnxt.c,v
> retrieving revision 1.43
> diff -u -p -r1.43 if_bnxt.c
> --- if_bnxt.c 10 Jan 2024 05:06:00 -0000 1.43
> +++ if_bnxt.c 11 Jan 2024 00:28:53 -0000
> @@ -68,6 +68,7 @@
>
> #include <net/if.h>
> #include <net/if_media.h>
> +#include <net/route.h>
> #include <net/toeplitz.h>
>
> #if NBPFILTER > 0
> @@ -75,7 +76,12 @@
> #endif
>
> #include <netinet/in.h>
> +#include <netinet/ip.h>
> +#include <netinet/ip6.h>
> #include <netinet/if_ether.h>
> +#include <netinet/tcp.h>
> +#include <netinet/tcp_timer.h>
> +#include <netinet/tcp_var.h>
>
> #define BNXT_HWRM_BAR 0x10
> #define BNXT_DOORBELL_BAR 0x18
> @@ -642,6 +649,7 @@ bnxt_attach(struct device *parent, struc
> ifp->if_capabilities = IFCAP_VLAN_MTU | IFCAP_CSUM_IPv4 |
> IFCAP_CSUM_UDPv4 | IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv6 |
> IFCAP_CSUM_TCPv6;
> + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
> #if NVLAN > 0
> ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
> #endif
> @@ -929,7 +937,7 @@ bnxt_queue_up(struct bnxt_softc *sc, str
>
> for (i = 0; i < tx->tx_ring.ring_size; i++) {
> bs = &tx->tx_slots[i];
> - if (bus_dmamap_create(sc->sc_dmat, BNXT_MAX_MTU, BNXT_MAX_TX_SEGS,
> + if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES, BNXT_MAX_TX_SEGS,
> BNXT_MAX_MTU, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
> &bs->bs_map) != 0) {
> printf("%s: failed to allocate tx dma maps\n",
> @@ -1337,11 +1345,12 @@ bnxt_start(struct ifqueue *ifq)
> struct bnxt_tx_queue *tx = ifq->ifq_softc;
> struct bnxt_softc *sc = tx->tx_softc;
> struct bnxt_slot *bs;
> + struct ether_extracted ext;
> bus_dmamap_t map;
> struct mbuf *m;
> u_int idx, free, used, laststart;
> - uint16_t txflags;
> - int i;
> + uint16_t txflags, lflags;
> + int i, slen;
>
> txring = (struct tx_bd_short *)BNXT_DMA_KVA(tx->tx_ring_mem);
>
> @@ -1385,12 +1394,16 @@ bnxt_start(struct ifqueue *ifq)
> txring[idx].len = htole16(map->dm_segs[0].ds_len);
> txring[idx].opaque = tx->tx_prod;
> txring[idx].addr = htole64(map->dm_segs[0].ds_addr);
> + if (m->m_pkthdr.csum_flags & M_TCP_TSO)
> + slen = m->m_pkthdr.ph_mss;
> + else
> + slen = map->dm_mapsize;
>
> - if (map->dm_mapsize < 512)
> + if (slen < 512)
> txflags = TX_BD_LONG_FLAGS_LHINT_LT512;
> - else if (map->dm_mapsize < 1024)
> + else if (slen < 1024)
> txflags = TX_BD_LONG_FLAGS_LHINT_LT1K;
> - else if (map->dm_mapsize < 2048)
> + else if (slen < 2048)
> txflags = TX_BD_LONG_FLAGS_LHINT_LT2K;
> else
> txflags = TX_BD_LONG_FLAGS_LHINT_GTE2K;
> @@ -1408,12 +1421,43 @@ bnxt_start(struct ifqueue *ifq)
> /* long tx descriptor */
> txhi = (struct tx_bd_long_hi *)&txring[idx];
> memset(txhi, 0, sizeof(*txhi));
> - txflags = 0;
> - if (m->m_pkthdr.csum_flags & (M_UDP_CSUM_OUT | M_TCP_CSUM_OUT))
> - txflags |= TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM;
> - if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
> - txflags |= TX_BD_LONG_LFLAGS_IP_CHKSUM;
> - txhi->lflags = htole16(txflags);
> +
> + lflags = 0;
> + if (m->m_pkthdr.csum_flags & M_TCP_TSO) {
> + int hdrsize;
> + int outlen;
> + int paylen;
> +
> + ether_extract_headers(m, &ext);
> + if (ext.tcp) {
> + lflags |= TX_BD_LONG_LFLAGS_LSO;
> + hdrsize = sizeof(*ext.eh);
> + if (ext.ip4)
> + hdrsize += ext.ip4->ip_hl << 2;
> + else if (ext.ip6)
> + hdrsize += sizeof(*ext.ip6);
> + else
> + panic("TSO set for non-IP packet");
> + hdrsize += ext.tcp->th_off << 2;
> + txhi->hdr_size = htole16(hdrsize / 2);
> +
> + outlen = m->m_pkthdr.ph_mss;
> + txhi->mss = htole32(outlen);
> +
> + paylen = m->m_pkthdr.len - hdrsize;
> + tcpstat_add(tcps_outpkttso,
> + (paylen + outlen + 1) / outlen);
> + } else {
> + tcpstat_inc(tcps_outbadtso);
> + }
> + } else {
> + if (m->m_pkthdr.csum_flags & (M_UDP_CSUM_OUT |
> + M_TCP_CSUM_OUT))
> + lflags |= TX_BD_LONG_LFLAGS_TCP_UDP_CHKSUM;
> + if (m->m_pkthdr.csum_flags & M_IPV4_CSUM_OUT)
> + lflags |= TX_BD_LONG_LFLAGS_IP_CHKSUM;
> + }
> + txhi->lflags = htole16(lflags);
>
> #if NVLAN > 0
> if (m->m_flags & M_VLANTAG) {
bnxt(4) tso