From: jan@openbsd.org Subject: vmx(4): tcp segmentation offload To: tech@openbsd.org Date: Fri, 16 Feb 2024 11:01:16 +0100 Hi, This diff implements TCP segmentation offload for vmx(4). I tested it on ESXi 8.0 with Linux and external network interfaces. If you attach an XL710 to an ESXi host, an OpenBSD VM effortlessly sends out a 25 Gbit/s single TCP stream with this diff. Tests are welcome! bye, Jan Index: dev/pci/if_vmx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v diff -u -p -r1.81 if_vmx.c --- dev/pci/if_vmx.c 15 Feb 2024 13:24:45 -0000 1.81 +++ dev/pci/if_vmx.c 16 Feb 2024 08:42:48 -0000 @@ -33,11 +33,14 @@ #include #include #include +#include #include #include #include #include +#include +#include #include #include @@ -406,6 +409,8 @@ vmxnet3_attach(struct device *parent, st ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6; } + ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6; + #if NVLAN > 0 if (sc->sc_ds->upt_features & UPT1_F_VLAN) ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; @@ -578,8 +583,8 @@ vmxnet3_alloc_txring(struct vmxnet3_soft comp_ring->txcd = VMX_DMA_KVA(&comp_ring->dmamem); for (idx = 0; idx < NTXDESC; idx++) { - if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, NTXSEGS, - VMXNET3_TX_LEN_M + 1, 0, BUS_DMA_NOWAIT, &ring->dmap[idx])) + if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES, NTXSEGS, + VMXNET3_TX_LEN_M, 0, BUS_DMA_NOWAIT, &ring->dmap[idx])) return -1; } @@ -1439,13 +1444,41 @@ vmxnet3_tx_offload(struct vmxnet3_txdesc offset = hdrlen + offsetof(struct tcphdr, th_sum); else if (ext.udp) offset = hdrlen + offsetof(struct udphdr, uh_sum); + else + return; + + if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) { + hdrlen &= VMXNET3_TX_HLEN_M; + offset &= VMXNET3_TX_OP_M; + + sop->tx_word3 |= htole32(VMXNET3_OM_CSUM << VMXNET3_TX_OM_S); + sop->tx_word3 |= htole32(hdrlen << VMXNET3_TX_HLEN_S); + sop->tx_word2 |= htole32(offset << VMXNET3_TX_OP_S); + + return; + } + + /* + * 
TCP Segmentation Offload + */ + if (ext.tcp == NULL) { + tcpstat_inc(tcps_outbadtso); + return; + } + + if (ext.ip4) + ext.ip4->ip_sum = 0; + + hdrlen += ext.tcphlen; hdrlen &= VMXNET3_TX_HLEN_M; - offset &= VMXNET3_TX_OP_M; - sop->tx_word3 |= htole32(VMXNET3_OM_CSUM << VMXNET3_TX_OM_S); + sop->tx_word3 |= htole32(VMXNET3_OM_TSO << VMXNET3_TX_OM_S); sop->tx_word3 |= htole32(hdrlen << VMXNET3_TX_HLEN_S); - sop->tx_word2 |= htole32(offset << VMXNET3_TX_OP_S); + sop->tx_word2 |= htole32(m->m_pkthdr.ph_mss << VMXNET3_TX_OP_S); + + tcpstat_add(tcps_outpkttso, (m->m_pkthdr.len - hdrlen + + m->m_pkthdr.ph_mss - 1) / m->m_pkthdr.ph_mss); } void