Download raw body.
vmx(4): tcp segmentation offload
Hi,
This diff implements TCP segmentation offload for vmx(4). I tested it
on an ESXi 8.0 host with Linux and external network interfaces. If you
attach an XL710 to an ESXi host, an OpenBSD VM can effortlessly send out
a single TCP stream at 25 Gbit/s with this diff.
Tests are welcome!
bye,
Jan
Index: dev/pci/if_vmx.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v
diff -u -p -r1.81 if_vmx.c
--- dev/pci/if_vmx.c 15 Feb 2024 13:24:45 -0000 1.81
+++ dev/pci/if_vmx.c 16 Feb 2024 08:42:48 -0000
@@ -33,11 +33,14 @@
#include <net/if.h>
#include <net/toeplitz.h>
#include <net/if_media.h>
+#include <net/route.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <machine/bus.h>
@@ -406,6 +409,8 @@ vmxnet3_attach(struct device *parent, st
ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
}
+ ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
+
#if NVLAN > 0
if (sc->sc_ds->upt_features & UPT1_F_VLAN)
ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
@@ -578,8 +583,8 @@ vmxnet3_alloc_txring(struct vmxnet3_soft
comp_ring->txcd = VMX_DMA_KVA(&comp_ring->dmamem);
for (idx = 0; idx < NTXDESC; idx++) {
- if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, NTXSEGS,
- VMXNET3_TX_LEN_M + 1, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
+ if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES, NTXSEGS,
+ VMXNET3_TX_LEN_M, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
return -1;
}
@@ -1439,13 +1444,41 @@ vmxnet3_tx_offload(struct vmxnet3_txdesc
offset = hdrlen + offsetof(struct tcphdr, th_sum);
else if (ext.udp)
offset = hdrlen + offsetof(struct udphdr, uh_sum);
+ else
+ return;
+
+ if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+ hdrlen &= VMXNET3_TX_HLEN_M;
+ offset &= VMXNET3_TX_OP_M;
+
+ sop->tx_word3 |= htole32(VMXNET3_OM_CSUM << VMXNET3_TX_OM_S);
+ sop->tx_word3 |= htole32(hdrlen << VMXNET3_TX_HLEN_S);
+ sop->tx_word2 |= htole32(offset << VMXNET3_TX_OP_S);
+
+ return;
+ }
+
+ /*
+ * TCP Segmentation Offload
+ */
+ if (ext.tcp == NULL) {
+ tcpstat_inc(tcps_outbadtso);
+ return;
+ }
+
+ if (ext.ip4)
+ ext.ip4->ip_sum = 0;
+
+ hdrlen += ext.tcphlen;
hdrlen &= VMXNET3_TX_HLEN_M;
- offset &= VMXNET3_TX_OP_M;
- sop->tx_word3 |= htole32(VMXNET3_OM_CSUM << VMXNET3_TX_OM_S);
+ sop->tx_word3 |= htole32(VMXNET3_OM_TSO << VMXNET3_TX_OM_S);
sop->tx_word3 |= htole32(hdrlen << VMXNET3_TX_HLEN_S);
- sop->tx_word2 |= htole32(offset << VMXNET3_TX_OP_S);
+ sop->tx_word2 |= htole32(m->m_pkthdr.ph_mss << VMXNET3_TX_OP_S);
+
+ tcpstat_add(tcps_outpkttso, (m->m_pkthdr.len - hdrlen +
+ m->m_pkthdr.ph_mss - 1) / m->m_pkthdr.ph_mss);
}
void
vmx(4): tcp segmentation offload