Index | Thread | Search

From:
jan@openbsd.org
Subject:
vmx(4): tcp segmentation offload
To:
tech@openbsd.org
Date:
Fri, 16 Feb 2024 11:01:16 +0100

Download raw body.

Thread
  • jan@openbsd.org:

    vmx(4): tcp segmentation offload

Hi,

This diff implements TCP segmentation offload for vmx(4).  I tested it
on ESXi 8.0 with Linux and external network interfaces.  If you
attach an XL710 to an ESXi host, an OpenBSD VM effortlessly sends out a
25 Gbit/s single TCP stream with this diff.

Tests are welcome!

bye,
Jan

Index: dev/pci/if_vmx.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v
diff -u -p -r1.81 if_vmx.c
--- dev/pci/if_vmx.c	15 Feb 2024 13:24:45 -0000	1.81
+++ dev/pci/if_vmx.c	16 Feb 2024 08:42:48 -0000
@@ -33,11 +33,14 @@
 #include <net/if.h>
 #include <net/toeplitz.h>
 #include <net/if_media.h>
+#include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip.h>
 #include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 
 #include <machine/bus.h>
@@ -406,6 +409,8 @@ vmxnet3_attach(struct device *parent, st
 		ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
 	}
 
+	ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
+
 #if NVLAN > 0
 	if (sc->sc_ds->upt_features & UPT1_F_VLAN)
 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
@@ -578,8 +583,8 @@ vmxnet3_alloc_txring(struct vmxnet3_soft
 	comp_ring->txcd = VMX_DMA_KVA(&comp_ring->dmamem);
 
 	for (idx = 0; idx < NTXDESC; idx++) {
-		if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, NTXSEGS,
-		    VMXNET3_TX_LEN_M + 1, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
+		if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES, NTXSEGS,
+		    VMXNET3_TX_LEN_M, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
 			return -1;
 	}
 
@@ -1439,13 +1444,41 @@ vmxnet3_tx_offload(struct vmxnet3_txdesc
 		offset = hdrlen + offsetof(struct tcphdr, th_sum);
 	else if (ext.udp)
 		offset = hdrlen + offsetof(struct udphdr, uh_sum);
+	else
+		return;
+
+	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
+		hdrlen &= VMXNET3_TX_HLEN_M;
+		offset &= VMXNET3_TX_OP_M;
+
+		sop->tx_word3 |= htole32(VMXNET3_OM_CSUM << VMXNET3_TX_OM_S);
+		sop->tx_word3 |= htole32(hdrlen << VMXNET3_TX_HLEN_S);
+		sop->tx_word2 |= htole32(offset << VMXNET3_TX_OP_S);
+
+		return;
+	}
+
+	/*
+	 * TCP Segmentation Offload
+	 */
 
+	if (ext.tcp == NULL) {
+		tcpstat_inc(tcps_outbadtso);
+		return;
+	}
+
+	if (ext.ip4)
+		ext.ip4->ip_sum = 0;
+
+	hdrlen += ext.tcphlen;
 	hdrlen &= VMXNET3_TX_HLEN_M;
-	offset &= VMXNET3_TX_OP_M;
 
-	sop->tx_word3 |= htole32(VMXNET3_OM_CSUM << VMXNET3_TX_OM_S);
+	sop->tx_word3 |= htole32(VMXNET3_OM_TSO << VMXNET3_TX_OM_S);
 	sop->tx_word3 |= htole32(hdrlen << VMXNET3_TX_HLEN_S);
-	sop->tx_word2 |= htole32(offset << VMXNET3_TX_OP_S);
+	sop->tx_word2 |= htole32(m->m_pkthdr.ph_mss << VMXNET3_TX_OP_S);
+
+	tcpstat_add(tcps_outpkttso, (m->m_pkthdr.len - hdrlen +
+	    m->m_pkthdr.ph_mss - 1) / m->m_pkthdr.ph_mss);
 }
 
 void