Download raw body.
vio(4): tso
Hi,
This diff implements TCP Segmentation Offload (TSO) for vio(4).
I'll first fix the vio(4) bpf checksum issue[1] before committing this.
But people asked me to share this diff now, since they want to test and run it.
On my KVM machine it increases single-stream TCP throughput
between OpenBSD and Linux guests from 4 Gbit/s to 20 Gbit/s.
bye,
Jan
[1]: https://marc.info/?t=170463117000002&r=1&w=2
Index: dev/pv/if_vio.c
===================================================================
RCS file: /cvs/src/sys/dev/pv/if_vio.c,v
diff -u -p -r1.29 if_vio.c
--- dev/pv/if_vio.c 20 Dec 2023 09:51:06 -0000 1.29
+++ dev/pv/if_vio.c 14 Jan 2024 12:53:24 -0000
@@ -43,12 +43,15 @@
#include <net/if.h>
#include <net/if_media.h>
+#include <net/route.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#if NBPFILTER > 0
@@ -537,6 +540,9 @@ vio_attach(struct device *parent, struct
VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM |
VIRTIO_F_RING_EVENT_IDX | VIRTIO_NET_F_GUEST_CSUM;
+ vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4;
+ vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6;
+
virtio_negotiate_features(vsc, virtio_net_feature_names);
if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) {
vio_get_lladdr(&sc->sc_ac, vsc);
@@ -553,9 +559,9 @@ vio_attach(struct device *parent, struct
sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers);
}
if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF))
- ifp->if_hardmtu = 16000; /* arbitrary limit */
+ ifp->if_hardmtu = MAXMCLBYTES;
else
- ifp->if_hardmtu = MCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN;
+ ifp->if_hardmtu = MAXMCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN;
if (virtio_alloc_vq(vsc, &sc->sc_vq[VQRX], 0, MCLBYTES, 2, "rx") != 0)
goto err;
@@ -595,6 +601,10 @@ vio_attach(struct device *parent, struct
if (virtio_has_feature(vsc, VIRTIO_NET_F_CSUM))
ifp->if_capabilities |= IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4|
IFCAP_CSUM_TCPv6|IFCAP_CSUM_UDPv6;
+ if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO4))
+ ifp->if_capabilities |= IFCAP_TSOv4;
+ if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6))
+ ifp->if_capabilities |= IFCAP_TSOv6;
ifq_init_maxlen(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1);
ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status);
ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
@@ -714,6 +724,103 @@ vio_stop(struct ifnet *ifp, int disable)
}
}
+/*
+ * Add the TCP payload length to a pseudo-header checksum and fold the sum
+ * back to 16 bits.  A single fold is enough as long as cksum fits in 16
+ * bits on entry; vio_tx_offload() only passes the packet's th_sum field.
+ */
+static inline uint16_t
+vio_tso_cksum(uint32_t cksum, uint16_t paylen)
+{
+	/* Add payload length */
+	cksum += paylen;
+
+	/* Fold back to 16 bit */
+	cksum += cksum >> 16;
+
+	return (uint16_t)(cksum);
+}
+
+/*
+ * Fill in the virtio-net header of an outgoing packet for checksum
+ * offload and, if the stack requested it, TCP segmentation offload.
+ * Fields not set here keep the value the caller stored in hdr.
+ */
+void
+vio_tx_offload(struct virtio_net_hdr *hdr, struct mbuf *m)
+{
+	struct ether_extracted ext;
+
+	/*
+	 * Checksum Offload
+	 */
+
+	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_CSUM_OUT) &&
+	    !ISSET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT))
+		return;
+
+	ether_extract_headers(m, &ext);
+	hdr->csum_start = sizeof(*ext.eh);
+#if NVLAN > 0
+	if (ext.evh)
+		hdr->csum_start = sizeof(*ext.evh);
+#endif
+	if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT)
+		hdr->csum_offset = offsetof(struct tcphdr, th_sum);
+	else
+		hdr->csum_offset = offsetof(struct udphdr, uh_sum);
+
+	/* Skip the IP header so csum_start lands behind it */
+	if (ext.ip4)
+		hdr->csum_start += ext.ip4->ip_hl << 2;
+#ifdef INET6
+	else if (ext.ip6)
+		hdr->csum_start += sizeof(*ext.ip6);
+#endif
+	hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+
+	/*
+	 * TCP Segmentation Offload
+	 */
+
+	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO))
+		return;
+
+	/*
+	 * A TSO request without a TCP header or with a zero MSS cannot be
+	 * handed to the host; the MSS is also the divisor below.  Assumes
+	 * ether_extract_headers() leaves ext.tcp NULL when no TCP header
+	 * was found, like the other ext pointers tested in this function.
+	 */
+	if (ext.tcp == NULL || m->m_pkthdr.ph_mss == 0) {
+		tcpstat_inc(tcps_outbadtso);
+		return;
+	}
+
+	/* VirtIO-Net needs pseudo header cksum with payload length for TSO */
+	if (ext.ip4) {
+		ext.tcp->th_sum = vio_tso_cksum(ext.tcp->th_sum,
+		    htons(ntohs(ext.ip4->ip_len) - (ext.ip4->ip_hl << 2)));
+		hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+	}
+#ifdef INET6
+	else if (ext.ip6) {
+		ext.tcp->th_sum = vio_tso_cksum(ext.tcp->th_sum,
+		    ext.ip6->ip6_plen);
+		hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+	}
+#endif
+	else {
+		/* Neither IPv4 nor IPv6: do not set any GSO field */
+		tcpstat_inc(tcps_outbadtso);
+		return;
+	}
+
+	hdr->hdr_len = hdr->csum_start + (ext.tcp->th_off << 2);
+	hdr->gso_size = m->m_pkthdr.ph_mss;
+
+	tcpstat_add(tcps_outpkttso, (m->m_pkthdr.len - hdr->hdr_len +
+	    m->m_pkthdr.ph_mss - 1) / m->m_pkthdr.ph_mss);
+}
+
void
vio_start(struct ifnet *ifp)
{
@@ -750,28 +834,7 @@ again:
hdr = &sc->sc_tx_hdrs[slot];
memset(hdr, 0, sc->sc_hdr_size);
- if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) {
- struct ether_extracted ext;
-
- ether_extract_headers(m, &ext);
- hdr->csum_start = sizeof(*ext.eh);
-#if NVLAN > 0
- if (ext.evh)
- hdr->csum_start = sizeof(*ext.evh);
-#endif
- if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT)
- hdr->csum_offset = offsetof(struct tcphdr, th_sum);
- else
- hdr->csum_offset = offsetof(struct udphdr, uh_sum);
-
- if (ext.ip4)
- hdr->csum_start += ext.ip4->ip_hl << 2;
-#ifdef INET6
- else if (ext.ip6)
- hdr->csum_start += sizeof(*ext.ip6);
-#endif
- hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- }
+ vio_tx_offload(hdr, m);
r = vio_encap(sc, slot, m);
if (r != 0) {
vio(4): tso