From: Jan Klemkow Subject: vio(4): tso To: tech@openbsd.org Date: Sun, 14 Jan 2024 14:05:51 +0100 Hi, This diff implements TCP Segmentation Offload (TSO) for vio(4). I'll first fix the vio(4) bpf checksum issue[1] before committing this. But people poked me to show off this diff because they want to test and run it. On my KVM machine it increases single-stream TCP throughput between OpenBSD and Linux guests from 4 Gbit/s to 20 Gbit/s. bye, Jan [1]: https://marc.info/?t=170463117000002&r=1&w=2 Index: dev/pv/if_vio.c =================================================================== RCS file: /cvs/src/sys/dev/pv/if_vio.c,v diff -u -p -r1.29 if_vio.c --- dev/pv/if_vio.c 20 Dec 2023 09:51:06 -0000 1.29 +++ dev/pv/if_vio.c 14 Jan 2024 12:53:24 -0000 @@ -43,12 +43,15 @@ #include #include +#include #include #include #include #include #include +#include +#include #include #if NBPFILTER > 0 @@ -537,6 +540,9 @@ vio_attach(struct device *parent, struct VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM | VIRTIO_F_RING_EVENT_IDX | VIRTIO_NET_F_GUEST_CSUM; + vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4; + vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6; + virtio_negotiate_features(vsc, virtio_net_feature_names); if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) { vio_get_lladdr(&sc->sc_ac, vsc); @@ -553,9 +559,9 @@ vio_attach(struct device *parent, struct sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers); } if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF)) - ifp->if_hardmtu = 16000; /* arbitrary limit */ + ifp->if_hardmtu = MAXMCLBYTES; else - ifp->if_hardmtu = MCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN; + ifp->if_hardmtu = MAXMCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN; if (virtio_alloc_vq(vsc, &sc->sc_vq[VQRX], 0, MCLBYTES, 2, "rx") != 0) goto err; @@ -595,6 +601,10 @@ vio_attach(struct device *parent, struct if (virtio_has_feature(vsc, VIRTIO_NET_F_CSUM)) ifp->if_capabilities |= IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4| IFCAP_CSUM_TCPv6|IFCAP_CSUM_UDPv6; + if 
(virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO4)) + ifp->if_capabilities |= IFCAP_TSOv4; + if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6)) + ifp->if_capabilities |= IFCAP_TSOv6; ifq_init_maxlen(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1); ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status); ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); @@ -714,6 +724,87 @@ vio_stop(struct ifnet *ifp, int disable) } } +static inline uint16_t +vio_tso_cksum(uint32_t cksum, uint16_t paylen) +{ + /* Add payload length */ + cksum += paylen; + + /* Fold back to 16 bit */ + cksum += cksum >> 16; + + return (uint16_t)(cksum); +} + +/* + * Set the offload fields of the virtio-net header for an outgoing mbuf: + * checksum offload for M_TCP_CSUM_OUT/M_UDP_CSUM_OUT and, for M_TCP_TSO, + * the segmentation fields. The TSO path also adds the payload length + * to th_sum. + */ +void +vio_tx_offload(struct virtio_net_hdr *hdr, struct mbuf *m) +{ + struct ether_extracted ext; + + /* + * Checksum Offload + */ + + if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_CSUM_OUT) && + !ISSET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) + return; + + ether_extract_headers(m, &ext); + hdr->csum_start = sizeof(*ext.eh); +#if NVLAN > 0 + if (ext.evh) + hdr->csum_start = sizeof(*ext.evh); +#endif + if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) + hdr->csum_offset = offsetof(struct tcphdr, th_sum); + else + hdr->csum_offset = offsetof(struct udphdr, uh_sum); + + if (ext.ip4) + hdr->csum_start += ext.ip4->ip_hl << 2; +#ifdef INET6 + else if (ext.ip6) + hdr->csum_start += sizeof(*ext.ip6); +#endif + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + + /* + * TCP Segmentation Offload + */ + + if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) + return; + + hdr->hdr_len = hdr->csum_start + (ext.tcp->th_off << 2); + hdr->gso_size = m->m_pkthdr.ph_mss; + + /* VirtIO-Net need pseudo header cksum with payload length for TSO */ + if (ext.ip4) { + ext.tcp->th_sum = vio_tso_cksum(ext.tcp->th_sum, + htons(ntohs(ext.ip4->ip_len) - (ext.ip4->ip_hl << 2))); + hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + } +#ifdef INET6 + else if (ext.ip6) { + ext.tcp->th_sum = vio_tso_cksum(ext.tcp->th_sum, + ext.ip6->ip6_plen); + hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + } +#endif
+ else { + tcpstat_inc(tcps_outbadtso); + } + + tcpstat_add(tcps_outpkttso, (m->m_pkthdr.len - hdr->hdr_len + + m->m_pkthdr.ph_mss - 1) / m->m_pkthdr.ph_mss); +} + void vio_start(struct ifnet *ifp) { @@ -750,28 +841,7 @@ again: hdr = &sc->sc_tx_hdrs[slot]; memset(hdr, 0, sc->sc_hdr_size); - if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) { - struct ether_extracted ext; - - ether_extract_headers(m, &ext); - hdr->csum_start = sizeof(*ext.eh); -#if NVLAN > 0 - if (ext.evh) - hdr->csum_start = sizeof(*ext.evh); -#endif - if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) - hdr->csum_offset = offsetof(struct tcphdr, th_sum); - else - hdr->csum_offset = offsetof(struct udphdr, uh_sum); - - if (ext.ip4) - hdr->csum_start += ext.ip4->ip_hl << 2; -#ifdef INET6 - else if (ext.ip6) - hdr->csum_start += sizeof(*ext.ip6); -#endif - hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - } + vio_tx_offload(hdr, m); r = vio_encap(sc, slot, m); if (r != 0) {