From: jan@openbsd.org Subject: vio(4): TCP Large Receive Offload To: tech@openbsd.org Date: Tue, 28 May 2024 16:06:11 +0200 Hi, This diff implements TCP Large Receive Offload for vio(4). LRO is only used when guest offload control is also available. Without that, it would be impossible to turn off LRO. I tested it on KVM with external Linux machines. Tests are welcome on various platforms with vio(4). bye, Jan Index: dev/pv/if_vio.c =================================================================== RCS file: /cvs/src/sys/dev/pv/if_vio.c,v diff -u -p -r1.36 if_vio.c --- dev/pv/if_vio.c 28 May 2024 12:11:26 -0000 1.36 +++ dev/pv/if_vio.c 28 May 2024 13:55:12 -0000 @@ -169,6 +169,9 @@ struct virtio_net_ctrl_cmd { # define VIRTIO_NET_CTRL_VLAN_ADD 0 # define VIRTIO_NET_CTRL_VLAN_DEL 1 +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 +# define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 + struct virtio_net_ctrl_status { uint8_t ack; } __packed; @@ -179,6 +182,10 @@ struct virtio_net_ctrl_rx { uint8_t onoff; } __packed; +struct virtio_net_ctrl_guest_offloads { + uint64_t offloads; +} __packed; + struct virtio_net_ctrl_mac_tbl { uint32_t nentries; uint8_t macs[][ETHER_ADDR_LEN]; @@ -220,6 +227,7 @@ struct vio_softc { struct virtio_net_ctrl_cmd *sc_ctrl_cmd; struct virtio_net_ctrl_status *sc_ctrl_status; struct virtio_net_ctrl_rx *sc_ctrl_rx; + struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads; struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc; #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc; @@ -289,6 +297,7 @@ void vio_txtick(void *); void vio_link_state(struct ifnet *); int vio_config_change(struct virtio_softc *); int vio_ctrl_rx(struct vio_softc *, int, int); +int vio_ctrl_guest_offloads(struct vio_softc *, uint64_t); int vio_set_rx_filter(struct vio_softc *); void vio_iff(struct vio_softc *); int vio_media_change(struct ifnet *); @@ -414,6 +423,7 @@ vio_alloc_mem(struct vio_softc *sc) allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1; allocsize += sizeof(struct virtio_net_ctrl_status) * 1; allocsize += sizeof(struct virtio_net_ctrl_rx) * 1; + allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1; allocsize += VIO_CTRL_MAC_INFO_SIZE; } sc->sc_dma_size = allocsize; @@ -433,6 +443,8 @@ vio_alloc_mem(struct vio_softc *sc) offset += sizeof(*sc->sc_ctrl_status); sc->sc_ctrl_rx = (void*)(kva + offset); offset += sizeof(*sc->sc_ctrl_rx); + sc->sc_ctrl_guest_offloads = (void*)(kva + offset); + offset += sizeof(*sc->sc_ctrl_guest_offloads); sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset); offset += sizeof(*sc->sc_ctrl_mac_tbl_uc) + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_UC_ENTRIES; @@ -454,8 +466,8 @@ vio_alloc_mem(struct vio_softc *sc) sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize; for (i = 0; i < rxqsize; i++) { - r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0, - BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]); + r = bus_dmamap_create(vsc->sc_dmat, MAXMCLBYTES, 16, MCLBYTES, + 0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]); if (r != 0) goto err_reqs; } @@ -550,6 +562,10 @@ vio_attach(struct device *parent, struct vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4; vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6; + vsc->sc_driver_features |= VIRTIO_NET_F_CTRL_GUEST_OFFLOADS; + vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO4; + vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO6; + virtio_negotiate_features(vsc, virtio_net_feature_names); if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) { vio_get_lladdr(&sc->sc_ac, vsc); @@ -612,6 +628,14 @@ vio_attach(struct device *parent, struct ifp->if_capabilities |= IFCAP_TSOv4; if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6)) ifp->if_capabilities |= IFCAP_TSOv6; + + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) && + (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4) || + virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))) { + ifp->if_xflags |= IFXF_LRO; + ifp->if_capabilities |= IFCAP_LRO; + } + ifq_init_maxlen(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1); ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status); ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); @@ -688,6 +712,7 @@ int vio_init(struct ifnet *ifp) { struct vio_softc *sc = ifp->if_softc; + struct virtio_softc *vsc = sc->sc_virtio; vio_stop(ifp, 0); if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1), @@ -697,6 +722,20 @@ vio_init(struct ifnet *ifp) ifq_clr_oactive(&ifp->if_snd); vio_iff(sc); vio_link_state(ifp); + + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { + uint64_t features = 0; + + SET(features, VIRTIO_NET_F_GUEST_CSUM); + + if (ISSET(ifp->if_xflags, IFXF_LRO)) { + SET(features, VIRTIO_NET_F_GUEST_TSO4); + SET(features, VIRTIO_NET_F_GUEST_TSO6); + } + + vio_ctrl_guest_offloads(sc, features); + } + return 0; } @@ -1083,6 +1122,24 @@ vio_rx_offload(struct mbuf *m, struct vi if (ISSET(hdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT); } + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4 || + hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6) { + uint16_t mss = hdr->gso_size; + + if (!ext.tcp || mss == 0) { + tcpstat_inc(tcps_inbadlro); + return; + } + + if ((ext.paylen + mss - 1) / mss <= 1) + return; + + tcpstat_inc(tcps_inhwlro); + tcpstat_add(tcps_inpktlro, (ext.paylen + mss - 1) / mss); + SET(m->m_pkthdr.csum_flags, M_TCP_TSO); + m->m_pkthdr.ph_mss = mss; + } } /* dequeue received packets */ @@ -1370,6 +1427,67 @@ vio_ctrl_rx(struct vio_softc *sc, int cm DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, (int)onoff, r); out: + vio_ctrl_wakeup(sc, FREE); + return r; +} + +int +vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features) +{ + struct virtio_softc *vsc = sc->sc_virtio; + struct virtqueue *vq = &sc->sc_vq[VQCTL]; + int r, slot; + + splassert(IPL_NET); + + if ((r = vio_wait_ctrl(sc)) != 0) + return r; + + sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; + sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; + sc->sc_ctrl_guest_offloads->offloads = features; + + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd, + sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE); + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads, + sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_PREWRITE); + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status, + sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD); + + r = virtio_enqueue_prep(vq, &slot); + if (r != 0) + panic("%s: control vq busy!?", sc->sc_dev.dv_xname); + r = virtio_enqueue_reserve(vq, slot, 3); + if (r != 0) + panic("%s: control vq busy!?", sc->sc_dev.dv_xname); + VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd, + sizeof(*sc->sc_ctrl_cmd), 1); + VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_guest_offloads, + sizeof(*sc->sc_ctrl_guest_offloads), 1); + VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status, + sizeof(*sc->sc_ctrl_status), 0); + virtio_enqueue_commit(vsc, vq, slot, 1); + + if ((r = vio_wait_ctrl_done(sc)) != 0) + goto out; + + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd, + sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE); + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads, + sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_POSTWRITE); + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status, + sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD); + + if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) { + r = 0; + } else { + printf("%s: features 0x%llx failed\n", sc->sc_dev.dv_xname, + features); + r = EIO; + } + + DPRINTF("%s: features 0x%llx: %d\n", __func__, features, r); + out: vio_ctrl_wakeup(sc, FREE); return r; }