From: Jan Klemkow Subject: Re: vio(4): TCP Large Receive Offload To: Stefan Fritsch Cc: tech@openbsd.org Date: Fri, 7 Jun 2024 13:53:37 +0200 On Fri, Jun 07, 2024 at 10:37:42AM +0200, Stefan Fritsch wrote: > Looks mostly good. I have also tested with Linux/KVM. Comments inline. > > On Tue, 28 May 2024, jan@openbsd.org wrote: > > This diff implements TCP Large Receive Offload for vio(4). LRO is only > > used when guest offload control is also available. Without that, it > > would be impossible to turn off LRO. > > > > @@ -454,8 +466,8 @@ vio_alloc_mem(struct vio_softc *sc) > > sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize; > > > > for (i = 0; i < rxqsize; i++) { > > - r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0, > > - BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]); > > + r = bus_dmamap_create(vsc->sc_dmat, MAXMCLBYTES, 16, MCLBYTES, > > + 0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]); > > From what I read in uipc_mbuf.c, clusters have only an assured alignment > of 64 bytes. This means we may need 17 dma segments for a 64k cluster, not > 16. You could write it as MAXMCLBYTES/PAGE_SIZE + 1 for clarity. > > @@ -697,6 +722,20 @@ vio_init(struct ifnet *ifp) > > ifq_clr_oactive(&ifp->if_snd); > > vio_iff(sc); > > vio_link_state(ifp); > > + > > + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { > > + uint64_t features = 0; > > + > > + SET(features, VIRTIO_NET_F_GUEST_CSUM); > > + > > + if (ISSET(ifp->if_xflags, IFXF_LRO)) { > > + SET(features, VIRTIO_NET_F_GUEST_TSO4); > > + SET(features, VIRTIO_NET_F_GUEST_TSO6); > > You need to check the feature bits here again because above, you enable > IFXF_LRO if either TSO4 or TSO6 is available. The standard says "A driver > MUST NOT enable an offload for which the appropriate feature has not been > negotiated." I updated the diff with your improvements. Thanks, Jan Index: dev/pv/if_vio.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/dev/pv/if_vio.c,v diff -u -p -r1.37 if_vio.c --- dev/pv/if_vio.c 4 Jun 2024 09:51:52 -0000 1.37 +++ dev/pv/if_vio.c 7 Jun 2024 11:00:40 -0000 @@ -169,6 +169,9 @@ struct virtio_net_ctrl_cmd { # define VIRTIO_NET_CTRL_VLAN_ADD 0 # define VIRTIO_NET_CTRL_VLAN_DEL 1 +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 +# define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 + struct virtio_net_ctrl_status { uint8_t ack; } __packed; @@ -179,6 +182,10 @@ struct virtio_net_ctrl_rx { uint8_t onoff; } __packed; +struct virtio_net_ctrl_guest_offloads { + uint64_t offloads; +} __packed; + struct virtio_net_ctrl_mac_tbl { uint32_t nentries; uint8_t macs[][ETHER_ADDR_LEN]; @@ -220,6 +227,7 @@ struct vio_softc { struct virtio_net_ctrl_cmd *sc_ctrl_cmd; struct virtio_net_ctrl_status *sc_ctrl_status; struct virtio_net_ctrl_rx *sc_ctrl_rx; + struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads; struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc; #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc; @@ -289,6 +297,7 @@ void vio_txtick(void *); void vio_link_state(struct ifnet *); int vio_config_change(struct virtio_softc *); int vio_ctrl_rx(struct vio_softc *, int, int); +int vio_ctrl_guest_offloads(struct vio_softc *, uint64_t); int vio_set_rx_filter(struct vio_softc *); void vio_iff(struct vio_softc *); int vio_media_change(struct ifnet *); @@ -414,6 +423,7 @@ vio_alloc_mem(struct vio_softc *sc) allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1; allocsize += sizeof(struct virtio_net_ctrl_status) * 1; allocsize += sizeof(struct virtio_net_ctrl_rx) * 1; + allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1; allocsize += VIO_CTRL_MAC_INFO_SIZE; } sc->sc_dma_size = allocsize; @@ -433,6 +443,8 @@ vio_alloc_mem(struct vio_softc *sc) offset += sizeof(*sc->sc_ctrl_status); sc->sc_ctrl_rx = (void*)(kva + offset); offset += sizeof(*sc->sc_ctrl_rx); + sc->sc_ctrl_guest_offloads = (void*)(kva + offset); + offset += sizeof(*sc->sc_ctrl_guest_offloads); sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset); offset += sizeof(*sc->sc_ctrl_mac_tbl_uc) + ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_UC_ENTRIES; @@ -454,7 +466,8 @@ vio_alloc_mem(struct vio_softc *sc) sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize; for (i = 0; i < rxqsize; i++) { - r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0, + r = bus_dmamap_create(vsc->sc_dmat, MAXMCLBYTES, + MAXMCLBYTES/PAGE_SIZE + 1, MCLBYTES, 0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]); if (r != 0) goto err_reqs; @@ -550,6 +563,10 @@ vio_attach(struct device *parent, struct vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4; vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6; + vsc->sc_driver_features |= VIRTIO_NET_F_CTRL_GUEST_OFFLOADS; + vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO4; + vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO6; + virtio_negotiate_features(vsc, virtio_net_feature_names); if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) { vio_get_lladdr(&sc->sc_ac, vsc); @@ -612,6 +629,14 @@ vio_attach(struct device *parent, struct ifp->if_capabilities |= IFCAP_TSOv4; if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6)) ifp->if_capabilities |= IFCAP_TSOv6; + + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) && + (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4) || + virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))) { + ifp->if_xflags |= IFXF_LRO; + ifp->if_capabilities |= IFCAP_LRO; + } + ifq_init_maxlen(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1); ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status); ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); @@ -688,6 +713,7 @@ int vio_init(struct ifnet *ifp) { struct vio_softc *sc = ifp->if_softc; + struct virtio_softc *vsc = sc->sc_virtio; vio_stop(ifp, 0); if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1), @@ -697,6 +723,22 @@ vio_init(struct ifnet *ifp) ifq_clr_oactive(&ifp->if_snd); vio_iff(sc); vio_link_state(ifp); + + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { + uint64_t features = 0; + + SET(features, VIRTIO_NET_F_GUEST_CSUM); + + if (ISSET(ifp->if_xflags, IFXF_LRO)) { + if (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4)) + SET(features, VIRTIO_NET_F_GUEST_TSO4); + if (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6)) + SET(features, VIRTIO_NET_F_GUEST_TSO6); + } + + vio_ctrl_guest_offloads(sc, features); + } + return 0; } @@ -1083,6 +1125,24 @@ vio_rx_offload(struct mbuf *m, struct vi if (ISSET(hdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT); } + + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4 || + hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6) { + uint16_t mss = hdr->gso_size; + + if (!ext.tcp || mss == 0) { + tcpstat_inc(tcps_inbadlro); + return; + } + + if ((ext.paylen + mss - 1) / mss <= 1) + return; + + tcpstat_inc(tcps_inhwlro); + tcpstat_add(tcps_inpktlro, (ext.paylen + mss - 1) / mss); + SET(m->m_pkthdr.csum_flags, M_TCP_TSO); + m->m_pkthdr.ph_mss = mss; + } } /* dequeue received packets */ @@ -1368,6 +1428,67 @@ vio_ctrl_rx(struct vio_softc *sc, int cm DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, onoff, r); out: + vio_ctrl_wakeup(sc, FREE); + return r; +} + +int +vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features) +{ + struct virtio_softc *vsc = sc->sc_virtio; + struct virtqueue *vq = &sc->sc_vq[VQCTL]; + int r, slot; + + splassert(IPL_NET); + + if ((r = vio_wait_ctrl(sc)) != 0) + return r; + + sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; + sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; + sc->sc_ctrl_guest_offloads->offloads = features; + + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd, + sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE); + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads, + sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_PREWRITE); + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status, + sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD); + + r = virtio_enqueue_prep(vq, &slot); + if (r != 0) + panic("%s: control vq busy!?", sc->sc_dev.dv_xname); + r = virtio_enqueue_reserve(vq, slot, 3); + if (r != 0) + panic("%s: control vq busy!?", sc->sc_dev.dv_xname); + VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd, + sizeof(*sc->sc_ctrl_cmd), 1); + VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_guest_offloads, + sizeof(*sc->sc_ctrl_guest_offloads), 1); + VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status, + sizeof(*sc->sc_ctrl_status), 0); + virtio_enqueue_commit(vsc, vq, slot, 1); + + if ((r = vio_wait_ctrl_done(sc)) != 0) + goto out; + + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd, + sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE); + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads, + sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_POSTWRITE); + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status, + sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD); + + if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) { + r = 0; + } else { + printf("%s: features 0x%llx failed\n", sc->sc_dev.dv_xname, + features); + r = EIO; + } + + DPRINTF("%s: features 0x%llx: %d\n", __func__, features, r); + out: vio_ctrl_wakeup(sc, FREE); return r; }