From: Stefan Fritsch Subject: Re: vio(4): TCP Large Receive Offload To: jan@openbsd.org Cc: tech@openbsd.org Date: Fri, 7 Jun 2024 10:37:42 +0200 Looks mostly good. I have also tested with Linux/KVM. Comments inline. On Tue, 28 May 2024, jan@openbsd.org wrote: > Hi, > > This diff implements TCP Large Receive Offload for vio(4). LRO is only > used when guest offload control is also available. Without that, it > would be impossible to turn off LRO. > > I tested it on KVM with external Linux machines. > > Tests are welcome on various platforms with vio(4). > > bye, > Jan > > Index: dev/pv/if_vio.c > =================================================================== > RCS file: /cvs/src/sys/dev/pv/if_vio.c,v > diff -u -p -r1.36 if_vio.c > --- dev/pv/if_vio.c 28 May 2024 12:11:26 -0000 1.36 > +++ dev/pv/if_vio.c 28 May 2024 13:55:12 -0000 > @@ -169,6 +169,9 @@ struct virtio_net_ctrl_cmd { > # define VIRTIO_NET_CTRL_VLAN_ADD 0 > # define VIRTIO_NET_CTRL_VLAN_DEL 1 > > +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 > +# define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 > + > struct virtio_net_ctrl_status { > uint8_t ack; > } __packed; > @@ -179,6 +182,10 @@ struct virtio_net_ctrl_rx { > uint8_t onoff; > } __packed; > > +struct virtio_net_ctrl_guest_offloads { > + uint64_t offloads; > +} __packed; > + > struct virtio_net_ctrl_mac_tbl { > uint32_t nentries; > uint8_t macs[][ETHER_ADDR_LEN]; > @@ -220,6 +227,7 @@ struct vio_softc { > struct virtio_net_ctrl_cmd *sc_ctrl_cmd; > struct virtio_net_ctrl_status *sc_ctrl_status; > struct virtio_net_ctrl_rx *sc_ctrl_rx; > + struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads; > struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc; > #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc > struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc; > @@ -289,6 +297,7 @@ void vio_txtick(void *); > void vio_link_state(struct ifnet *); > int vio_config_change(struct virtio_softc *); > int vio_ctrl_rx(struct vio_softc *, int, int); > +int vio_ctrl_guest_offloads(struct vio_softc *, uint64_t); > int vio_set_rx_filter(struct vio_softc *); > void vio_iff(struct vio_softc *); > int vio_media_change(struct ifnet *); > @@ -414,6 +423,7 @@ vio_alloc_mem(struct vio_softc *sc) > allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1; > allocsize += sizeof(struct virtio_net_ctrl_status) * 1; > allocsize += sizeof(struct virtio_net_ctrl_rx) * 1; > + allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1; > allocsize += VIO_CTRL_MAC_INFO_SIZE; > } > sc->sc_dma_size = allocsize; > @@ -433,6 +443,8 @@ vio_alloc_mem(struct vio_softc *sc) > offset += sizeof(*sc->sc_ctrl_status); > sc->sc_ctrl_rx = (void*)(kva + offset); > offset += sizeof(*sc->sc_ctrl_rx); > + sc->sc_ctrl_guest_offloads = (void*)(kva + offset); > + offset += sizeof(*sc->sc_ctrl_guest_offloads); > sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset); > offset += sizeof(*sc->sc_ctrl_mac_tbl_uc) + > ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_UC_ENTRIES; > @@ -454,8 +466,8 @@ vio_alloc_mem(struct vio_softc *sc) > sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize; > > for (i = 0; i < rxqsize; i++) { > - r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0, > - BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]); > + r = bus_dmamap_create(vsc->sc_dmat, MAXMCLBYTES, 16, MCLBYTES, > + 0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]); From what I read in uipc_mbuf.c, clusters have only an assured alignment of 64 bytes. This means we may need 17 dma segments for a 64k cluster, not 16. You could write it as MAXMCLBYTES/PAGE_SIZE + 1 for clarity. > if (r != 0) > goto err_reqs; > } > @@ -550,6 +562,10 @@ vio_attach(struct device *parent, struct > vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4; > vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6; > > + vsc->sc_driver_features |= VIRTIO_NET_F_CTRL_GUEST_OFFLOADS; > + vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO4; > + vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO6; > + > virtio_negotiate_features(vsc, virtio_net_feature_names); > if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) { > vio_get_lladdr(&sc->sc_ac, vsc); > @@ -612,6 +628,14 @@ vio_attach(struct device *parent, struct > ifp->if_capabilities |= IFCAP_TSOv4; > if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6)) > ifp->if_capabilities |= IFCAP_TSOv6; > + > + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) && > + (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4) || > + virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))) { > + ifp->if_xflags |= IFXF_LRO; > + ifp->if_capabilities |= IFCAP_LRO; > + } > + > ifq_init_maxlen(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1); > ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status); > ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); > @@ -688,6 +712,7 @@ int > vio_init(struct ifnet *ifp) > { > struct vio_softc *sc = ifp->if_softc; > + struct virtio_softc *vsc = sc->sc_virtio; > > vio_stop(ifp, 0); > if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1), > @@ -697,6 +722,20 @@ vio_init(struct ifnet *ifp) > ifq_clr_oactive(&ifp->if_snd); > vio_iff(sc); > vio_link_state(ifp); > + > + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { > + uint64_t features = 0; > + > + SET(features, VIRTIO_NET_F_GUEST_CSUM); > + > + if (ISSET(ifp->if_xflags, IFXF_LRO)) { > + SET(features, VIRTIO_NET_F_GUEST_TSO4); > + SET(features, VIRTIO_NET_F_GUEST_TSO6); You need to check the feature bits here again because above, you enable IFXF_LRO if either TSO4 or TSO6 is available. The standard says "A driver MUST NOT enable an offload for which the appropriate feature has not been negotiated." > + } > + > + vio_ctrl_guest_offloads(sc, features); > + } > + > return 0; > } > > @@ -1083,6 +1122,24 @@ vio_rx_offload(struct mbuf *m, struct vi > if (ISSET(hdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) > SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT); > } > + > + if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4 || > + hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6) { > + uint16_t mss = hdr->gso_size; > + > + if (!ext.tcp || mss == 0) { > + tcpstat_inc(tcps_inbadlro); > + return; > + } > + > + if ((ext.paylen + mss - 1) / mss <= 1) > + return; > + > + tcpstat_inc(tcps_inhwlro); > + tcpstat_add(tcps_inpktlro, (ext.paylen + mss - 1) / mss); > + SET(m->m_pkthdr.csum_flags, M_TCP_TSO); > + m->m_pkthdr.ph_mss = mss; > + } > } > > /* dequeue received packets */ > @@ -1370,6 +1427,67 @@ vio_ctrl_rx(struct vio_softc *sc, int cm > > DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, (int)onoff, r); > out: > + vio_ctrl_wakeup(sc, FREE); > + return r; > +} > + > +int > +vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features) > +{ > + struct virtio_softc *vsc = sc->sc_virtio; > + struct virtqueue *vq = &sc->sc_vq[VQCTL]; > + int r, slot; > + > + splassert(IPL_NET); > + > + if ((r = vio_wait_ctrl(sc)) != 0) > + return r; > + > + sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; > + sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; > + sc->sc_ctrl_guest_offloads->offloads = features; > + > + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd, > + sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE); > + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads, > + sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_PREWRITE); > + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status, > + sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD); > + > + r = virtio_enqueue_prep(vq, &slot); > + if (r != 0) > + panic("%s: control vq busy!?", sc->sc_dev.dv_xname); > + r = virtio_enqueue_reserve(vq, slot, 3); > + if (r != 0) > + panic("%s: control vq busy!?", sc->sc_dev.dv_xname); > + VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd, > + sizeof(*sc->sc_ctrl_cmd), 1); > + VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_guest_offloads, > + sizeof(*sc->sc_ctrl_guest_offloads), 1); > + VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status, > + sizeof(*sc->sc_ctrl_status), 0); > + virtio_enqueue_commit(vsc, vq, slot, 1); > + > + if ((r = vio_wait_ctrl_done(sc)) != 0) > + goto out; > + > + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd, > + sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE); > + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads, > + sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_POSTWRITE); > + VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status, > + sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD); > + > + if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) { > + r = 0; > + } else { > + printf("%s: features 0x%llx failed\n", sc->sc_dev.dv_xname, > + features); > + r = EIO; > + } > + > + DPRINTF("%s: features 0x%llx: %d\n", __func__, features, r); > + out: > vio_ctrl_wakeup(sc, FREE); > return r; > } > >