vio(4): TCP Large Receive Offload
On Fri, Jun 07, 2024 at 10:37:42AM +0200, Stefan Fritsch wrote:
> Looks mostly good. I have also tested with Linux/KVM. Comments inline.
>
> On Tue, 28 May 2024, jan@openbsd.org wrote:
> > This diff implements TCP Large Receive Offload for vio(4). LRO is only
> > used when guest offload control is also available. Without that, it
> > would be impossible to turn off LRO.
> >
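For context on that dependency: GUEST_TSO4/6 can only be changed after
feature negotiation by sending a command over the control virtqueue. A
minimal sketch (vio_send_ctrl() is a hypothetical stand-in for the
control-queue plumbing; the real code is in the diff below):

	/*
	 * Turning LRO off at runtime means re-negotiating the guest
	 * offloads.  Without VIRTIO_NET_F_CTRL_GUEST_OFFLOADS there is
	 * no way to send this command, so GUEST_TSO4/6 would stay
	 * enabled for the lifetime of the interface.
	 */
	uint64_t offloads = VIRTIO_NET_F_GUEST_CSUM; /* keep csum, drop TSO */

	vio_send_ctrl(sc, VIRTIO_NET_CTRL_GUEST_OFFLOADS,
	    VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, &offloads, sizeof(offloads));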
> > @@ -454,8 +466,8 @@ vio_alloc_mem(struct vio_softc *sc)
> > sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;
> >
> > for (i = 0; i < rxqsize; i++) {
> > - r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0,
> > - BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);
> > + r = bus_dmamap_create(vsc->sc_dmat, MAXMCLBYTES, 16, MCLBYTES,
> > + 0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);
>
> From what I read in uipc_mbuf.c, clusters have only an assured alignment
> of 64 bytes. This means we may need 17 dma segments for a 64k cluster, not
> 16. You could write it as MAXMCLBYTES/PAGE_SIZE + 1 for clarity.
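To spell out the worst case (assuming 4k pages): a 64k cluster that is
only 64-byte aligned can start just past a page boundary and then
touches one extra page:

	/* worst case: 64k cluster starting 64 bytes into a page */
	nsegs = howmany(64 + MAXMCLBYTES, PAGE_SIZE);	/* 17 with 4k pages */

i.e. MAXMCLBYTES / PAGE_SIZE + 1 segments, not 16.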
> > @@ -697,6 +722,20 @@ vio_init(struct ifnet *ifp)
> > ifq_clr_oactive(&ifp->if_snd);
> > vio_iff(sc);
> > vio_link_state(ifp);
> > +
> > + if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
> > + uint64_t features = 0;
> > +
> > + SET(features, VIRTIO_NET_F_GUEST_CSUM);
> > +
> > + if (ISSET(ifp->if_xflags, IFXF_LRO)) {
> > + SET(features, VIRTIO_NET_F_GUEST_TSO4);
> > + SET(features, VIRTIO_NET_F_GUEST_TSO6);
>
> You need to check the feature bits here again because above, you enable
> IFXF_LRO if either TSO4 or TSO6 is available. The standard says "A driver
> MUST NOT enable an offload for which the appropriate feature has not been
> negotiated."
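Concretely: if the host offers only GUEST_TSO6, IFXF_LRO is still set
(either feature suffices), so the version quoted above would also
request GUEST_TSO4, an offload whose feature was never negotiated. The
per-feature re-check, as it now appears in the updated diff below:

	if (ISSET(ifp->if_xflags, IFXF_LRO)) {
		if (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4))
			SET(features, VIRTIO_NET_F_GUEST_TSO4);
		if (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))
			SET(features, VIRTIO_NET_F_GUEST_TSO6);
	}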
I updated the diff with your suggested improvements.
Thanks,
Jan
Index: dev/pv/if_vio.c
===================================================================
RCS file: /mount/openbsd/cvs/src/sys/dev/pv/if_vio.c,v
diff -u -p -r1.37 if_vio.c
--- dev/pv/if_vio.c 4 Jun 2024 09:51:52 -0000 1.37
+++ dev/pv/if_vio.c 7 Jun 2024 11:00:40 -0000
@@ -169,6 +169,9 @@ struct virtio_net_ctrl_cmd {
# define VIRTIO_NET_CTRL_VLAN_ADD 0
# define VIRTIO_NET_CTRL_VLAN_DEL 1
+#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5
+# define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0
+
struct virtio_net_ctrl_status {
uint8_t ack;
} __packed;
@@ -179,6 +182,10 @@ struct virtio_net_ctrl_rx {
uint8_t onoff;
} __packed;
+struct virtio_net_ctrl_guest_offloads {
+ uint64_t offloads;
+} __packed;
+
struct virtio_net_ctrl_mac_tbl {
uint32_t nentries;
uint8_t macs[][ETHER_ADDR_LEN];
@@ -220,6 +227,7 @@ struct vio_softc {
struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
struct virtio_net_ctrl_status *sc_ctrl_status;
struct virtio_net_ctrl_rx *sc_ctrl_rx;
+ struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads;
struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
#define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
@@ -289,6 +297,7 @@ void vio_txtick(void *);
void vio_link_state(struct ifnet *);
int vio_config_change(struct virtio_softc *);
int vio_ctrl_rx(struct vio_softc *, int, int);
+int vio_ctrl_guest_offloads(struct vio_softc *, uint64_t);
int vio_set_rx_filter(struct vio_softc *);
void vio_iff(struct vio_softc *);
int vio_media_change(struct ifnet *);
@@ -414,6 +423,7 @@ vio_alloc_mem(struct vio_softc *sc)
allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
+ allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1;
allocsize += VIO_CTRL_MAC_INFO_SIZE;
}
sc->sc_dma_size = allocsize;
@@ -433,6 +443,8 @@ vio_alloc_mem(struct vio_softc *sc)
offset += sizeof(*sc->sc_ctrl_status);
sc->sc_ctrl_rx = (void*)(kva + offset);
offset += sizeof(*sc->sc_ctrl_rx);
+ sc->sc_ctrl_guest_offloads = (void*)(kva + offset);
+ offset += sizeof(*sc->sc_ctrl_guest_offloads);
sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset);
offset += sizeof(*sc->sc_ctrl_mac_tbl_uc) +
ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_UC_ENTRIES;
@@ -454,7 +466,8 @@ vio_alloc_mem(struct vio_softc *sc)
sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;
for (i = 0; i < rxqsize; i++) {
- r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0,
+ r = bus_dmamap_create(vsc->sc_dmat, MAXMCLBYTES,
+ MAXMCLBYTES/PAGE_SIZE + 1, MCLBYTES, 0,
BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);
if (r != 0)
goto err_reqs;
@@ -550,6 +563,10 @@ vio_attach(struct device *parent, struct
vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4;
vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6;
+ vsc->sc_driver_features |= VIRTIO_NET_F_CTRL_GUEST_OFFLOADS;
+ vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO4;
+ vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO6;
+
virtio_negotiate_features(vsc, virtio_net_feature_names);
if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) {
vio_get_lladdr(&sc->sc_ac, vsc);
@@ -612,6 +629,14 @@ vio_attach(struct device *parent, struct
ifp->if_capabilities |= IFCAP_TSOv4;
if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6))
ifp->if_capabilities |= IFCAP_TSOv6;
+
+ if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) &&
+ (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4) ||
+ virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))) {
+ ifp->if_xflags |= IFXF_LRO;
+ ifp->if_capabilities |= IFCAP_LRO;
+ }
+
ifq_init_maxlen(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1);
ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status);
ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
@@ -688,6 +713,7 @@ int
vio_init(struct ifnet *ifp)
{
struct vio_softc *sc = ifp->if_softc;
+ struct virtio_softc *vsc = sc->sc_virtio;
vio_stop(ifp, 0);
if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
@@ -697,6 +723,22 @@ vio_init(struct ifnet *ifp)
ifq_clr_oactive(&ifp->if_snd);
vio_iff(sc);
vio_link_state(ifp);
+
+ if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
+ uint64_t features = 0;
+
+ SET(features, VIRTIO_NET_F_GUEST_CSUM);
+
+ if (ISSET(ifp->if_xflags, IFXF_LRO)) {
+ if (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4))
+ SET(features, VIRTIO_NET_F_GUEST_TSO4);
+ if (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))
+ SET(features, VIRTIO_NET_F_GUEST_TSO6);
+ }
+
+ vio_ctrl_guest_offloads(sc, features);
+ }
+
return 0;
}
@@ -1083,6 +1125,24 @@ vio_rx_offload(struct mbuf *m, struct vi
if (ISSET(hdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM))
SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT);
}
+
+ if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
+ hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
+ uint16_t mss = hdr->gso_size;
+
+ if (!ext.tcp || mss == 0) {
+ tcpstat_inc(tcps_inbadlro);
+ return;
+ }
+
+ if ((ext.paylen + mss - 1) / mss <= 1)
+ return;
+
+ tcpstat_inc(tcps_inhwlro);
+ tcpstat_add(tcps_inpktlro, (ext.paylen + mss - 1) / mss);
+ SET(m->m_pkthdr.csum_flags, M_TCP_TSO);
+ m->m_pkthdr.ph_mss = mss;
+ }
}
/* dequeue received packets */
@@ -1368,6 +1428,67 @@ vio_ctrl_rx(struct vio_softc *sc, int cm
DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, onoff, r);
out:
+ vio_ctrl_wakeup(sc, FREE);
+ return r;
+}
+
+int
+vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features)
+{
+ struct virtio_softc *vsc = sc->sc_virtio;
+ struct virtqueue *vq = &sc->sc_vq[VQCTL];
+ int r, slot;
+
+ splassert(IPL_NET);
+
+ if ((r = vio_wait_ctrl(sc)) != 0)
+ return r;
+
+ sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
+ sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
+ sc->sc_ctrl_guest_offloads->offloads = features;
+
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
+ sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads,
+ sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_PREWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
+ sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
+
+ r = virtio_enqueue_prep(vq, &slot);
+ if (r != 0)
+ panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
+ r = virtio_enqueue_reserve(vq, slot, 3);
+ if (r != 0)
+ panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
+ VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
+ sizeof(*sc->sc_ctrl_cmd), 1);
+ VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_guest_offloads,
+ sizeof(*sc->sc_ctrl_guest_offloads), 1);
+ VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
+ sizeof(*sc->sc_ctrl_status), 0);
+ virtio_enqueue_commit(vsc, vq, slot, 1);
+
+ if ((r = vio_wait_ctrl_done(sc)) != 0)
+ goto out;
+
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
+ sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads,
+ sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_POSTWRITE);
+ VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
+ sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
+
+ if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
+ r = 0;
+ } else {
+ printf("%s: features 0x%llx failed\n", sc->sc_dev.dv_xname,
+ features);
+ r = EIO;
+ }
+
+ DPRINTF("%s: features 0x%llx: %d\n", __func__, features, r);
+ out:
vio_ctrl_wakeup(sc, FREE);
return r;
}
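
For testing: with the diff applied, LRO can be toggled per interface
with "ifconfig vio0 tcplro" / "ifconfig vio0 -tcplro", and the new
tcps_inhwlro/tcps_inpktlro/tcps_inbadlro counters should show up in
"netstat -sp tcp" while receiving bulk TCP traffic, e.g. from
tcpbench(1).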