Index | Thread | Search

From:
jan@openbsd.org
Subject:
vio(4): TCP Large Receive Offload
To:
tech@openbsd.org
Date:
Tue, 28 May 2024 16:06:11 +0200

Download raw body.

Thread
Hi,

This diff implements TCP Large Receive Offload for vio(4).  LRO is only
enabled when guest offload control (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) is
also negotiated; without that feature, LRO could not be turned off again
at runtime.

I tested it on KVM with external Linux machines.

Tests are welcome on various platforms with vio(4).

bye,
Jan

Index: dev/pv/if_vio.c
===================================================================
RCS file: /cvs/src/sys/dev/pv/if_vio.c,v
diff -u -p -r1.36 if_vio.c
--- dev/pv/if_vio.c	28 May 2024 12:11:26 -0000	1.36
+++ dev/pv/if_vio.c	28 May 2024 13:55:12 -0000
@@ -169,6 +169,9 @@ struct virtio_net_ctrl_cmd {
 # define VIRTIO_NET_CTRL_VLAN_ADD	0
 # define VIRTIO_NET_CTRL_VLAN_DEL	1
 
+#define VIRTIO_NET_CTRL_GUEST_OFFLOADS	5
+# define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET	0
+
 struct virtio_net_ctrl_status {
 	uint8_t	ack;
 } __packed;
@@ -179,6 +182,10 @@ struct virtio_net_ctrl_rx {
 	uint8_t	onoff;
 } __packed;
 
+struct virtio_net_ctrl_guest_offloads {
+	uint64_t offloads;
+} __packed;
+
 struct virtio_net_ctrl_mac_tbl {
 	uint32_t nentries;
 	uint8_t macs[][ETHER_ADDR_LEN];
@@ -220,6 +227,7 @@ struct vio_softc {
 	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
 	struct virtio_net_ctrl_status *sc_ctrl_status;
 	struct virtio_net_ctrl_rx *sc_ctrl_rx;
+	struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads;
 	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
 #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
 	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
@@ -289,6 +297,7 @@ void	vio_txtick(void *);
 void	vio_link_state(struct ifnet *);
 int	vio_config_change(struct virtio_softc *);
 int	vio_ctrl_rx(struct vio_softc *, int, int);
+int	vio_ctrl_guest_offloads(struct vio_softc *, uint64_t);
 int	vio_set_rx_filter(struct vio_softc *);
 void	vio_iff(struct vio_softc *);
 int	vio_media_change(struct ifnet *);
@@ -414,6 +423,7 @@ vio_alloc_mem(struct vio_softc *sc)
 		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
 		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
 		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
+		allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1;
 		allocsize += VIO_CTRL_MAC_INFO_SIZE;
 	}
 	sc->sc_dma_size = allocsize;
@@ -433,6 +443,8 @@ vio_alloc_mem(struct vio_softc *sc)
 		offset += sizeof(*sc->sc_ctrl_status);
 		sc->sc_ctrl_rx = (void*)(kva + offset);
 		offset += sizeof(*sc->sc_ctrl_rx);
+		sc->sc_ctrl_guest_offloads = (void*)(kva + offset);
+		offset += sizeof(*sc->sc_ctrl_guest_offloads);
 		sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset);
 		offset += sizeof(*sc->sc_ctrl_mac_tbl_uc) +
 		    ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_UC_ENTRIES;
@@ -454,8 +466,8 @@ vio_alloc_mem(struct vio_softc *sc)
 	sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;
 
 	for (i = 0; i < rxqsize; i++) {
-		r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0,
-		    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);
+		r = bus_dmamap_create(vsc->sc_dmat, MAXMCLBYTES, 16, MCLBYTES,
+		    0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);
 		if (r != 0)
 			goto err_reqs;
 	}
@@ -550,6 +562,10 @@ vio_attach(struct device *parent, struct
 	vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4;
 	vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6;
 
+	vsc->sc_driver_features |= VIRTIO_NET_F_CTRL_GUEST_OFFLOADS;
+	vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO4;
+	vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO6;
+
 	virtio_negotiate_features(vsc, virtio_net_feature_names);
 	if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) {
 		vio_get_lladdr(&sc->sc_ac, vsc);
@@ -612,6 +628,14 @@ vio_attach(struct device *parent, struct
 		ifp->if_capabilities |= IFCAP_TSOv4;
 	if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6))
 		ifp->if_capabilities |= IFCAP_TSOv6;
+
+	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) &&
+	    (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4) ||
+	     virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))) {
+		ifp->if_xflags |= IFXF_LRO;
+		ifp->if_capabilities |= IFCAP_LRO;
+	}
+
 	ifq_init_maxlen(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1);
 	ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status);
 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
@@ -688,6 +712,7 @@ int
 vio_init(struct ifnet *ifp)
 {
 	struct vio_softc *sc = ifp->if_softc;
+	struct virtio_softc *vsc = sc->sc_virtio;
 
 	vio_stop(ifp, 0);
 	if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
@@ -697,6 +722,20 @@ vio_init(struct ifnet *ifp)
 	ifq_clr_oactive(&ifp->if_snd);
 	vio_iff(sc);
 	vio_link_state(ifp);
+
+	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
+		uint64_t features = 0;
+
+		SET(features, VIRTIO_NET_F_GUEST_CSUM);
+
+		if (ISSET(ifp->if_xflags, IFXF_LRO)) {
+			SET(features, VIRTIO_NET_F_GUEST_TSO4);
+			SET(features, VIRTIO_NET_F_GUEST_TSO6);
+		}
+
+		vio_ctrl_guest_offloads(sc, features);
+	}
+
 	return 0;
 }
 
@@ -1083,6 +1122,24 @@ vio_rx_offload(struct mbuf *m, struct vi
 		if (ISSET(hdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM))
 			SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT);
 	}
+
+	if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
+	    hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
+		uint16_t mss = hdr->gso_size;
+
+		if (!ext.tcp || mss == 0) {
+			tcpstat_inc(tcps_inbadlro);
+			return;
+		}
+
+		if ((ext.paylen + mss - 1) / mss <= 1)
+			return;
+
+		tcpstat_inc(tcps_inhwlro);
+		tcpstat_add(tcps_inpktlro, (ext.paylen + mss - 1) / mss);
+		SET(m->m_pkthdr.csum_flags, M_TCP_TSO);
+		m->m_pkthdr.ph_mss = mss;
+	}
 }
 
 /* dequeue received packets */
@@ -1370,6 +1427,67 @@ vio_ctrl_rx(struct vio_softc *sc, int cm
 
 	DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, (int)onoff, r);
 out:
+	vio_ctrl_wakeup(sc, FREE);
+	return r;
+}
+
+int
+vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features)
+{
+	struct virtio_softc *vsc = sc->sc_virtio;
+	struct virtqueue *vq = &sc->sc_vq[VQCTL];
+	int r, slot;
+
+	splassert(IPL_NET);
+
+	if ((r = vio_wait_ctrl(sc)) != 0)
+		return r;
+
+	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
+	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
+	sc->sc_ctrl_guest_offloads->offloads = features;
+
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
+	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads,
+	    sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_PREWRITE);
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
+	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
+
+	r = virtio_enqueue_prep(vq, &slot);
+	if (r != 0)
+		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
+	r = virtio_enqueue_reserve(vq, slot, 3);
+	if (r != 0)
+		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
+	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
+	    sizeof(*sc->sc_ctrl_cmd), 1);
+	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_guest_offloads,
+	    sizeof(*sc->sc_ctrl_guest_offloads), 1);
+	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
+	    sizeof(*sc->sc_ctrl_status), 0);
+	virtio_enqueue_commit(vsc, vq, slot, 1);
+
+	if ((r = vio_wait_ctrl_done(sc)) != 0)
+		goto out;
+
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
+	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads,
+	    sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_POSTWRITE);
+	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
+	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
+
+	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
+		r = 0;
+	} else {
+		printf("%s: features 0x%llx failed\n", sc->sc_dev.dv_xname,
+		    features);
+		r = EIO;
+	}
+
+	DPRINTF("%s: features 0x%llx: %d\n", __func__, features, r);
+ out:
 	vio_ctrl_wakeup(sc, FREE);
 	return r;
 }