From: Stefan Fritsch <sf@openbsd.org>
Subject: Re: vio(4): TCP Large Receive Offload
To: jan@openbsd.org
Cc: tech@openbsd.org
Date: Fri, 7 Jun 2024 10:37:42 +0200

Looks mostly good. I have also tested with Linux/KVM. Comments inline.

On Tue, 28 May 2024, jan@openbsd.org wrote:

> Hi,
> 
> This diff implements TCP Large Receive Offload for vio(4).  LRO is only
> used when guest offload control is also available. Without that, it
> would be impossible to turn off LRO.
> 
> I tested it on KVM with external Linux machines.
> 
> Tests are welcome on various platforms with vio(4).
> 
> bye,
> Jan
> 
> Index: dev/pv/if_vio.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pv/if_vio.c,v
> diff -u -p -r1.36 if_vio.c
> --- dev/pv/if_vio.c	28 May 2024 12:11:26 -0000	1.36
> +++ dev/pv/if_vio.c	28 May 2024 13:55:12 -0000
> @@ -169,6 +169,9 @@ struct virtio_net_ctrl_cmd {
>  # define VIRTIO_NET_CTRL_VLAN_ADD	0
>  # define VIRTIO_NET_CTRL_VLAN_DEL	1
>  
> +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS	5
> +# define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET	0
> +
>  struct virtio_net_ctrl_status {
>  	uint8_t	ack;
>  } __packed;
> @@ -179,6 +182,10 @@ struct virtio_net_ctrl_rx {
>  	uint8_t	onoff;
>  } __packed;
>  
> +struct virtio_net_ctrl_guest_offloads {
> +	uint64_t offloads;
> +} __packed;
> +
>  struct virtio_net_ctrl_mac_tbl {
>  	uint32_t nentries;
>  	uint8_t macs[][ETHER_ADDR_LEN];
> @@ -220,6 +227,7 @@ struct vio_softc {
>  	struct virtio_net_ctrl_cmd *sc_ctrl_cmd;
>  	struct virtio_net_ctrl_status *sc_ctrl_status;
>  	struct virtio_net_ctrl_rx *sc_ctrl_rx;
> +	struct virtio_net_ctrl_guest_offloads *sc_ctrl_guest_offloads;
>  	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_uc;
>  #define sc_ctrl_mac_info sc_ctrl_mac_tbl_uc
>  	struct virtio_net_ctrl_mac_tbl *sc_ctrl_mac_tbl_mc;
> @@ -289,6 +297,7 @@ void	vio_txtick(void *);
>  void	vio_link_state(struct ifnet *);
>  int	vio_config_change(struct virtio_softc *);
>  int	vio_ctrl_rx(struct vio_softc *, int, int);
> +int	vio_ctrl_guest_offloads(struct vio_softc *, uint64_t);
>  int	vio_set_rx_filter(struct vio_softc *);
>  void	vio_iff(struct vio_softc *);
>  int	vio_media_change(struct ifnet *);
> @@ -414,6 +423,7 @@ vio_alloc_mem(struct vio_softc *sc)
>  		allocsize += sizeof(struct virtio_net_ctrl_cmd) * 1;
>  		allocsize += sizeof(struct virtio_net_ctrl_status) * 1;
>  		allocsize += sizeof(struct virtio_net_ctrl_rx) * 1;
> +		allocsize += sizeof(struct virtio_net_ctrl_guest_offloads) * 1;
>  		allocsize += VIO_CTRL_MAC_INFO_SIZE;
>  	}
>  	sc->sc_dma_size = allocsize;
> @@ -433,6 +443,8 @@ vio_alloc_mem(struct vio_softc *sc)
>  		offset += sizeof(*sc->sc_ctrl_status);
>  		sc->sc_ctrl_rx = (void*)(kva + offset);
>  		offset += sizeof(*sc->sc_ctrl_rx);
> +		sc->sc_ctrl_guest_offloads = (void*)(kva + offset);
> +		offset += sizeof(*sc->sc_ctrl_guest_offloads);
>  		sc->sc_ctrl_mac_tbl_uc = (void*)(kva + offset);
>  		offset += sizeof(*sc->sc_ctrl_mac_tbl_uc) +
>  		    ETHER_ADDR_LEN * VIRTIO_NET_CTRL_MAC_UC_ENTRIES;
> @@ -454,8 +466,8 @@ vio_alloc_mem(struct vio_softc *sc)
>  	sc->sc_tx_mbufs = sc->sc_rx_mbufs + rxqsize;
>  
>  	for (i = 0; i < rxqsize; i++) {
> -		r = bus_dmamap_create(vsc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0,
> -		    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);
> +		r = bus_dmamap_create(vsc->sc_dmat, MAXMCLBYTES, 16, MCLBYTES,
> +		    0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);

From what I read in uipc_mbuf.c, clusters are only guaranteed to be 
aligned to 64 bytes. A 64k cluster can therefore start mid-page and span 
17 pages, so we may need 17 DMA segments, not 16. You could write the 
segment count as MAXMCLBYTES / PAGE_SIZE + 1 for clarity.
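Untested sketch of what I mean, keeping the rest of the call as in your 
diff:

	/* one extra segment: a 64-byte-aligned cluster may straddle
	 * one more page than MAXMCLBYTES / PAGE_SIZE */
	r = bus_dmamap_create(vsc->sc_dmat, MAXMCLBYTES,
	    MAXMCLBYTES / PAGE_SIZE + 1, MCLBYTES, 0,
	    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &sc->sc_rx_dmamaps[i]);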

>  		if (r != 0)
>  			goto err_reqs;
>  	}
> @@ -550,6 +562,10 @@ vio_attach(struct device *parent, struct
>  	vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4;
>  	vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6;
>  
> +	vsc->sc_driver_features |= VIRTIO_NET_F_CTRL_GUEST_OFFLOADS;
> +	vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO4;
> +	vsc->sc_driver_features |= VIRTIO_NET_F_GUEST_TSO6;
> +
>  	virtio_negotiate_features(vsc, virtio_net_feature_names);
>  	if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) {
>  		vio_get_lladdr(&sc->sc_ac, vsc);
> @@ -612,6 +628,14 @@ vio_attach(struct device *parent, struct
>  		ifp->if_capabilities |= IFCAP_TSOv4;
>  	if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6))
>  		ifp->if_capabilities |= IFCAP_TSOv6;
> +
> +	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) &&
> +	    (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4) ||
> +	     virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))) {
> +		ifp->if_xflags |= IFXF_LRO;
> +		ifp->if_capabilities |= IFCAP_LRO;
> +	}
> +
>  	ifq_init_maxlen(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1);
>  	ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status);
>  	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
> @@ -688,6 +712,7 @@ int
>  vio_init(struct ifnet *ifp)
>  {
>  	struct vio_softc *sc = ifp->if_softc;
> +	struct virtio_softc *vsc = sc->sc_virtio;
>  
>  	vio_stop(ifp, 0);
>  	if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
> @@ -697,6 +722,20 @@ vio_init(struct ifnet *ifp)
>  	ifq_clr_oactive(&ifp->if_snd);
>  	vio_iff(sc);
>  	vio_link_state(ifp);
> +
> +	if (virtio_has_feature(vsc, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
> +		uint64_t features = 0;
> +
> +		SET(features, VIRTIO_NET_F_GUEST_CSUM);
> +
> +		if (ISSET(ifp->if_xflags, IFXF_LRO)) {
> +			SET(features, VIRTIO_NET_F_GUEST_TSO4);
> +			SET(features, VIRTIO_NET_F_GUEST_TSO6);

You need to check the feature bits again here, because above you enable 
IFXF_LRO if either TSO4 or TSO6 was negotiated, so only one of the two 
may actually be available. The standard says "A driver MUST NOT enable 
an offload for which the appropriate feature has not been negotiated."
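Untested sketch, using only the identifiers from your diff:

	if (ISSET(ifp->if_xflags, IFXF_LRO)) {
		/* only request offloads that were actually negotiated */
		if (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4))
			SET(features, VIRTIO_NET_F_GUEST_TSO4);
		if (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))
			SET(features, VIRTIO_NET_F_GUEST_TSO6);
	}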

> +		}
> +
> +		vio_ctrl_guest_offloads(sc, features);
> +	}
> +
>  	return 0;
>  }
>  
> @@ -1083,6 +1122,24 @@ vio_rx_offload(struct mbuf *m, struct vi
>  		if (ISSET(hdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM))
>  			SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT);
>  	}
> +
> +	if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
> +	    hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
> +		uint16_t mss = hdr->gso_size;
> +
> +		if (!ext.tcp || mss == 0) {
> +			tcpstat_inc(tcps_inbadlro);
> +			return;
> +		}
> +
> +		if ((ext.paylen + mss - 1) / mss <= 1)
> +			return;
> +
> +		tcpstat_inc(tcps_inhwlro);
> +		tcpstat_add(tcps_inpktlro, (ext.paylen + mss - 1) / mss);
> +		SET(m->m_pkthdr.csum_flags, M_TCP_TSO);
> +		m->m_pkthdr.ph_mss = mss;
> +	}
>  }
>  
>  /* dequeue received packets */
> @@ -1370,6 +1427,67 @@ vio_ctrl_rx(struct vio_softc *sc, int cm
>  
>  	DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, (int)onoff, r);
>  out:
> +	vio_ctrl_wakeup(sc, FREE);
> +	return r;
> +}
> +
> +int
> +vio_ctrl_guest_offloads(struct vio_softc *sc, uint64_t features)
> +{
> +	struct virtio_softc *vsc = sc->sc_virtio;
> +	struct virtqueue *vq = &sc->sc_vq[VQCTL];
> +	int r, slot;
> +
> +	splassert(IPL_NET);
> +
> +	if ((r = vio_wait_ctrl(sc)) != 0)
> +		return r;
> +
> +	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
> +	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
> +	sc->sc_ctrl_guest_offloads->offloads = features;
> +
> +	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
> +	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_PREWRITE);
> +	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads,
> +	    sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_PREWRITE);
> +	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
> +	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_PREREAD);
> +
> +	r = virtio_enqueue_prep(vq, &slot);
> +	if (r != 0)
> +		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
> +	r = virtio_enqueue_reserve(vq, slot, 3);
> +	if (r != 0)
> +		panic("%s: control vq busy!?", sc->sc_dev.dv_xname);
> +	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_cmd,
> +	    sizeof(*sc->sc_ctrl_cmd), 1);
> +	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_guest_offloads,
> +	    sizeof(*sc->sc_ctrl_guest_offloads), 1);
> +	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
> +	    sizeof(*sc->sc_ctrl_status), 0);
> +	virtio_enqueue_commit(vsc, vq, slot, 1);
> +
> +	if ((r = vio_wait_ctrl_done(sc)) != 0)
> +		goto out;
> +
> +	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_cmd,
> +	    sizeof(*sc->sc_ctrl_cmd), BUS_DMASYNC_POSTWRITE);
> +	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_guest_offloads,
> +	    sizeof(*sc->sc_ctrl_guest_offloads), BUS_DMASYNC_POSTWRITE);
> +	VIO_DMAMEM_SYNC(vsc, sc, sc->sc_ctrl_status,
> +	    sizeof(*sc->sc_ctrl_status), BUS_DMASYNC_POSTREAD);
> +
> +	if (sc->sc_ctrl_status->ack == VIRTIO_NET_OK) {
> +		r = 0;
> +	} else {
> +		printf("%s: features 0x%llx failed\n", sc->sc_dev.dv_xname,
> +		    features);
> +		r = EIO;
> +	}
> +
> +	DPRINTF("%s: features 0x%llx: %d\n", __func__, features, r);
> + out:
>  	vio_ctrl_wakeup(sc, FREE);
>  	return r;
>  }
> 
>