Index | Thread | Search

From:
Helg <helg-openbsd@gmx.de>
Subject:
Re: virtio: Fix condition for split rx buffers, negotiate VIRTIO_F_ANY_LAYOUT
To:
Stefan Fritsch <sf@sfritsch.de>
Cc:
tech@openbsd.org
Date:
Sun, 21 Dec 2025 19:04:51 +0100

Download raw body.

Thread
On Sun, Dec 21, 2025 at 03:51:02PM +0100, Stefan Fritsch wrote:
> Hi,
> 
> In 0.9-only time, the VIO_HAVE_MRG_RXBUF macro checked just for the 
> VIRTIO_NET_F_MRG_RXBUF feature. The meaning was later changed to 
> (version_1 || VIRTIO_NET_F_MRG_RXBUF), but the new meaning is only correct 
> for one use of the macro. The buffer chaining must check for the MRG_RXBUF 
> feature exclusively.
> 
> On the other hand, the check if we have to split the header from the rest 
> of the buffer in the rx queue is a workaround for old kvm versions. The 
> standard has since then gained the ANY_LAYOUT feature flag to turn off 
> this workaround. According to the virtio 1.x standard, we should accept 
> VIRTIO_F_ANY_LAYOUT if it is offered for transitional devices. ANY_LAYOUT 
> is implicit if VERSION_1 has been negotiated.
> 
> Since accepting ANY_LAYOUT only relaxes the requirements for us, we can 
> simply accept it globally for all virtio device types. vioblk(4) and 
> vioscsi(4) unconditionally use the strict buffer layout required for 
> legacy devices without ANY_LAYOUT, anyway.
> 
> Problem noticed by helg@
> 

It's been working well here on Apple Virtualization and QEMU on arm64.
It also matches my reading of the spec, so OK from me.

The only question that comes to mind is whether it's worth supporting
transitional devices at all. The 1.0 spec was released in 2014. Are there
any hypervisors that don't yet implement the 1.x spec? Instead of the
no_split logic, we could just fail if VIRTIO_F_VERSION_1 is not negotiated.

> ok?
> 
> Cheers,
> Stefan
> 
> diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
> index db5f5381d0e..179988771f5 100644
> --- a/sys/dev/fdt/virtio_mmio.c
> +++ b/sys/dev/fdt/virtio_mmio.c
> @@ -407,6 +407,7 @@ virtio_mmio_negotiate_features(struct virtio_softc *vsc,
>  		vsc->sc_driver_features &= ~(VIRTIO_F_RING_EVENT_IDX);
>  	}
>  
> +	vsc->sc_driver_features |= VIRTIO_F_ANY_LAYOUT;
>  	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
>  	    VIRTIO_MMIO_HOST_FEATURES_SEL, 0);
>  	host = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
> diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
> index 0f7d0d1bbcc..b61a162ddc7 100644
> --- a/sys/dev/pci/virtio_pci.c
> +++ b/sys/dev/pci/virtio_pci.c
> @@ -798,6 +798,7 @@ virtio_pci_negotiate_features(struct virtio_softc *vsc,
>  	uint64_t host, negotiated;
>  
>  	vsc->sc_active_features = 0;
> +	vsc->sc_driver_features |= VIRTIO_F_ANY_LAYOUT;
>  
>  	/*
>  	 * We enable indirect descriptors by default. They can be switched
> diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
> index 90d7ad8522b..d5c0767162d 100644
> --- a/sys/dev/pv/if_vio.c
> +++ b/sys/dev/pv/if_vio.c
> @@ -295,8 +295,6 @@ struct vio_softc {
>  #define VIO_DMAMEM_SYNC(vsc, sc, p, size, flags)		\
>  	bus_dmamap_sync((vsc)->sc_dmat, (sc)->sc_dma_map,	\
>  	    VIO_DMAMEM_OFFSET((sc), (p)), (size), (flags))
> -#define VIO_HAVE_MRG_RXBUF(sc)					\
> -	((sc)->sc_hdr_size == sizeof(struct virtio_net_hdr))
>  
>  /* vioq N uses the rx/tx vq pair 2*N and 2*N + 1 */
>  #define VIO_VQ2Q(sc, vq)	(&sc->sc_q[vq->vq_index/2])
> @@ -1398,7 +1396,9 @@ vio_populate_rx_mbufs(struct vio_softc *sc, struct vio_queue *vioq)
>  	int r, done = 0;
>  	u_int slots;
>  	struct virtqueue *vq = vioq->viq_rxvq;
> -	int mrg_rxbuf = VIO_HAVE_MRG_RXBUF(sc);
> +	int no_split = virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF) ||
> +		virtio_has_feature(vsc, VIRTIO_F_VERSION_1) ||
> +		virtio_has_feature(vsc, VIRTIO_F_ANY_LAYOUT);
>  
>  	MUTEX_ASSERT_LOCKED(&vioq->viq_rxmtx);
>  	for (slots = if_rxr_get(&vioq->viq_rxring, vq->vq_num);
> @@ -1418,7 +1418,7 @@ vio_populate_rx_mbufs(struct vio_softc *sc, struct vio_queue *vioq)
>  			}
>  		}
>  		r = virtio_enqueue_reserve(vq, slot,
> -		    vioq->viq_rxdmamaps[slot]->dm_nsegs + (mrg_rxbuf ? 0 : 1));
> +		    vioq->viq_rxdmamaps[slot]->dm_nsegs + (no_split ? 0 : 1));
>  		if (r != 0) {
>  			vio_free_rx_mbuf(sc, vioq, slot);
>  			break;
> @@ -1426,13 +1426,13 @@ vio_populate_rx_mbufs(struct vio_softc *sc, struct vio_queue *vioq)
>  		bus_dmamap_sync(vsc->sc_dmat, vioq->viq_rxdmamaps[slot], 0,
>  		    vioq->viq_rxdmamaps[slot]->dm_mapsize,
>  		    BUS_DMASYNC_PREREAD);
> -		if (mrg_rxbuf) {
> +		if (no_split) {
>  			virtio_enqueue(vq, slot, vioq->viq_rxdmamaps[slot], 0);
>  		} else {
>  			/*
> -			 * Buggy kvm wants a buffer of exactly the size of
> -			 * the header in this case, so we have to split in
> -			 * two.
> +			 * Legacy devices without VIRTIO_F_ANY_LAYOUT want a
> +			 * buffer of exactly the size of the header in this
> +			 * case, so we have to split in two.
>  			 */
>  			virtio_enqueue_p(vq, slot, vioq->viq_rxdmamaps[slot],
>  			    0, sc->sc_hdr_size, 0);
> @@ -1526,7 +1526,7 @@ vio_rxeof(struct vio_queue *vioq)
>  				    vioq->viq_ifiq->ifiq_idx;
>  				SET(m->m_pkthdr.csum_flags, M_FLOWID);
>  			}
> -			if (VIO_HAVE_MRG_RXBUF(sc))
> +			if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF))
>  				bufs_left = hdr->num_buffers - 1;
>  			else
>  				bufs_left = 0;