Index | Thread | Search

From:
Stefan Fritsch <sf@sfritsch.de>
Subject:
Re: virtio: Fix condition for split rx buffers, negotiate VIRTIO_F_ANY_LAYOUT
To:
Helg <helg-openbsd@gmx.de>
Cc:
tech@openbsd.org
Date:
Mon, 22 Dec 2025 21:15:49 +0100

Download raw body.

Thread
On Sun, 21 Dec 2025, Helg wrote:

> On Sun, Dec 21, 2025 at 03:51:02PM +0100, Stefan Fritsch wrote:
> > Hi,
> > 
> > In 0.9-only time, the VIO_HAVE_MRG_RXBUF macro checked just for the 
> > VIRTIO_NET_F_MRG_RXBUF feature. The meaning was later changed to 
> > (version_1 || VIRTIO_NET_F_MRG_RXBUF), but the new meaning is only correct 
> > for one use of the macro. The buffer chaining must check for the MRG_RXBUF 
> > feature exclusively.
> > 
> > On the other hand, the check if we have to split the header from the rest 
> > of the buffer in the rx queue is a workaround for old kvm versions. The 
> > standard has since then gained the ANY_LAYOUT feature flag to turn off 
> > this workaround. According to the virtio 1.x standard, we should accept 
> > VIRTIO_F_ANY_LAYOUT if it is offered for transitional devices. ANY_LAYOUT 
> > is implicit if VERSION_1 has been negotiated.
> > 
> > Since accepting ANY_LAYOUT only relaxes the requirements for us, we can 
> > simply accept it globally for all virtio device types. vioblk(4) and 
> > vioscsi(4) unconditionally use the strict buffer layout required for 
> > legacy devices without ANY_LAYOUT, anyway.
> > 
> > Problem noticed by helg@
> > 
> 
> It's been working well here on Apple Virtualization and QEMU on arm64.
> It also matches my reading of the spec, so OK from me.
> 
> The only question that comes to mind is if it's worth supporting
> transitional devices? The 1.0 spec was released in 2014. Are there any
> hypervisors that don't yet implement the 1.x spec? Instead of the
> no_split logic, just fail if VIRTIO_F_VERSION_1 is not negotiated.

OpenBSD's vmd gained virtio 1.x support only recently in 7.8. And I 
suspect that there are a few other hypervisors out there that are still 
0.9-only. I think we will have to keep support for transitional devices 
for a few years at least.

> 
> > ok?
> > 
> > Cheers,
> > Stefan
> > 
> > diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
> > index db5f5381d0e..179988771f5 100644
> > --- a/sys/dev/fdt/virtio_mmio.c
> > +++ b/sys/dev/fdt/virtio_mmio.c
> > @@ -407,6 +407,7 @@ virtio_mmio_negotiate_features(struct virtio_softc *vsc,
> >  		vsc->sc_driver_features &= ~(VIRTIO_F_RING_EVENT_IDX);
> >  	}
> >  
> > +	vsc->sc_driver_features |= VIRTIO_F_ANY_LAYOUT;
> >  	bus_space_write_4(sc->sc_iot, sc->sc_ioh,
> >  	    VIRTIO_MMIO_HOST_FEATURES_SEL, 0);
> >  	host = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
> > diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
> > index 0f7d0d1bbcc..b61a162ddc7 100644
> > --- a/sys/dev/pci/virtio_pci.c
> > +++ b/sys/dev/pci/virtio_pci.c
> > @@ -798,6 +798,7 @@ virtio_pci_negotiate_features(struct virtio_softc *vsc,
> >  	uint64_t host, negotiated;
> >  
> >  	vsc->sc_active_features = 0;
> > +	vsc->sc_driver_features |= VIRTIO_F_ANY_LAYOUT;
> >  
> >  	/*
> >  	 * We enable indirect descriptors by default. They can be switched
> > diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
> > index 90d7ad8522b..d5c0767162d 100644
> > --- a/sys/dev/pv/if_vio.c
> > +++ b/sys/dev/pv/if_vio.c
> > @@ -295,8 +295,6 @@ struct vio_softc {
> >  #define VIO_DMAMEM_SYNC(vsc, sc, p, size, flags)		\
> >  	bus_dmamap_sync((vsc)->sc_dmat, (sc)->sc_dma_map,	\
> >  	    VIO_DMAMEM_OFFSET((sc), (p)), (size), (flags))
> > -#define VIO_HAVE_MRG_RXBUF(sc)					\
> > -	((sc)->sc_hdr_size == sizeof(struct virtio_net_hdr))
> >  
> >  /* vioq N uses the rx/tx vq pair 2*N and 2*N + 1 */
> >  #define VIO_VQ2Q(sc, vq)	(&sc->sc_q[vq->vq_index/2])
> > @@ -1398,7 +1396,9 @@ vio_populate_rx_mbufs(struct vio_softc *sc, struct vio_queue *vioq)
> >  	int r, done = 0;
> >  	u_int slots;
> >  	struct virtqueue *vq = vioq->viq_rxvq;
> > -	int mrg_rxbuf = VIO_HAVE_MRG_RXBUF(sc);
> > +	int no_split = virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF) ||
> > +		virtio_has_feature(vsc, VIRTIO_F_VERSION_1) ||
> > +		virtio_has_feature(vsc, VIRTIO_F_ANY_LAYOUT);
> >  
> >  	MUTEX_ASSERT_LOCKED(&vioq->viq_rxmtx);
> >  	for (slots = if_rxr_get(&vioq->viq_rxring, vq->vq_num);
> > @@ -1418,7 +1418,7 @@ vio_populate_rx_mbufs(struct vio_softc *sc, struct vio_queue *vioq)
> >  			}
> >  		}
> >  		r = virtio_enqueue_reserve(vq, slot,
> > -		    vioq->viq_rxdmamaps[slot]->dm_nsegs + (mrg_rxbuf ? 0 : 1));
> > +		    vioq->viq_rxdmamaps[slot]->dm_nsegs + (no_split ? 0 : 1));
> >  		if (r != 0) {
> >  			vio_free_rx_mbuf(sc, vioq, slot);
> >  			break;
> > @@ -1426,13 +1426,13 @@ vio_populate_rx_mbufs(struct vio_softc *sc, struct vio_queue *vioq)
> >  		bus_dmamap_sync(vsc->sc_dmat, vioq->viq_rxdmamaps[slot], 0,
> >  		    vioq->viq_rxdmamaps[slot]->dm_mapsize,
> >  		    BUS_DMASYNC_PREREAD);
> > -		if (mrg_rxbuf) {
> > +		if (no_split) {
> >  			virtio_enqueue(vq, slot, vioq->viq_rxdmamaps[slot], 0);
> >  		} else {
> >  			/*
> > -			 * Buggy kvm wants a buffer of exactly the size of
> > -			 * the header in this case, so we have to split in
> > -			 * two.
> > +			 * Legacy devices without VIRTIO_F_ANY_LAYOUT want a
> > +			 * buffer of exactly the size of the header in this
> > +			 * case, so we have to split in two.
> >  			 */
> >  			virtio_enqueue_p(vq, slot, vioq->viq_rxdmamaps[slot],
> >  			    0, sc->sc_hdr_size, 0);
> > @@ -1526,7 +1526,7 @@ vio_rxeof(struct vio_queue *vioq)
> >  				    vioq->viq_ifiq->ifiq_idx;
> >  				SET(m->m_pkthdr.csum_flags, M_FLOWID);
> >  			}
> > -			if (VIO_HAVE_MRG_RXBUF(sc))
> > +			if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF))
> >  				bufs_left = hdr->num_buffers - 1;
> >  			else
> >  				bufs_left = 0;
> 
>