From: Jan Klemkow <jan@openbsd.org>
Subject: Re: vio: Support MTU feature
To: Stefan Fritsch <sf@sfritsch.de>
Cc: tech@openbsd.org
Date: Wed, 14 Jan 2026 21:08:42 +0100

On Wed, Jan 14, 2026 at 01:24:50PM +0100, Stefan Fritsch wrote:
> here is the final bit to make OpenBSD work on Apple Virtualization, which 
> refuses to work if the MTU feature is not negotiated:
> 
> Add support for the VIRTIO_NET_F_MTU feature, which allows getting the
> hardmtu from the hypervisor. Also set the current mtu to the same value.
> The virtio standard is not clear on whether that is recommended, but
> Linux does this, too.
> 
> Use ETHER_MAX_HARDMTU_LEN as the upper hardmtu limit instead of
> MAXMCLBYTES, as this seems more correct.
> 
> If the hypervisor requests an MTU larger than ETHER_MAX_HARDMTU_LEN,
> redo feature negotiation without VIRTIO_NET_F_MTU.
> 
> Input and testing from @helg
> 
> ok?

Works in my setups.

I tested it on Linux/KVM with the following MTU tag in my config:
	<mtu size="1200"/>

vio0 at virtio0: 1 queue, address 52:54:00:0a:91:cc
vio1 at virtio1: 1 queue, address 52:54:00:2e:69:7a, mtu 1200
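
For reference, the <mtu> tag sits inside the <interface> element of the
libvirt domain XML; my interface section looks roughly like this (network
and addresses are specific to my setup):

	<interface type="network">
		<source network="default"/>
		<model type="virtio"/>
		<mtu size="1200"/>
	</interface>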

Also works without regressions in my vmm(4)/vmd(8) setup.
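
One detail I double-checked is the new rx ring low-water mark,
sc_rxr_lwm = 2 * howmany(rxsize, sc->sc_rx_mbuf_size).  A standalone
sketch of that arithmetic with the MTU from my KVM test and a few
assumed values (4k rx mbufs, no guest TSO negotiated, 12-byte v1
virtio_net_hdr):

	#include <stdio.h>

	/* same definition as howmany() in <sys/param.h> */
	#define HOWMANY(x, y)	(((x) + ((y) - 1)) / (y))

	int
	main(void)
	{
		unsigned int hdr_size = 12;		/* assumed v1 virtio_net_hdr */
		unsigned int ether_hdr_len = 14;	/* ETHER_HDR_LEN */
		unsigned int rx_mbuf_size = 4 * 1024;	/* assumed non-mrg_rxbuf default */
		unsigned int hardmtu = 1200;		/* from VIRTIO_NET_CONFIG_MTU */

		/* without guest TSO, rxsize follows the hardmtu */
		unsigned int rxsize = hardmtu + hdr_size + ether_hdr_len;
		unsigned int rxr_lwm = 2 * HOWMANY(rxsize, rx_mbuf_size);

		printf("rxr_lwm = %u\n", rxr_lwm);	/* prints rxr_lwm = 2 */
		return 0;
	}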

Diff looks fine to me.
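
For anyone skimming: the fallback boils down to "read
VIRTIO_NET_CONFIG_MTU, and if it is bigger than what we can buffer,
reset and renegotiate without the feature bit".  A toy model of that
control flow (the constant and helper are made up here; the real code
is in the diff below):

	#include <stdio.h>

	/* toy stand-in for ETHER_MAX_HARDMTU_LEN from <netinet/if_ether.h> */
	#define TOY_MAX_HARDMTU		65435

	/* pretend the host advertises an absurdly large MTU */
	static unsigned int
	toy_read_device_mtu(void)
	{
		return 99999;
	}

	int
	main(void)
	{
		int want_mtu = 1;
		unsigned int device_mtu = 0;

	negotiate:
		if (want_mtu)
			device_mtu = toy_read_device_mtu();
		if (want_mtu && device_mtu > TOY_MAX_HARDMTU) {
			/* cannot buffer packets that large: retry without the feature */
			printf("mtu %u not supported, renegotiating\n", device_mtu);
			device_mtu = 0;
			want_mtu = 0;
			goto negotiate;
		}
		printf("hardmtu = %u\n", device_mtu ? device_mtu : TOY_MAX_HARDMTU);
		return 0;
	}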

ok jan@

> diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
> index ae138c0ee59..b2e0435fb85 100644
> --- a/sys/dev/pv/if_vio.c
> +++ b/sys/dev/pv/if_vio.c
> @@ -294,6 +294,7 @@ struct vio_softc {
>  	struct intrmap		*sc_intrmap;
>  	struct vio_queue	*sc_q;
>  	uint16_t		sc_nqueues;
> +	uint16_t		sc_rxr_lwm;
>  	int			sc_tx_slots_per_req;
>  	int			sc_rx_mbuf_size;
>  
> @@ -367,7 +368,7 @@ int	vio_ctrl_start(struct vio_softc *, uint8_t, uint8_t, int, int *);
>  int	vio_ctrl_submit(struct vio_softc *, int);
>  void	vio_ctrl_finish(struct vio_softc *);
>  void	vio_ctrl_wakeup(struct vio_softc *, enum vio_ctrl_state);
> -int	vio_alloc_mem(struct vio_softc *, int);
> +int	vio_alloc_mem(struct vio_softc *, int, bus_size_t);
>  int	vio_alloc_dmamem(struct vio_softc *);
>  void	vio_free_dmamem(struct vio_softc *);
>  
> @@ -466,12 +467,10 @@ vio_free_dmamem(struct vio_softc *sc)
>   *   viq_txmbufs[slot]:		mbuf pointer array for sent frames
>   */
>  int
> -vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
> +vio_alloc_mem(struct vio_softc *sc, int tx_max_segments, bus_size_t txsize)
>  {
>  	struct virtio_softc	*vsc = sc->sc_virtio;
> -	struct ifnet		*ifp = &sc->sc_ac.ac_if;
>  	size_t			 allocsize, rxqsize, txqsize, offset = 0;
> -	bus_size_t		 txsize;
>  	caddr_t			 kva;
>  	int			 i, qidx, r;
>  
> @@ -528,11 +527,6 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
>  	}
>  	KASSERT(offset == allocsize);
>  
> -	if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO4) ||
> -	    virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6))
> -		txsize = MAXMCLBYTES + sc->sc_hdr_size + ETHER_HDR_LEN;
> -	else
> -		txsize = ifp->if_hardmtu + sc->sc_hdr_size + ETHER_HDR_LEN;
>  
>  	for (qidx = 0; qidx < sc->sc_nqueues; qidx++) {
>  		struct vio_queue *vioq = &sc->sc_q[qidx];
> @@ -635,7 +629,9 @@ vio_attach(struct device *parent, struct device *self, void *aux)
>  	struct vio_softc *sc = (struct vio_softc *)self;
>  	struct virtio_softc *vsc = (struct virtio_softc *)parent;
>  	struct virtio_attach_args *va = aux;
> -	int i, r, tx_max_segments, want_tso = 1;
> +	int i, r, tx_max_segments, want_tso = 1, want_mtu = 1;
> +	unsigned int device_mtu = 0;
> +	bus_size_t txsize, rxsize;
>  	struct ifnet *ifp = &sc->sc_ac.ac_if;
>  
>  	if (vsc->sc_child != NULL) {
> @@ -665,6 +661,8 @@ negotiate:
>  		vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4;
>  		vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6;
>  	}
> +	if (want_mtu)
> +		vsc->sc_driver_features |= VIRTIO_NET_F_MTU;
>  	if (va->va_nintr > 3 && ncpus > 1)
>  		vsc->sc_driver_features |= VIRTIO_NET_F_MQ;
>  
> @@ -690,6 +688,24 @@ negotiate:
>  		goto negotiate;
>  	}
>  
> +	if (virtio_has_feature(vsc, VIRTIO_NET_F_MTU)) {
> +		device_mtu = virtio_read_device_config_2(vsc,
> +		    VIRTIO_NET_CONFIG_MTU);
> +		if (device_mtu > ETHER_MAX_HARDMTU_LEN) {
> +			/*
> +			 * According to the standard we MUST supply rx buffers
> +			 * for the MTU length, but we only support up to
> +			 * ETHER_MAX_HARDMTU_LEN.
> +			 */
> +			printf("%s: mtu %u not supported\n",
> +			    sc->sc_dev.dv_xname, device_mtu);
> +			device_mtu = 0;
> +			want_mtu = 0;
> +			virtio_reset(vsc);
> +			goto negotiate;
> +		}
> +	}
> +
>  	if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
>  		i = virtio_read_device_config_2(vsc,
>  		    VIRTIO_NET_CONFIG_MAX_QUEUES);
> @@ -724,7 +740,7 @@ negotiate:
>  		vio_get_lladdr(&sc->sc_ac, vsc);
>  	else
>  		ether_fakeaddr(ifp);
> -	printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
> +	printf(", address %s", ether_sprintf(sc->sc_ac.ac_enaddr));
>  
>  	if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF) ||
>  	    vsc->sc_version_1) {
> @@ -757,11 +773,31 @@ negotiate:
>  		sc->sc_rx_mbuf_size = 4 * 1024;
>  	}
>  
> -	if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF))
> -		ifp->if_hardmtu = MAXMCLBYTES;
> -	else
> -		ifp->if_hardmtu = sc->sc_rx_mbuf_size - sc->sc_hdr_size -
> -		    ETHER_HDR_LEN;
> +	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
> +	if (device_mtu) {
> +		printf(", mtu %u", device_mtu);
> +		ifp->if_hardmtu = device_mtu;
> +	}
> +	if (!virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF)) {
> +		if (device_mtu)
> +			sc->sc_rx_mbuf_size = MAX(sc->sc_rx_mbuf_size,
> +			    device_mtu + sc->sc_hdr_size + ETHER_HDR_LEN);
> +		ifp->if_hardmtu = MIN(ifp->if_hardmtu,
> +		    sc->sc_rx_mbuf_size - sc->sc_hdr_size - ETHER_HDR_LEN);
> +	}
> +
> +	txsize = ifp->if_hardmtu + sc->sc_hdr_size + ETHER_HDR_LEN;
> +	if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO4) ||
> +	    virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6))
> +		txsize = MAXMCLBYTES + sc->sc_hdr_size;
> +
> +	rxsize = ifp->if_hardmtu + sc->sc_hdr_size + ETHER_HDR_LEN;
> +	if (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4) ||
> +	    virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))
> +		rxsize = MAXMCLBYTES + sc->sc_hdr_size;
> +
> +	printf("\n");
> +	sc->sc_rxr_lwm = 2 * howmany(rxsize, sc->sc_rx_mbuf_size);
>  
>  	/* defrag for longer mbuf chains */
>  	tx_max_segments = 16;
> @@ -864,7 +900,7 @@ negotiate:
>  		}
>  	}
>  
> -	if (vio_alloc_mem(sc, tx_max_segments) < 0)
> +	if (vio_alloc_mem(sc, tx_max_segments, txsize) < 0)
>  		goto err;
>  
>  	strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
> @@ -891,6 +927,8 @@ negotiate:
>  	if_attach(ifp);
>  	ether_ifattach(ifp);
>  	vio_link_state(ifp);
> +	if (device_mtu)
> +		ifp->if_mtu = device_mtu;
>  
>  	if_attach_queues(ifp, sc->sc_nqueues);
>  	if_attach_iqueues(ifp, sc->sc_nqueues);
> @@ -1025,8 +1063,7 @@ vio_init(struct ifnet *ifp)
>  		struct vio_queue *vioq = &sc->sc_q[qidx];
>  
>  		mtx_enter(&vioq->viq_rxmtx);
> -		if_rxr_init(&vioq->viq_rxring,
> -		    2 * ((ifp->if_hardmtu / sc->sc_rx_mbuf_size) + 1),
> +		if_rxr_init(&vioq->viq_rxring, sc->sc_rxr_lwm,
>  		    vioq->viq_rxvq->vq_num);
>  		vio_populate_rx_mbufs(sc, vioq);
>  		ifq_clr_oactive(vioq->viq_ifq);
>