From: Jan Klemkow
Subject: Re: vio: Support MTU feature
To: Stefan Fritsch
Cc: tech@openbsd.org
Date: Wed, 14 Jan 2026 21:08:42 +0100

On Wed, Jan 14, 2026 at 01:24:50PM +0100, Stefan Fritsch wrote:
> here is the final bit to make OpenBSD work on Apple Virtualization, which
> refuses to work if the MTU feature is not negotiated:
>
> Add support for the VIRTIO_NET_F_MTU feature, which allows getting the
> hardmtu from the hypervisor. Also set the current mtu to the same value.
> The virtio standard is not clear if that is recommended, but Linux does
> this, too.
>
> Use ETHER_MAX_HARDMTU_LEN as upper hardmtu limit instead of MAXMCLBYTES,
> as this seems to be more correct.
>
> If the hypervisor requests an MTU larger than ETHER_MAX_HARDMTU_LEN, redo
> feature negotiation without VIRTIO_NET_F_MTU.
>
> Input and testing from @helg
>
> ok?

Works in my setups.  I tested it on Linux/KVM with the following MTU tag
in my config:

vio0 at virtio0: 1 queue, address 52:54:00:0a:91:cc
vio1 at virtio1: 1 queue, address 52:54:00:2e:69:7a, mtu 1200

Also works without regressions in my vmm(4)/vmd(8) setup.

Diff looks fine to me.
ok jan@

> diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
> index ae138c0ee59..b2e0435fb85 100644
> --- a/sys/dev/pv/if_vio.c
> +++ b/sys/dev/pv/if_vio.c
> @@ -294,6 +294,7 @@ struct vio_softc {
>  	struct intrmap		*sc_intrmap;
>  	struct vio_queue	*sc_q;
>  	uint16_t		sc_nqueues;
> +	uint16_t		sc_rxr_lwm;
>  	int			sc_tx_slots_per_req;
>  	int			sc_rx_mbuf_size;
>  
> @@ -367,7 +368,7 @@ int	vio_ctrl_start(struct vio_softc *, uint8_t, uint8_t, int, int *);
>  int	vio_ctrl_submit(struct vio_softc *, int);
>  void	vio_ctrl_finish(struct vio_softc *);
>  void	vio_ctrl_wakeup(struct vio_softc *, enum vio_ctrl_state);
> -int	vio_alloc_mem(struct vio_softc *, int);
> +int	vio_alloc_mem(struct vio_softc *, int, bus_size_t);
>  int	vio_alloc_dmamem(struct vio_softc *);
>  void	vio_free_dmamem(struct vio_softc *);
>  
> @@ -466,12 +467,10 @@ vio_free_dmamem(struct vio_softc *sc)
>   * viq_txmbufs[slot]: mbuf pointer array for sent frames
>   */
>  int
> -vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
> +vio_alloc_mem(struct vio_softc *sc, int tx_max_segments, bus_size_t txsize)
>  {
>  	struct virtio_softc *vsc = sc->sc_virtio;
> -	struct ifnet *ifp = &sc->sc_ac.ac_if;
>  	size_t allocsize, rxqsize, txqsize, offset = 0;
> -	bus_size_t txsize;
>  	caddr_t kva;
>  	int i, qidx, r;
>  
> @@ -528,11 +527,6 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
>  	}
>  	KASSERT(offset == allocsize);
>  
> -	if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO4) ||
> -	    virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6))
> -		txsize = MAXMCLBYTES + sc->sc_hdr_size + ETHER_HDR_LEN;
> -	else
> -		txsize = ifp->if_hardmtu + sc->sc_hdr_size + ETHER_HDR_LEN;
>  
>  	for (qidx = 0; qidx < sc->sc_nqueues; qidx++) {
>  		struct vio_queue *vioq = &sc->sc_q[qidx];
> @@ -635,7 +629,9 @@ vio_attach(struct device *parent, struct device *self, void *aux)
>  	struct vio_softc *sc = (struct vio_softc *)self;
>  	struct virtio_softc *vsc = (struct virtio_softc *)parent;
>  	struct virtio_attach_args *va = aux;
> -	int i, r, tx_max_segments, want_tso = 1;
> +	int i, r, tx_max_segments, want_tso = 1, want_mtu = 1;
> +	unsigned int device_mtu = 0;
> +	bus_size_t txsize, rxsize;
>  	struct ifnet *ifp = &sc->sc_ac.ac_if;
>  
>  	if (vsc->sc_child != NULL) {
> @@ -665,6 +661,8 @@ negotiate:
>  		vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4;
>  		vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6;
>  	}
> +	if (want_mtu)
> +		vsc->sc_driver_features |= VIRTIO_NET_F_MTU;
>  	if (va->va_nintr > 3 && ncpus > 1)
>  		vsc->sc_driver_features |= VIRTIO_NET_F_MQ;
>  
> @@ -690,6 +688,24 @@ negotiate:
>  		goto negotiate;
>  	}
>  
> +	if (virtio_has_feature(vsc, VIRTIO_NET_F_MTU)) {
> +		device_mtu = virtio_read_device_config_2(vsc,
> +		    VIRTIO_NET_CONFIG_MTU);
> +		if (device_mtu > ETHER_MAX_HARDMTU_LEN) {
> +			/*
> +			 * According to the standard we MUST supply rx buffers
> +			 * for the MTU length, but we only support up to
> +			 * ETHER_MAX_HARDMTU_LEN.
> +			 */
> +			printf("%s: mtu %u not supported\n",
> +			    sc->sc_dev.dv_xname, device_mtu);
> +			device_mtu = 0;
> +			want_mtu = 0;
> +			virtio_reset(vsc);
> +			goto negotiate;
> +		}
> +	}
> +
>  	if (virtio_has_feature(vsc, VIRTIO_NET_F_MQ)) {
>  		i = virtio_read_device_config_2(vsc,
>  		    VIRTIO_NET_CONFIG_MAX_QUEUES);
> @@ -724,7 +740,7 @@ negotiate:
>  		vio_get_lladdr(&sc->sc_ac, vsc);
>  	else
>  		ether_fakeaddr(ifp);
> -	printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
> +	printf(", address %s", ether_sprintf(sc->sc_ac.ac_enaddr));
>  
>  	if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF) ||
>  	    vsc->sc_version_1) {
> @@ -757,11 +773,31 @@ negotiate:
>  		sc->sc_rx_mbuf_size = 4 * 1024;
>  	}
>  
> -	if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF))
> -		ifp->if_hardmtu = MAXMCLBYTES;
> -	else
> -		ifp->if_hardmtu = sc->sc_rx_mbuf_size - sc->sc_hdr_size -
> -		    ETHER_HDR_LEN;
> +	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
> +	if (device_mtu) {
> +		printf(", mtu %u", device_mtu);
> +		ifp->if_hardmtu = device_mtu;
> +	}
> +	if (!virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF)) {
> +		if (device_mtu)
> +			sc->sc_rx_mbuf_size = MAX(sc->sc_rx_mbuf_size,
> +			    device_mtu + sc->sc_hdr_size + ETHER_HDR_LEN);
> +		ifp->if_hardmtu = MIN(ifp->if_hardmtu,
> +		    sc->sc_rx_mbuf_size - sc->sc_hdr_size - ETHER_HDR_LEN);
> +	}
> +
> +	txsize = ifp->if_hardmtu + sc->sc_hdr_size + ETHER_HDR_LEN;
> +	if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO4) ||
> +	    virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6))
> +		txsize = MAXMCLBYTES + sc->sc_hdr_size;
> +
> +	rxsize = ifp->if_hardmtu + sc->sc_hdr_size + ETHER_HDR_LEN;
> +	if (virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO4) ||
> +	    virtio_has_feature(vsc, VIRTIO_NET_F_GUEST_TSO6))
> +		rxsize = MAXMCLBYTES + sc->sc_hdr_size;
> +
> +	printf("\n");
> +	sc->sc_rxr_lwm = 2 * howmany(rxsize, sc->sc_rx_mbuf_size);
>  
>  	/* defrag for longer mbuf chains */
>  	tx_max_segments = 16;
> @@ -864,7 +900,7 @@ negotiate:
>  		}
>  	}
>  
> -	if (vio_alloc_mem(sc, tx_max_segments) < 0)
> +	if (vio_alloc_mem(sc, tx_max_segments, txsize) < 0)
>  		goto err;
>  
>  	strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
> @@ -891,6 +927,8 @@ negotiate:
>  	if_attach(ifp);
>  	ether_ifattach(ifp);
>  	vio_link_state(ifp);
> +	if (device_mtu)
> +		ifp->if_mtu = device_mtu;
>  
>  	if_attach_queues(ifp, sc->sc_nqueues);
>  	if_attach_iqueues(ifp, sc->sc_nqueues);
> @@ -1025,8 +1063,7 @@ vio_init(struct ifnet *ifp)
>  		struct vio_queue *vioq = &sc->sc_q[qidx];
>  
>  		mtx_enter(&vioq->viq_rxmtx);
> -		if_rxr_init(&vioq->viq_rxring,
> -		    2 * ((ifp->if_hardmtu / sc->sc_rx_mbuf_size) + 1),
> +		if_rxr_init(&vioq->viq_rxring, sc->sc_rxr_lwm,
>  		    vioq->viq_rxvq->vq_num);
>  		vio_populate_rx_mbufs(sc, vioq);
>  		ifq_clr_oactive(vioq->viq_ifq);