From: Tobias Heider Subject: Re: vionet: fix feature negotiation with Linux 6.17 To: Dave Voutila Cc: tech@openbsd.org, mlarkin@openbsd.org, sf@openbsd.org Date: Mon, 20 Oct 2025 00:56:36 +0200 On Sun, Oct 19, 2025 at 01:38:44PM -0400, Dave Voutila wrote: > Tobias Heider writes: > > > After upgrading my Linux kernel vionet stopped working. > > It looks like a recent kernel change exposes a bug in our feature negotiation > > code. > > > > The responsible Linux change seems to be: > > https://github.com/torvalds/linux/commit/69b9461512246599ed80cf13358e7e6aff7285f9 > > > > This extends the feature bits to 128 meaning Linux will query > > device_feature_select 0-3 instead of 0-1 as it did previously. > > > > Ah, yeah...this is why on the vionet_cfg_write() side I had it > explicitly ignore values > 1. sf@ warned me this day was coming. > > It's really fun that virtio doesn't allow for a way to communicate how > many bits it supports. > > > In vionet_cfg_read we only handle device_feature_seelect 0 and 1 in the > > VIO1_PCI_DEVICE_FEATURE case and (falsely) return -1 on higher values. > > This translates to fffffffff which Linux interprets as all features are > > supported. > > > > mea culpa... I approached this more like an actual PCI device. We should > be defaulting to zero here, even for unsupported registers. > > Unfortunately, this means the issue is more widespread as this pattern > is used in multiple devices. > > > I hacked some prints into the Linux kernel to show what happens on the > > driver side: > > Oct 19 12:38:44 ubuntu kernel: vp_modern_get_extended_features: 0:0020 > > Oct 19 12:38:44 ubuntu kernel: vp_modern_get_extended_features: 1:0001 > > Oct 19 12:38:44 ubuntu kernel: vp_modern_get_extended_features: 2:ffffffff > > Oct 19 12:38:44 ubuntu kernel: vp_modern_get_extended_features: 3:ffffffff > > Oct 19 12:38:44 ubuntu kernel: vp_modern_get_extended_features: 00000020ffffffff > > Oct 19 12:38:44 ubuntu kernel: vp_modern_set_extended_features: 000000200000001e > > Oct 19 12:38:44 ubuntu kernel: vp_modern_set_extended_features: 0:0020 > > Oct 19 12:38:44 ubuntu kernel: vp_modern_set_extended_features: 1:0001 > > Oct 19 12:38:44 ubuntu kernel: vp_modern_set_extended_features: 2:001e > > Oct 19 12:38:44 ubuntu kernel: vp_modern_set_extended_features: 3:0000 > > > > Linux as a consequence enables GSO on the driver side causing it to send a > > larger than expected vionet header which vmd can't handle and causes a bunch > > of extra null bytes in front of each sent network frame. > > > > The is changing the default return value to 0 in vionet_cfg_read as is the > > case in virtio.c:virtio_io_cfg(). As far as I can tell all the error cases > > are essentially meant to be no-ops, so I don't see when we would ever want > > to return -1. > > > > We really need to make this change in many places, not just > vionet, for correctness. Makes sense. All of those look ok to me. > > The one area I'm not 100% confident in is the read of the notification > register. It's undefined behavior AFAICT if the driver *reads* the > notification register. Returning (uint32_t)(-1) make sense for PCI, but > this isn't actually a PCI device. Should we switch that to 0 as well? > > sf@ any thoughts here? > > > diff refs/heads/master refs/heads/tobhe-vionet > commit - c35d419de8268874d06c8b777c19e93d64ac1cdf > commit + 4e340e0a2d87004c02c9241226070261cce9c578 > blob - 534289578e269f40c194681761279ad631c608d6 > blob + d9abdf58a46f7b9419feb8ad5f7b10b1131b8b37 > --- usr.sbin/vmd/vioblk.c > +++ usr.sbin/vmd/vioblk.c > @@ -558,7 +558,7 @@ vioblk_write(struct virtio_dev *dev, struct viodev_msg > static uint32_t > vioblk_read(struct virtio_dev *dev, struct viodev_msg *msg, int *deassert) > { > - uint32_t data = (uint32_t)(-1); > + uint32_t data = 0; > uint16_t reg = msg->reg; > uint8_t sz = msg->io_sz; > > @@ -588,7 +588,7 @@ static uint32_t > vioblk_dev_read(struct virtio_dev *dev, struct viodev_msg *msg) > { > struct vioblk_dev *vioblk = (struct vioblk_dev *)&dev->vioblk; > - uint32_t data = (uint32_t)(-1); > + uint32_t data = 0; > uint16_t reg = msg->reg; > uint8_t sz = msg->io_sz; > > @@ -627,7 +627,6 @@ vioblk_dev_read(struct virtio_dev *dev, struct viodev_ > break; > default: > log_warnx("%s: invalid register 0x%04x", __func__, reg); > - return (uint32_t)(-1); > } > > return (data); > blob - 6eb013e48bf23565b1e57abf5b2d039ec073783f > blob + 22ec82e24bab649d5877d9b3b5b5281a2d01fa4a > --- usr.sbin/vmd/vionet.c > +++ usr.sbin/vmd/vionet.c > @@ -980,7 +980,7 @@ static uint32_t > vionet_cfg_read(struct virtio_dev *dev, struct viodev_msg *msg) > { > struct virtio_pci_common_cfg *pci_cfg = &dev->pci_cfg; > - uint32_t data = (uint32_t)(-1); > + uint32_t data = 0; > uint16_t reg = msg->reg & 0x00FF; > > pthread_rwlock_rdlock(&lock); > @@ -1272,7 +1272,7 @@ vionet_cfg_write(struct virtio_dev *dev, struct viodev > static uint32_t > vionet_read(struct virtio_dev *dev, struct viodev_msg *msg, int *deassert) > { > - uint32_t data = (uint32_t)(-1); > + uint32_t data = 0; > uint16_t reg = msg->reg; > > switch (reg & 0xFF00) { > @@ -1284,6 +1284,7 @@ vionet_read(struct virtio_dev *dev, struct viodev_msg > break; > case VIO1_NOTIFY_BAR_OFFSET: > /* Reads of notify register return all 1's. */ > + data = (uint32_t)(-1); > break; > case VIO1_ISR_BAR_OFFSET: > pthread_rwlock_wrlock(&lock); > @@ -1326,7 +1327,7 @@ static uint32_t > vionet_dev_read(struct virtio_dev *dev, struct viodev_msg *msg) > { > struct vionet_dev *vionet = (struct vionet_dev *)&dev->vionet; > - uint32_t data = (uint32_t)(-1); > + uint32_t data = 0; > uint16_t reg = msg->reg & 0xFF; > > switch (reg) { > @@ -1340,7 +1341,6 @@ vionet_dev_read(struct virtio_dev *dev, struct viodev_ > break; > default: > log_warnx("%s: invalid register 0x%04x", __func__, reg); > - return (uint32_t)(-1); > } > > return (data); > blob - c73c26257429504895fb58d02e7977e0be2da755 > blob + 7df482f914006fcab90889f314cad87af50083d1 > --- usr.sbin/vmd/vioscsi.c > +++ usr.sbin/vmd/vioscsi.c > @@ -1812,7 +1812,6 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint > break; > default: > log_warnx("%s: invalid register 0x%04x", __func__, reg); > - *data = (uint32_t)(-1); > } > }