Index | Thread | Search

From:
Dave Voutila <dv@openbsd.org>
Subject:
Re: vionet: fix feature negotiation with Linux 6.17
To:
Tobias Heider <tobias.heider@stusta.de>
Cc:
tech@openbsd.org, dv@openbsd.org, mlarkin@openbsd.org, sf@openbsd.org
Date:
Sun, 19 Oct 2025 13:38:44 -0400

Download raw body.

Thread
Tobias Heider <tobias.heider@stusta.de> writes:

> After upgrading my Linux kernel vionet stopped working.
> It looks like a recent kernel change exposes a bug in our feature negotiation
> code.
>
> The responsible Linux change seems to be:
> https://github.com/torvalds/linux/commit/69b9461512246599ed80cf13358e7e6aff7285f9
>
> This extends the feature bits to 128 meaning Linux will query
> device_feature_select 0-3 instead of 0-1 as it did previously.
>

Ah, yeah...this is why on the vionet_cfg_write() side I had it
explicitly ignore values > 1. sf@ warned me this day was coming.

It's really fun that virtio doesn't allow for a way to communicate how
many bits it supports.

> In vionet_cfg_read we only handle device_feature_seelect 0 and 1 in the
> VIO1_PCI_DEVICE_FEATURE case and (falsely) return -1 on higher values.
> This translates to fffffffff which Linux interprets as all features are
> supported.
>

mea culpa... I approached this more like an actual PCI device. We should
be defaulting to zero here, even for unsupported registers.

Unfortunately, this means the issue is more widespread as this pattern
is used in multiple devices.

> I hacked some prints into the Linux kernel to show what happens on the
> driver side:
> Oct 19 12:38:44 ubuntu kernel: vp_modern_get_extended_features: 0:0020
> Oct 19 12:38:44 ubuntu kernel: vp_modern_get_extended_features: 1:0001
> Oct 19 12:38:44 ubuntu kernel: vp_modern_get_extended_features: 2:ffffffff
> Oct 19 12:38:44 ubuntu kernel: vp_modern_get_extended_features: 3:ffffffff
> Oct 19 12:38:44 ubuntu kernel: vp_modern_get_extended_features: 00000020ffffffff
> Oct 19 12:38:44 ubuntu kernel: vp_modern_set_extended_features: 000000200000001e
> Oct 19 12:38:44 ubuntu kernel: vp_modern_set_extended_features: 0:0020
> Oct 19 12:38:44 ubuntu kernel: vp_modern_set_extended_features: 1:0001
> Oct 19 12:38:44 ubuntu kernel: vp_modern_set_extended_features: 2:001e
> Oct 19 12:38:44 ubuntu kernel: vp_modern_set_extended_features: 3:0000
>
> Linux as a consequence enables GSO on the driver side causing it to send a
> larger than expected vionet header which vmd can't handle and causes a bunch
> of extra null bytes in front of each sent network frame.
>
> The is changing the default return value to 0 in vionet_cfg_read as is the
> case in virtio.c:virtio_io_cfg(). As far as I can tell all the error cases
> are essentially meant to be no-ops, so I don't see when we would ever want
> to return -1.
>

We really need to make this change in many places, not just
vionet, for correctness.

The one area I'm not 100% confident in is the read of the notification
register. It's undefined behavior AFAICT if the driver *reads* the
notification register. Returning (uint32_t)(-1) make sense for PCI, but
this isn't actually a PCI device. Should we switch that to 0 as well?

sf@ any thoughts here?


diff refs/heads/master refs/heads/tobhe-vionet
commit - c35d419de8268874d06c8b777c19e93d64ac1cdf
commit + 4e340e0a2d87004c02c9241226070261cce9c578
blob - 534289578e269f40c194681761279ad631c608d6
blob + d9abdf58a46f7b9419feb8ad5f7b10b1131b8b37
--- usr.sbin/vmd/vioblk.c
+++ usr.sbin/vmd/vioblk.c
@@ -558,7 +558,7 @@ vioblk_write(struct virtio_dev *dev, struct viodev_msg
 static uint32_t
 vioblk_read(struct virtio_dev *dev, struct viodev_msg *msg, int *deassert)
 {
-	uint32_t data = (uint32_t)(-1);
+	uint32_t data = 0;
 	uint16_t reg = msg->reg;
 	uint8_t sz = msg->io_sz;

@@ -588,7 +588,7 @@ static uint32_t
 vioblk_dev_read(struct virtio_dev *dev, struct viodev_msg *msg)
 {
 	struct vioblk_dev *vioblk = (struct vioblk_dev *)&dev->vioblk;
-	uint32_t data = (uint32_t)(-1);
+	uint32_t data = 0;
 	uint16_t reg = msg->reg;
 	uint8_t sz = msg->io_sz;

@@ -627,7 +627,6 @@ vioblk_dev_read(struct virtio_dev *dev, struct viodev_
 		break;
 	default:
 		log_warnx("%s: invalid register 0x%04x", __func__, reg);
-		return (uint32_t)(-1);
 	}

 	return (data);
blob - 6eb013e48bf23565b1e57abf5b2d039ec073783f
blob + 22ec82e24bab649d5877d9b3b5b5281a2d01fa4a
--- usr.sbin/vmd/vionet.c
+++ usr.sbin/vmd/vionet.c
@@ -980,7 +980,7 @@ static uint32_t
 vionet_cfg_read(struct virtio_dev *dev, struct viodev_msg *msg)
 {
 	struct virtio_pci_common_cfg *pci_cfg = &dev->pci_cfg;
-	uint32_t data = (uint32_t)(-1);
+	uint32_t data = 0;
 	uint16_t reg = msg->reg & 0x00FF;

 	pthread_rwlock_rdlock(&lock);
@@ -1272,7 +1272,7 @@ vionet_cfg_write(struct virtio_dev *dev, struct viodev
 static uint32_t
 vionet_read(struct virtio_dev *dev, struct viodev_msg *msg, int *deassert)
 {
-	uint32_t data = (uint32_t)(-1);
+	uint32_t data = 0;
 	uint16_t reg = msg->reg;

 	switch (reg & 0xFF00) {
@@ -1284,6 +1284,7 @@ vionet_read(struct virtio_dev *dev, struct viodev_msg
 		break;
 	case VIO1_NOTIFY_BAR_OFFSET:
 		/* Reads of notify register return all 1's. */
+		data = (uint32_t)(-1);
 		break;
 	case VIO1_ISR_BAR_OFFSET:
 		pthread_rwlock_wrlock(&lock);
@@ -1326,7 +1327,7 @@ static uint32_t
 vionet_dev_read(struct virtio_dev *dev, struct viodev_msg *msg)
 {
 	struct vionet_dev *vionet = (struct vionet_dev *)&dev->vionet;
-	uint32_t data = (uint32_t)(-1);
+	uint32_t data = 0;
 	uint16_t reg = msg->reg & 0xFF;

 	switch (reg) {
@@ -1340,7 +1341,6 @@ vionet_dev_read(struct virtio_dev *dev, struct viodev_
 		break;
 	default:
 		log_warnx("%s: invalid register 0x%04x", __func__, reg);
-		return (uint32_t)(-1);
 	}

 	return (data);
blob - c73c26257429504895fb58d02e7977e0be2da755
blob + 7df482f914006fcab90889f314cad87af50083d1
--- usr.sbin/vmd/vioscsi.c
+++ usr.sbin/vmd/vioscsi.c
@@ -1812,7 +1812,6 @@ vioscsi_io(int dir, uint16_t reg, uint32_t *data, uint
 			break;
 		default:
 			log_warnx("%s: invalid register 0x%04x", __func__, reg);
-			*data = (uint32_t)(-1);
 		}
 	}