Index | Thread | Search

From:
Stefan Fritsch <sf@sfritsch.de>
Subject:
Re: Don't release NET_LOCK in vio(4) ioctl
To:
Mark Kettenis <mark.kettenis@xs4all.nl>
Cc:
bluhm@openbsd.org, mvs@openbsd.org, tech@openbsd.org
Date:
Sun, 28 Apr 2024 10:23:08 +0200

Download raw body.

Thread
On Thu, 25 Apr 2024, Mark Kettenis wrote:
> > it seems I went down the wrong rabbit hole with my first diff. The actual 
> > issue is that signals are not handled correctly while sleeping. After a 
> > signal, there is a race condition where sc_ctrl_inuse is first set to FREE 
> > and then the interrupt handler sets it to DONE, causing a hang in the next 
> > vio_wait_ctrl() call. This was probably not noticed before NET_LOCK 
> > existed, because an ifconfig down/up fixed it. However since the NET_LOCK 
> > now serializes the relevant ioctl paths, a parallel ifconfig down now 
> > hangs too, waiting for the NET_LOCK. The attached patch does three things:
> > 
> > * Revert the NET_LOCK unlocking work-around.
> > 
> > * Remove PCATCH from the sleep calls so that we always wait for the control
> >   requests to complete, avoiding the race with vio_ctrleof().
> 
> What guarantee is there that those control requests complete?

in the absence of hypervisor bugs, it should complete. Which probably 
means we should not trust it too much. Also, since virtio 1.x, there is a 
bit in the device status that the hypervisor can use to tell the guest 
that the device needs a reset.

The attached updated diff now limits the wait on control queue requests to 
5 seconds. If there is a timeout or if the device-needs-reset status bit 
is set, the control queue is not tried again until the next ifconfig 
down/up. Reading the device status needed a new interface in the transport 
drivers. I have a raspi with qemu now, so I could also test the 
virtio-mmio part. I have also fixed your and bluhm's other comments.

diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
index 4d92508f72e..b5746e7ab3b 100644
--- a/sys/dev/fdt/virtio_mmio.c
+++ b/sys/dev/fdt/virtio_mmio.c
@@ -97,6 +97,7 @@ void		virtio_mmio_write_device_config_4(struct virtio_softc *, int, uint32_t);
 void		virtio_mmio_write_device_config_8(struct virtio_softc *, int, uint64_t);
 uint16_t	virtio_mmio_read_queue_size(struct virtio_softc *, uint16_t);
 void		virtio_mmio_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t);
+int		virtio_mmio_get_status(struct virtio_softc *);
 void		virtio_mmio_set_status(struct virtio_softc *, int);
 int		virtio_mmio_negotiate_features(struct virtio_softc *,
     const struct virtio_feature_name *);
@@ -144,6 +145,7 @@ struct virtio_ops virtio_mmio_ops = {
 	virtio_mmio_write_device_config_8,
 	virtio_mmio_read_queue_size,
 	virtio_mmio_setup_queue,
+	virtio_mmio_get_status,
 	virtio_mmio_set_status,
 	virtio_mmio_negotiate_features,
 	virtio_mmio_intr,
@@ -194,6 +196,15 @@ virtio_mmio_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
 	}
 }
 
+int
+virtio_mmio_get_status(struct virtio_softc *vsc)
+{
+	struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc;
+
+	return bus_space_read_4(sc->sc_iot, sc->sc_ioh,
+	    VIRTIO_MMIO_STATUS);
+}
+
 void
 virtio_mmio_set_status(struct virtio_softc *vsc, int status)
 {
diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
index 36a62c13bd1..b60fe740587 100644
--- a/sys/dev/pci/virtio_pci.c
+++ b/sys/dev/pci/virtio_pci.c
@@ -72,6 +72,7 @@ void		virtio_pci_write_device_config_4(struct virtio_softc *, int, uint32_t);
 void		virtio_pci_write_device_config_8(struct virtio_softc *, int, uint64_t);
 uint16_t	virtio_pci_read_queue_size(struct virtio_softc *, uint16_t);
 void		virtio_pci_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t);
+int		virtio_pci_get_status(struct virtio_softc *);
 void		virtio_pci_set_status(struct virtio_softc *, int);
 int		virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *);
 int		virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *);
@@ -155,6 +156,7 @@ struct virtio_ops virtio_pci_ops = {
 	virtio_pci_write_device_config_8,
 	virtio_pci_read_queue_size,
 	virtio_pci_setup_queue,
+	virtio_pci_get_status,
 	virtio_pci_set_status,
 	virtio_pci_negotiate_features,
 	virtio_pci_poll_intr,
@@ -275,6 +277,18 @@ virtio_pci_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
 	}
 }
 
+int
+virtio_pci_get_status(struct virtio_softc *vsc)
+{
+	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+
+	if (sc->sc_sc.sc_version_1)
+		return CREAD(sc, device_status);
+	else
+		return bus_space_read_1(sc->sc_iot, sc->sc_ioh,
+		    VIRTIO_CONFIG_DEVICE_STATUS);
+}
+
 void
 virtio_pci_set_status(struct virtio_softc *vsc, int status)
 {
diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
index 9ad8e36e346..456e332890a 100644
--- a/sys/dev/pv/if_vio.c
+++ b/sys/dev/pv/if_vio.c
@@ -252,6 +252,7 @@ struct vio_softc {
 #define VIRTIO_NET_TX_MAXNSEGS		16 /* for larger chains, defrag */
 #define VIRTIO_NET_CTRL_MAC_MC_ENTRIES	64 /* for more entries, use ALLMULTI */
 #define VIRTIO_NET_CTRL_MAC_UC_ENTRIES	 1 /* one entry for own unicast addr */
+#define VIRTIO_NET_CTRL_TIMEOUT		(5*1000*1000*1000ULL) /* 5 seconds */
 
 #define VIO_CTRL_MAC_INFO_SIZE					\
 	(2*sizeof(struct virtio_net_ctrl_mac_tbl) +		\
@@ -512,6 +513,17 @@ vio_put_lladdr(struct arpcom *ac, struct virtio_softc *vsc)
 	}
 }
 
+static int vio_needs_reset(struct vio_softc *sc)
+{
+	if (virtio_get_status(sc->sc_virtio) &
+	    VIRTIO_CONFIG_DEVICE_STATUS_DEVICE_NEEDS_RESET) {
+		printf("%s: device needs reset", sc->sc_dev.dv_xname);
+		vio_ctrl_wakeup(sc, RESET);
+		return 1;
+	}
+	return 0;
+}
+
 void
 vio_attach(struct device *parent, struct device *self, void *aux)
 {
@@ -649,6 +661,7 @@ vio_config_change(struct virtio_softc *vsc)
 {
 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
 	vio_link_state(&sc->sc_ac.ac_if);
+	vio_needs_reset(sc);
 	return 1;
 }
 
@@ -703,7 +716,7 @@ vio_stop(struct ifnet *ifp, int disable)
 	virtio_reset(vsc);
 	vio_rxeof(sc);
 	if (vsc->sc_nvqs >= 3)
-		vio_ctrleof(&sc->sc_vq[VQCTL]);
+		vio_ctrl_wakeup(sc, RESET);
 	vio_tx_drain(sc);
 	if (disable)
 		vio_rx_drain(sc);
@@ -714,11 +727,8 @@ vio_stop(struct ifnet *ifp, int disable)
 	if (vsc->sc_nvqs >= 3)
 		virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
 	virtio_reinit_end(vsc);
-	if (vsc->sc_nvqs >= 3) {
-		if (sc->sc_ctrl_inuse != FREE)
-			sc->sc_ctrl_inuse = RESET;
-		wakeup(&sc->sc_ctrl_inuse);
-	}
+	if (vsc->sc_nvqs >= 3)
+		vio_ctrl_wakeup(sc, FREE);
 }
 
 static inline uint16_t
@@ -1230,6 +1240,9 @@ vio_txeof(struct virtqueue *vq)
 	int r = 0;
 	int slot, len;
 
+	if (!ISSET(ifp->if_flags, IFF_RUNNING))
+		return 0;
+
 	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
 		struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot];
 		r++;
@@ -1363,32 +1376,15 @@ out:
 	return r;
 }
 
-/*
- * XXXSMP As long as some per-ifp ioctl(2)s are executed with the
- * NET_LOCK() deadlocks are possible.  So release it here.
- */
-static inline int
-vio_sleep(struct vio_softc *sc, const char *wmesg)
-{
-	int status = rw_status(&netlock);
-
-	if (status != RW_WRITE && status != RW_READ)
-		return tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg,
-		    INFSLP);
-
-	return rwsleep_nsec(&sc->sc_ctrl_inuse, &netlock, PRIBIO|PCATCH, wmesg,
-	    INFSLP);
-}
-
 int
 vio_wait_ctrl(struct vio_softc *sc)
 {
 	int r = 0;
 
 	while (sc->sc_ctrl_inuse != FREE) {
-		r = vio_sleep(sc, "viowait");
-		if (r == EINTR)
-			return r;
+		if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc))
+			return ENXIO;
+		r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viowait", INFSLP);
 	}
 	sc->sc_ctrl_inuse = INUSE;
 
@@ -1400,14 +1396,16 @@ vio_wait_ctrl_done(struct vio_softc *sc)
 {
 	int r = 0;
 
-	while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) {
-		if (sc->sc_ctrl_inuse == RESET) {
-			r = 1;
-			break;
+	while (sc->sc_ctrl_inuse != DONE) {
+		if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc))
+			return ENXIO;
+		r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viodone",
+		    VIRTIO_NET_CTRL_TIMEOUT);
+		if (r == EWOULDBLOCK) {
+			printf("%s: ctrl queue timeout", sc->sc_dev.dv_xname);
+			vio_ctrl_wakeup(sc, RESET);
+			return ENXIO;
 		}
-		r = vio_sleep(sc, "viodone");
-		if (r == EINTR)
-			break;
 	}
 	return r;
 }
diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h
index d12cb2c59b8..ee9dc877bae 100644
--- a/sys/dev/pv/virtiovar.h
+++ b/sys/dev/pv/virtiovar.h
@@ -154,6 +154,7 @@ struct virtio_ops {
 	void		(*write_dev_cfg_8)(struct virtio_softc *, int, uint64_t);
 	uint16_t	(*read_queue_size)(struct virtio_softc *, uint16_t);
 	void		(*setup_queue)(struct virtio_softc *, struct virtqueue *, uint64_t);
+	int		(*get_status)(struct virtio_softc *);
 	void		(*set_status)(struct virtio_softc *, int);
 	int		(*neg_features)(struct virtio_softc *, const struct virtio_feature_name *);
 	int		(*poll_intr)(void *);
@@ -197,9 +198,10 @@ struct virtio_softc {
 #define	virtio_setup_queue(sc, i, v)		(sc)->sc_ops->setup_queue(sc, i, v)
 #define	virtio_negotiate_features(sc, n)	(sc)->sc_ops->neg_features(sc, n)
 #define	virtio_poll_intr(sc)			(sc)->sc_ops->poll_intr(sc)
+#define	virtio_get_status(sc)			(sc)->sc_ops->get_status(sc)
+#define	virtio_set_status(sc, i)		(sc)->sc_ops->set_status(sc, i)
 
 /* only for transport drivers */
-#define	virtio_set_status(sc, i)		(sc)->sc_ops->set_status(sc, i)
 #define	virtio_device_reset(sc)			virtio_set_status((sc), 0)
 
 static inline int