Download raw body.
Don't release NET_LOCK in vio(4) ioctl
On Thu, 25 Apr 2024, Mark Kettenis wrote:
> > it seems I went down the wrong rabbit hole with my first diff. The actual
> > issue is that signals are not handled correctly while sleeping. After a
> > signal, there is a race condition where sc_ctrl_inuse is first set to FREE
> > and then the interrupt handler sets it to DONE, causing a hang in the next
> > vio_wait_ctrl() call. This was probably not noticed before NET_LOCK
> > existed, because an ifconfig down/up fixed it. However since the NET_LOCK
> > now serializes the relevant ioctl paths, a parallel ifconfig down now
> > hangs too, waiting for the NET_LOCK. The attached patch does three things:
> >
> > * Revert the NET_LOCK unlocking work-around.
> >
> > * Remove PCATCH from the sleep calls so that we always wait for the control
> > requests to complete, avoiding the race with vio_ctrleof().
>
> What guarantee is there that those control requests complete?
in the absence of hypervisor bugs, it should complete. Which probably
means we should not trust it too much. Also, since virtio 1.x, there is a
bit in the device status that the hypervisor can use to tell the guest
that the device needs a reset.
The attached updated diff now limits the wait on control queue requests to
5 seconds. If there is a timeout or if the device-needs-reset status bit
is set, the control queue is not tried again until the next ifconfig
down/up. Reading the device status needed a new interface in the transport
drivers. I have a raspi with qemu now, so I could also test the
virtio-mmio part. I have also fixed your and bluhm's other comments.
diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
index 4d92508f72e..b5746e7ab3b 100644
--- a/sys/dev/fdt/virtio_mmio.c
+++ b/sys/dev/fdt/virtio_mmio.c
@@ -97,6 +97,7 @@ void virtio_mmio_write_device_config_4(struct virtio_softc *, int, uint32_t);
void virtio_mmio_write_device_config_8(struct virtio_softc *, int, uint64_t);
uint16_t virtio_mmio_read_queue_size(struct virtio_softc *, uint16_t);
void virtio_mmio_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t);
+int virtio_mmio_get_status(struct virtio_softc *);
void virtio_mmio_set_status(struct virtio_softc *, int);
int virtio_mmio_negotiate_features(struct virtio_softc *,
const struct virtio_feature_name *);
@@ -144,6 +145,7 @@ struct virtio_ops virtio_mmio_ops = {
virtio_mmio_write_device_config_8,
virtio_mmio_read_queue_size,
virtio_mmio_setup_queue,
+ virtio_mmio_get_status,
virtio_mmio_set_status,
virtio_mmio_negotiate_features,
virtio_mmio_intr,
@@ -194,6 +196,15 @@ virtio_mmio_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
}
}
+int
+virtio_mmio_get_status(struct virtio_softc *vsc)
+{
+ struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc;
+
+ return bus_space_read_4(sc->sc_iot, sc->sc_ioh,
+ VIRTIO_MMIO_STATUS);
+}
+
void
virtio_mmio_set_status(struct virtio_softc *vsc, int status)
{
diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
index 36a62c13bd1..b60fe740587 100644
--- a/sys/dev/pci/virtio_pci.c
+++ b/sys/dev/pci/virtio_pci.c
@@ -72,6 +72,7 @@ void virtio_pci_write_device_config_4(struct virtio_softc *, int, uint32_t);
void virtio_pci_write_device_config_8(struct virtio_softc *, int, uint64_t);
uint16_t virtio_pci_read_queue_size(struct virtio_softc *, uint16_t);
void virtio_pci_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t);
+int virtio_pci_get_status(struct virtio_softc *);
void virtio_pci_set_status(struct virtio_softc *, int);
int virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *);
int virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *);
@@ -155,6 +156,7 @@ struct virtio_ops virtio_pci_ops = {
virtio_pci_write_device_config_8,
virtio_pci_read_queue_size,
virtio_pci_setup_queue,
+ virtio_pci_get_status,
virtio_pci_set_status,
virtio_pci_negotiate_features,
virtio_pci_poll_intr,
@@ -275,6 +277,18 @@ virtio_pci_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
}
}
+int
+virtio_pci_get_status(struct virtio_softc *vsc)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+
+ if (sc->sc_sc.sc_version_1)
+ return CREAD(sc, device_status);
+ else
+ return bus_space_read_1(sc->sc_iot, sc->sc_ioh,
+ VIRTIO_CONFIG_DEVICE_STATUS);
+}
+
void
virtio_pci_set_status(struct virtio_softc *vsc, int status)
{
diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
index 9ad8e36e346..456e332890a 100644
--- a/sys/dev/pv/if_vio.c
+++ b/sys/dev/pv/if_vio.c
@@ -252,6 +252,7 @@ struct vio_softc {
#define VIRTIO_NET_TX_MAXNSEGS 16 /* for larger chains, defrag */
#define VIRTIO_NET_CTRL_MAC_MC_ENTRIES 64 /* for more entries, use ALLMULTI */
#define VIRTIO_NET_CTRL_MAC_UC_ENTRIES 1 /* one entry for own unicast addr */
+#define VIRTIO_NET_CTRL_TIMEOUT (5*1000*1000*1000ULL) /* 5 seconds */
#define VIO_CTRL_MAC_INFO_SIZE \
(2*sizeof(struct virtio_net_ctrl_mac_tbl) + \
@@ -512,6 +513,17 @@ vio_put_lladdr(struct arpcom *ac, struct virtio_softc *vsc)
}
}
+static int vio_needs_reset(struct vio_softc *sc)
+{
+ if (virtio_get_status(sc->sc_virtio) &
+ VIRTIO_CONFIG_DEVICE_STATUS_DEVICE_NEEDS_RESET) {
+ printf("%s: device needs reset", sc->sc_dev.dv_xname);
+ vio_ctrl_wakeup(sc, RESET);
+ return 1;
+ }
+ return 0;
+}
+
void
vio_attach(struct device *parent, struct device *self, void *aux)
{
@@ -649,6 +661,7 @@ vio_config_change(struct virtio_softc *vsc)
{
struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
vio_link_state(&sc->sc_ac.ac_if);
+ vio_needs_reset(sc);
return 1;
}
@@ -703,7 +716,7 @@ vio_stop(struct ifnet *ifp, int disable)
virtio_reset(vsc);
vio_rxeof(sc);
if (vsc->sc_nvqs >= 3)
- vio_ctrleof(&sc->sc_vq[VQCTL]);
+ vio_ctrl_wakeup(sc, RESET);
vio_tx_drain(sc);
if (disable)
vio_rx_drain(sc);
@@ -714,11 +727,8 @@ vio_stop(struct ifnet *ifp, int disable)
if (vsc->sc_nvqs >= 3)
virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
virtio_reinit_end(vsc);
- if (vsc->sc_nvqs >= 3) {
- if (sc->sc_ctrl_inuse != FREE)
- sc->sc_ctrl_inuse = RESET;
- wakeup(&sc->sc_ctrl_inuse);
- }
+ if (vsc->sc_nvqs >= 3)
+ vio_ctrl_wakeup(sc, FREE);
}
static inline uint16_t
@@ -1230,6 +1240,9 @@ vio_txeof(struct virtqueue *vq)
int r = 0;
int slot, len;
+ if (!ISSET(ifp->if_flags, IFF_RUNNING))
+ return 0;
+
while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot];
r++;
@@ -1363,32 +1376,15 @@ out:
return r;
}
-/*
- * XXXSMP As long as some per-ifp ioctl(2)s are executed with the
- * NET_LOCK() deadlocks are possible. So release it here.
- */
-static inline int
-vio_sleep(struct vio_softc *sc, const char *wmesg)
-{
- int status = rw_status(&netlock);
-
- if (status != RW_WRITE && status != RW_READ)
- return tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg,
- INFSLP);
-
- return rwsleep_nsec(&sc->sc_ctrl_inuse, &netlock, PRIBIO|PCATCH, wmesg,
- INFSLP);
-}
-
int
vio_wait_ctrl(struct vio_softc *sc)
{
int r = 0;
while (sc->sc_ctrl_inuse != FREE) {
- r = vio_sleep(sc, "viowait");
- if (r == EINTR)
- return r;
+ if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc))
+ return ENXIO;
+ r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viowait", INFSLP);
}
sc->sc_ctrl_inuse = INUSE;
@@ -1400,14 +1396,16 @@ vio_wait_ctrl_done(struct vio_softc *sc)
{
int r = 0;
- while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) {
- if (sc->sc_ctrl_inuse == RESET) {
- r = 1;
- break;
+ while (sc->sc_ctrl_inuse != DONE) {
+ if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc))
+ return ENXIO;
+ r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viodone",
+ VIRTIO_NET_CTRL_TIMEOUT);
+ if (r == EWOULDBLOCK) {
+ printf("%s: ctrl queue timeout", sc->sc_dev.dv_xname);
+ vio_ctrl_wakeup(sc, RESET);
+ return ENXIO;
}
- r = vio_sleep(sc, "viodone");
- if (r == EINTR)
- break;
}
return r;
}
diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h
index d12cb2c59b8..ee9dc877bae 100644
--- a/sys/dev/pv/virtiovar.h
+++ b/sys/dev/pv/virtiovar.h
@@ -154,6 +154,7 @@ struct virtio_ops {
void (*write_dev_cfg_8)(struct virtio_softc *, int, uint64_t);
uint16_t (*read_queue_size)(struct virtio_softc *, uint16_t);
void (*setup_queue)(struct virtio_softc *, struct virtqueue *, uint64_t);
+ int (*get_status)(struct virtio_softc *);
void (*set_status)(struct virtio_softc *, int);
int (*neg_features)(struct virtio_softc *, const struct virtio_feature_name *);
int (*poll_intr)(void *);
@@ -197,9 +198,10 @@ struct virtio_softc {
#define virtio_setup_queue(sc, i, v) (sc)->sc_ops->setup_queue(sc, i, v)
#define virtio_negotiate_features(sc, n) (sc)->sc_ops->neg_features(sc, n)
#define virtio_poll_intr(sc) (sc)->sc_ops->poll_intr(sc)
+#define virtio_get_status(sc) (sc)->sc_ops->get_status(sc)
+#define virtio_set_status(sc, i) (sc)->sc_ops->set_status(sc, i)
/* only for transport drivers */
-#define virtio_set_status(sc, i) (sc)->sc_ops->set_status(sc, i)
#define virtio_device_reset(sc) virtio_set_status((sc), 0)
static inline int
Don't release NET_LOCK in vio(4) ioctl