From: Alexander Bluhm Subject: Re: Don't release NET_LOCK in vio(4) ioctl To: Stefan Fritsch Cc: Mark Kettenis , mvs@openbsd.org, tech@openbsd.org Date: Wed, 15 May 2024 16:53:59 +0200 On Sun, Apr 28, 2024 at 10:23:08AM +0200, Stefan Fritsch wrote: > On Thu, 25 Apr 2024, Mark Kettenis wrote: > > > it seems I went down the wrong rabbit hole with my first diff. The actual > > > issue is that signals are not handled correctly while sleeping. After a > > > signal, there is a race condition where sc_ctrl_inuse is first set to FREE > > > and then the interrupt handler sets it to DONE, causing a hang in the next > > > vio_wait_ctrl() call. This was probably not noticed before NET_LOCK > > > existed, because an ifconfig down/up fixed it. However since the NET_LOCK > > > now serializes the relevant ioctl paths, a parallel ifconfig down now > > > hangs too, waiting for the NET_LOCK. The attached patch does three things: > > > > > > * Revert the NET_LOCK unlocking work-around. > > > > > > * Remove PCATCH from the sleep calls so that we always wait for the control > > > requests to complete, avoiding the race with vio_ctrleof(). > > > > What guarantee is there that those control requests complete? > > in the absence of hypervisor bugs, it should complete. Which probably > means we should not trust it too much. Also, since virtio 1.x, there is a > bit in the device status that the hypervisor can use to tell the guest > that the device needs a reset. > > The attached updated diff now limits the wait on control queue requests to > 5 seconds. If there is a timeout or if the device-needs-reset status bit > is set, the control queue is not tried again until the next ifconfig > down/up. Reading the device status needed a new interface in the transport > drivers. I have a raspi with qemu now, so I could also test the > virtio-mmio part. I have also fixed your and bluhm's other comments. I have tested it with network load on KVM. OK bluhm@ > diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c > index 4d92508f72e..b5746e7ab3b 100644 > --- a/sys/dev/fdt/virtio_mmio.c > +++ b/sys/dev/fdt/virtio_mmio.c > @@ -97,6 +97,7 @@ void virtio_mmio_write_device_config_4(struct virtio_softc *, int, uint32_t); > void virtio_mmio_write_device_config_8(struct virtio_softc *, int, uint64_t); > uint16_t virtio_mmio_read_queue_size(struct virtio_softc *, uint16_t); > void virtio_mmio_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t); > +int virtio_mmio_get_status(struct virtio_softc *); > void virtio_mmio_set_status(struct virtio_softc *, int); > int virtio_mmio_negotiate_features(struct virtio_softc *, > const struct virtio_feature_name *); > @@ -144,6 +145,7 @@ struct virtio_ops virtio_mmio_ops = { > virtio_mmio_write_device_config_8, > virtio_mmio_read_queue_size, > virtio_mmio_setup_queue, > + virtio_mmio_get_status, > virtio_mmio_set_status, > virtio_mmio_negotiate_features, > virtio_mmio_intr, > @@ -194,6 +196,15 @@ virtio_mmio_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq, > } > } > > +int > +virtio_mmio_get_status(struct virtio_softc *vsc) > +{ > + struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; > + > + return bus_space_read_4(sc->sc_iot, sc->sc_ioh, > + VIRTIO_MMIO_STATUS); > +} > + > void > virtio_mmio_set_status(struct virtio_softc *vsc, int status) > { > diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c > index 36a62c13bd1..b60fe740587 100644 > --- a/sys/dev/pci/virtio_pci.c > +++ b/sys/dev/pci/virtio_pci.c > @@ -72,6 +72,7 @@ void virtio_pci_write_device_config_4(struct virtio_softc *, int, uint32_t); > void virtio_pci_write_device_config_8(struct virtio_softc *, int, uint64_t); > uint16_t virtio_pci_read_queue_size(struct virtio_softc *, uint16_t); > void virtio_pci_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t); > +int virtio_pci_get_status(struct virtio_softc *); > void virtio_pci_set_status(struct virtio_softc *, int); > int virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *); > int virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *); > @@ -155,6 +156,7 @@ struct virtio_ops virtio_pci_ops = { > virtio_pci_write_device_config_8, > virtio_pci_read_queue_size, > virtio_pci_setup_queue, > + virtio_pci_get_status, > virtio_pci_set_status, > virtio_pci_negotiate_features, > virtio_pci_poll_intr, > @@ -275,6 +277,18 @@ virtio_pci_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq, > } > } > > +int > +virtio_pci_get_status(struct virtio_softc *vsc) > +{ > + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; > + > + if (sc->sc_sc.sc_version_1) > + return CREAD(sc, device_status); > + else > + return bus_space_read_1(sc->sc_iot, sc->sc_ioh, > + VIRTIO_CONFIG_DEVICE_STATUS); > +} > + > void > virtio_pci_set_status(struct virtio_softc *vsc, int status) > { > diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c > index 9ad8e36e346..456e332890a 100644 > --- a/sys/dev/pv/if_vio.c > +++ b/sys/dev/pv/if_vio.c > @@ -252,6 +252,7 @@ struct vio_softc { > #define VIRTIO_NET_TX_MAXNSEGS 16 /* for larger chains, defrag */ > #define VIRTIO_NET_CTRL_MAC_MC_ENTRIES 64 /* for more entries, use ALLMULTI */ > #define VIRTIO_NET_CTRL_MAC_UC_ENTRIES 1 /* one entry for own unicast addr */ > +#define VIRTIO_NET_CTRL_TIMEOUT (5*1000*1000*1000ULL) /* 5 seconds */ > > #define VIO_CTRL_MAC_INFO_SIZE \ > (2*sizeof(struct virtio_net_ctrl_mac_tbl) + \ > @@ -512,6 +513,17 @@ vio_put_lladdr(struct arpcom *ac, struct virtio_softc *vsc) > } > } > > +static int vio_needs_reset(struct vio_softc *sc) > +{ > + if (virtio_get_status(sc->sc_virtio) & > + VIRTIO_CONFIG_DEVICE_STATUS_DEVICE_NEEDS_RESET) { > + printf("%s: device needs reset", sc->sc_dev.dv_xname); > + vio_ctrl_wakeup(sc, RESET); > + return 1; > + } > + return 0; > +} > + > void > vio_attach(struct device *parent, struct device *self, void *aux) > { > @@ -649,6 +661,7 @@ vio_config_change(struct virtio_softc *vsc) > { > struct vio_softc *sc = (struct vio_softc *)vsc->sc_child; > vio_link_state(&sc->sc_ac.ac_if); > + vio_needs_reset(sc); > return 1; > } > > @@ -703,7 +716,7 @@ vio_stop(struct ifnet *ifp, int disable) > virtio_reset(vsc); > vio_rxeof(sc); > if (vsc->sc_nvqs >= 3) > - vio_ctrleof(&sc->sc_vq[VQCTL]); > + vio_ctrl_wakeup(sc, RESET); > vio_tx_drain(sc); > if (disable) > vio_rx_drain(sc); > @@ -714,11 +727,8 @@ vio_stop(struct ifnet *ifp, int disable) > if (vsc->sc_nvqs >= 3) > virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]); > virtio_reinit_end(vsc); > - if (vsc->sc_nvqs >= 3) { > - if (sc->sc_ctrl_inuse != FREE) > - sc->sc_ctrl_inuse = RESET; > - wakeup(&sc->sc_ctrl_inuse); > - } > + if (vsc->sc_nvqs >= 3) > + vio_ctrl_wakeup(sc, FREE); > } > > static inline uint16_t > @@ -1230,6 +1240,9 @@ vio_txeof(struct virtqueue *vq) > int r = 0; > int slot, len; > > + if (!ISSET(ifp->if_flags, IFF_RUNNING)) > + return 0; > + > while (virtio_dequeue(vsc, vq, &slot, &len) == 0) { > struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot]; > r++; > @@ -1363,32 +1376,15 @@ out: > return r; > } > > -/* > - * XXXSMP As long as some per-ifp ioctl(2)s are executed with the > - * NET_LOCK() deadlocks are possible. So release it here. > - */ > -static inline int > -vio_sleep(struct vio_softc *sc, const char *wmesg) > -{ > - int status = rw_status(&netlock); > - > - if (status != RW_WRITE && status != RW_READ) > - return tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg, > - INFSLP); > - > - return rwsleep_nsec(&sc->sc_ctrl_inuse, &netlock, PRIBIO|PCATCH, wmesg, > - INFSLP); > -} > - > int > vio_wait_ctrl(struct vio_softc *sc) > { > int r = 0; > > while (sc->sc_ctrl_inuse != FREE) { > - r = vio_sleep(sc, "viowait"); > - if (r == EINTR) > - return r; > + if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc)) > + return ENXIO; > + r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viowait", INFSLP); > } > sc->sc_ctrl_inuse = INUSE; > > @@ -1400,14 +1396,16 @@ vio_wait_ctrl_done(struct vio_softc *sc) > { > int r = 0; > > - while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) { > - if (sc->sc_ctrl_inuse == RESET) { > - r = 1; > - break; > + while (sc->sc_ctrl_inuse != DONE) { > + if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc)) > + return ENXIO; > + r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viodone", > + VIRTIO_NET_CTRL_TIMEOUT); > + if (r == EWOULDBLOCK) { > + printf("%s: ctrl queue timeout", sc->sc_dev.dv_xname); > + vio_ctrl_wakeup(sc, RESET); > + return ENXIO; > } > - r = vio_sleep(sc, "viodone"); > - if (r == EINTR) > - break; > } > return r; > } > diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h > index d12cb2c59b8..ee9dc877bae 100644 > --- a/sys/dev/pv/virtiovar.h > +++ b/sys/dev/pv/virtiovar.h > @@ -154,6 +154,7 @@ struct virtio_ops { > void (*write_dev_cfg_8)(struct virtio_softc *, int, uint64_t); > uint16_t (*read_queue_size)(struct virtio_softc *, uint16_t); > void (*setup_queue)(struct virtio_softc *, struct virtqueue *, uint64_t); > + int (*get_status)(struct virtio_softc *); > void (*set_status)(struct virtio_softc *, int); > int (*neg_features)(struct virtio_softc *, const struct virtio_feature_name *); > int (*poll_intr)(void *); > @@ -197,9 +198,10 @@ struct virtio_softc { > #define virtio_setup_queue(sc, i, v) (sc)->sc_ops->setup_queue(sc, i, v) > #define virtio_negotiate_features(sc, n) (sc)->sc_ops->neg_features(sc, n) > #define virtio_poll_intr(sc) (sc)->sc_ops->poll_intr(sc) > +#define virtio_get_status(sc) (sc)->sc_ops->get_status(sc) > +#define virtio_set_status(sc, i) (sc)->sc_ops->set_status(sc, i) > > /* only for transport drivers */ > -#define virtio_set_status(sc, i) (sc)->sc_ops->set_status(sc, i) > #define virtio_device_reset(sc) virtio_set_status((sc), 0) > > static inline int