Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: Don't release NET_LOCK in vio(4) ioctl
To:
Stefan Fritsch <sf@sfritsch.de>
Cc:
Mark Kettenis <mark.kettenis@xs4all.nl>, mvs@openbsd.org, tech@openbsd.org
Date:
Wed, 15 May 2024 16:53:59 +0200

Download raw body.

Thread
  • Martin Pieuchot:

    Don't release NET_LOCK in vio(4) ioctl

  • On Sun, Apr 28, 2024 at 10:23:08AM +0200, Stefan Fritsch wrote:
    > On Thu, 25 Apr 2024, Mark Kettenis wrote:
    > > > it seems I went down the wrong rabbit hole with my first diff. The actual 
    > > > issue is that signals are not handled correctly while sleeping. After a 
    > > > signal, there is a race condition where sc_ctrl_inuse is first set to FREE 
    > > > and then the interrupt handler sets it to DONE, causing a hang in the next 
    > > > vio_wait_ctrl() call. This was probably not noticed before NET_LOCK 
    > > > existed, because an ifconfig down/up fixed it. However since the NET_LOCK 
    > > > now serializes the relevant ioctl paths, a parallel ifconfig down now 
    > > > hangs too, waiting for the NET_LOCK. The attached patch does three things:
    > > > 
    > > > * Revert the NET_LOCK unlocking work-around.
    > > > 
    > > > * Remove PCATCH from the sleep calls so that we always wait for the control
    > > >   requests to complete, avoiding the race with vio_ctrleof().
    > > 
    > > What guarantee is there that those control requests complete?
    > 
    > in the absence of hypervisor bugs, it should complete. Which probably 
    > means we should not trust it too much. Also, since virtio 1.x, there is a 
    > bit in the device status that the hypervisor can use to tell the guest 
    > that the device needs a reset.
    > 
    > The attached updated diff now limits the wait on control queue requests to 
    > 5 seconds. If there is a timeout or if the device-needs-reset status bit 
    > is set, the control queue is not tried again until the next ifconfig 
    > down/up. Reading the device status needed a new interface in the transport 
    > drivers. I have a raspi with qemu now, so I could also test the 
    > virtio-mmio part. I have also fixed your and bluhm's other comments.
    
    I have tested it with network load on KVM.  OK bluhm@
    
    > diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
    > index 4d92508f72e..b5746e7ab3b 100644
    > --- a/sys/dev/fdt/virtio_mmio.c
    > +++ b/sys/dev/fdt/virtio_mmio.c
    > @@ -97,6 +97,7 @@ void		virtio_mmio_write_device_config_4(struct virtio_softc *, int, uint32_t);
    >  void		virtio_mmio_write_device_config_8(struct virtio_softc *, int, uint64_t);
    >  uint16_t	virtio_mmio_read_queue_size(struct virtio_softc *, uint16_t);
    >  void		virtio_mmio_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t);
    > +int		virtio_mmio_get_status(struct virtio_softc *);
    >  void		virtio_mmio_set_status(struct virtio_softc *, int);
    >  int		virtio_mmio_negotiate_features(struct virtio_softc *,
    >      const struct virtio_feature_name *);
    > @@ -144,6 +145,7 @@ struct virtio_ops virtio_mmio_ops = {
    >  	virtio_mmio_write_device_config_8,
    >  	virtio_mmio_read_queue_size,
    >  	virtio_mmio_setup_queue,
    > +	virtio_mmio_get_status,
    >  	virtio_mmio_set_status,
    >  	virtio_mmio_negotiate_features,
    >  	virtio_mmio_intr,
    > @@ -194,6 +196,15 @@ virtio_mmio_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
    >  	}
    >  }
    >  
    > +int
    > +virtio_mmio_get_status(struct virtio_softc *vsc)
    > +{
    > +	struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc;
    > +
    > +	return bus_space_read_4(sc->sc_iot, sc->sc_ioh,
    > +	    VIRTIO_MMIO_STATUS);
    > +}
    > +
    >  void
    >  virtio_mmio_set_status(struct virtio_softc *vsc, int status)
    >  {
    > diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
    > index 36a62c13bd1..b60fe740587 100644
    > --- a/sys/dev/pci/virtio_pci.c
    > +++ b/sys/dev/pci/virtio_pci.c
    > @@ -72,6 +72,7 @@ void		virtio_pci_write_device_config_4(struct virtio_softc *, int, uint32_t);
    >  void		virtio_pci_write_device_config_8(struct virtio_softc *, int, uint64_t);
    >  uint16_t	virtio_pci_read_queue_size(struct virtio_softc *, uint16_t);
    >  void		virtio_pci_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t);
    > +int		virtio_pci_get_status(struct virtio_softc *);
    >  void		virtio_pci_set_status(struct virtio_softc *, int);
    >  int		virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *);
    >  int		virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *);
    > @@ -155,6 +156,7 @@ struct virtio_ops virtio_pci_ops = {
    >  	virtio_pci_write_device_config_8,
    >  	virtio_pci_read_queue_size,
    >  	virtio_pci_setup_queue,
    > +	virtio_pci_get_status,
    >  	virtio_pci_set_status,
    >  	virtio_pci_negotiate_features,
    >  	virtio_pci_poll_intr,
    > @@ -275,6 +277,18 @@ virtio_pci_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
    >  	}
    >  }
    >  
    > +int
    > +virtio_pci_get_status(struct virtio_softc *vsc)
    > +{
    > +	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
    > +
    > +	if (sc->sc_sc.sc_version_1)
    > +		return CREAD(sc, device_status);
    > +	else
    > +		return bus_space_read_1(sc->sc_iot, sc->sc_ioh,
    > +		    VIRTIO_CONFIG_DEVICE_STATUS);
    > +}
    > +
    >  void
    >  virtio_pci_set_status(struct virtio_softc *vsc, int status)
    >  {
    > diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
    > index 9ad8e36e346..456e332890a 100644
    > --- a/sys/dev/pv/if_vio.c
    > +++ b/sys/dev/pv/if_vio.c
    > @@ -252,6 +252,7 @@ struct vio_softc {
    >  #define VIRTIO_NET_TX_MAXNSEGS		16 /* for larger chains, defrag */
    >  #define VIRTIO_NET_CTRL_MAC_MC_ENTRIES	64 /* for more entries, use ALLMULTI */
    >  #define VIRTIO_NET_CTRL_MAC_UC_ENTRIES	 1 /* one entry for own unicast addr */
    > +#define VIRTIO_NET_CTRL_TIMEOUT		(5*1000*1000*1000ULL) /* 5 seconds */
    >  
    >  #define VIO_CTRL_MAC_INFO_SIZE					\
    >  	(2*sizeof(struct virtio_net_ctrl_mac_tbl) +		\
    > @@ -512,6 +513,17 @@ vio_put_lladdr(struct arpcom *ac, struct virtio_softc *vsc)
    >  	}
    >  }
    >  
    > +static int vio_needs_reset(struct vio_softc *sc)
    > +{
    > +	if (virtio_get_status(sc->sc_virtio) &
    > +	    VIRTIO_CONFIG_DEVICE_STATUS_DEVICE_NEEDS_RESET) {
    > +		printf("%s: device needs reset", sc->sc_dev.dv_xname);
    > +		vio_ctrl_wakeup(sc, RESET);
    > +		return 1;
    > +	}
    > +	return 0;
    > +}
    > +
    >  void
    >  vio_attach(struct device *parent, struct device *self, void *aux)
    >  {
    > @@ -649,6 +661,7 @@ vio_config_change(struct virtio_softc *vsc)
    >  {
    >  	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
    >  	vio_link_state(&sc->sc_ac.ac_if);
    > +	vio_needs_reset(sc);
    >  	return 1;
    >  }
    >  
    > @@ -703,7 +716,7 @@ vio_stop(struct ifnet *ifp, int disable)
    >  	virtio_reset(vsc);
    >  	vio_rxeof(sc);
    >  	if (vsc->sc_nvqs >= 3)
    > -		vio_ctrleof(&sc->sc_vq[VQCTL]);
    > +		vio_ctrl_wakeup(sc, RESET);
    >  	vio_tx_drain(sc);
    >  	if (disable)
    >  		vio_rx_drain(sc);
    > @@ -714,11 +727,8 @@ vio_stop(struct ifnet *ifp, int disable)
    >  	if (vsc->sc_nvqs >= 3)
    >  		virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
    >  	virtio_reinit_end(vsc);
    > -	if (vsc->sc_nvqs >= 3) {
    > -		if (sc->sc_ctrl_inuse != FREE)
    > -			sc->sc_ctrl_inuse = RESET;
    > -		wakeup(&sc->sc_ctrl_inuse);
    > -	}
    > +	if (vsc->sc_nvqs >= 3)
    > +		vio_ctrl_wakeup(sc, FREE);
    >  }
    >  
    >  static inline uint16_t
    > @@ -1230,6 +1240,9 @@ vio_txeof(struct virtqueue *vq)
    >  	int r = 0;
    >  	int slot, len;
    >  
    > +	if (!ISSET(ifp->if_flags, IFF_RUNNING))
    > +		return 0;
    > +
    >  	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
    >  		struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot];
    >  		r++;
    > @@ -1363,32 +1376,15 @@ out:
    >  	return r;
    >  }
    >  
    > -/*
    > - * XXXSMP As long as some per-ifp ioctl(2)s are executed with the
    > - * NET_LOCK() deadlocks are possible.  So release it here.
    > - */
    > -static inline int
    > -vio_sleep(struct vio_softc *sc, const char *wmesg)
    > -{
    > -	int status = rw_status(&netlock);
    > -
    > -	if (status != RW_WRITE && status != RW_READ)
    > -		return tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg,
    > -		    INFSLP);
    > -
    > -	return rwsleep_nsec(&sc->sc_ctrl_inuse, &netlock, PRIBIO|PCATCH, wmesg,
    > -	    INFSLP);
    > -}
    > -
    >  int
    >  vio_wait_ctrl(struct vio_softc *sc)
    >  {
    >  	int r = 0;
    >  
    >  	while (sc->sc_ctrl_inuse != FREE) {
    > -		r = vio_sleep(sc, "viowait");
    > -		if (r == EINTR)
    > -			return r;
    > +		if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc))
    > +			return ENXIO;
    > +		r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viowait", INFSLP);
    >  	}
    >  	sc->sc_ctrl_inuse = INUSE;
    >  
    > @@ -1400,14 +1396,16 @@ vio_wait_ctrl_done(struct vio_softc *sc)
    >  {
    >  	int r = 0;
    >  
    > -	while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) {
    > -		if (sc->sc_ctrl_inuse == RESET) {
    > -			r = 1;
    > -			break;
    > +	while (sc->sc_ctrl_inuse != DONE) {
    > +		if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc))
    > +			return ENXIO;
    > +		r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viodone",
    > +		    VIRTIO_NET_CTRL_TIMEOUT);
    > +		if (r == EWOULDBLOCK) {
    > +			printf("%s: ctrl queue timeout", sc->sc_dev.dv_xname);
    > +			vio_ctrl_wakeup(sc, RESET);
    > +			return ENXIO;
    >  		}
    > -		r = vio_sleep(sc, "viodone");
    > -		if (r == EINTR)
    > -			break;
    >  	}
    >  	return r;
    >  }
    > diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h
    > index d12cb2c59b8..ee9dc877bae 100644
    > --- a/sys/dev/pv/virtiovar.h
    > +++ b/sys/dev/pv/virtiovar.h
    > @@ -154,6 +154,7 @@ struct virtio_ops {
    >  	void		(*write_dev_cfg_8)(struct virtio_softc *, int, uint64_t);
    >  	uint16_t	(*read_queue_size)(struct virtio_softc *, uint16_t);
    >  	void		(*setup_queue)(struct virtio_softc *, struct virtqueue *, uint64_t);
    > +	int		(*get_status)(struct virtio_softc *);
    >  	void		(*set_status)(struct virtio_softc *, int);
    >  	int		(*neg_features)(struct virtio_softc *, const struct virtio_feature_name *);
    >  	int		(*poll_intr)(void *);
    > @@ -197,9 +198,10 @@ struct virtio_softc {
    >  #define	virtio_setup_queue(sc, i, v)		(sc)->sc_ops->setup_queue(sc, i, v)
    >  #define	virtio_negotiate_features(sc, n)	(sc)->sc_ops->neg_features(sc, n)
    >  #define	virtio_poll_intr(sc)			(sc)->sc_ops->poll_intr(sc)
    > +#define	virtio_get_status(sc)			(sc)->sc_ops->get_status(sc)
    > +#define	virtio_set_status(sc, i)		(sc)->sc_ops->set_status(sc, i)
    >  
    >  /* only for transport drivers */
    > -#define	virtio_set_status(sc, i)		(sc)->sc_ops->set_status(sc, i)
    >  #define	virtio_device_reset(sc)			virtio_set_status((sc), 0)
    >  
    >  static inline int
    
    
    
  • Martin Pieuchot:

    Don't release NET_LOCK in vio(4) ioctl