Index | Thread | Search

From:
Stefan Fritsch <sf@sfritsch.de>
Subject:
Re: Don't release NET_LOCK in vio(4) ioctl
To:
Vitaliy Makkoveev <mvs@openbsd.org>
Cc:
Alexander Bluhm <alexander.bluhm@gmx.net>, tech@openbsd.org
Date:
Thu, 18 Apr 2024 08:24:33 +0200

Download raw body.

Thread
On Wed, 17 Apr 2024, Vitaliy Makkoveev wrote:
> >>> Sleep with netlock held stops packet processing and operations in inet
> >>> sockets. This is much worse when use another lock with netlock release.
> >>> pflowioctl() could be good example.
> >> 
> >> Holding net lock for a long time is not good.  But kernel crashes
> >> are worse.  I prefer short stop of packet processing during ifconfig
> >> over a crash in netinet code.  Here is the original bug report
> >> 
> >> https://marc.info/?l=openbsd-bugs&m=166124228103568&w=2
> >> 
> >> I would try get this in and work on fine grained vio(4) locking
> >> afterwards.
> > 
> > I agree. I have tried with a separate lock that serializes the whole 
> > vio_ioctl function but I still get crashes with that:
> > 
> > db_command_loop() at db_command_loop+0xea
> > db_trap(4,0) at db_trap+0x129
> > db_ktrap(4,0,ffff8000239d0f60) at db_ktrap+0x111
> > kerntrap(ffff8000239d0f60) at kerntrap+0xa7
> > alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
> > rt_ifa_addlocal(ffff8000008d7b00) at rt_ifa_addlocal+0x2a
> > in6_update_ifa(ffff8000000af2a8,ffff8000239d1230,0) at 
> > in6_update_ifa+0x581
> > in6_ifattach_linklocal(ffff8000000af2a8,0) at in6_ifattach_linklocal+0x204
> > in6_ifattach(ffff8000000af2a8) at in6_ifattach+0x103
> > ifioctl(fffffd8051ac7dd8,801169ab,ffff8000239d13f0,ffff8000239834a8) at 
> > ifioctl+0xc99
> > sys_ioctl(ffff8000239834a8,ffff8000239d15a0,ffff8000239d1510) at 
> > sys_ioctl+0x2af
> > syscall(ffff8000239d15a0) at syscall+0x588
> > Xsyscall() at Xsyscall+0x128
> > end of kernel
> > end trace frame: 0x773f17f14c30, count: -13
> > 
> 
> Just for curiosity, can I see your diff?

Sure, attached below. It is inspired by cad(4). It has not seen much 
testing, though.

> 
> > I suspect that the global ifioctl() function needs some changes in order 
> > to allow NIC drivers' ioctl functions to drop the NET_LOCK.
> 
> Periodically, I think about that. At least for some pseudo drivers
> netlock could be avoided.

diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
index 4d92508f72e..24568e0348b 100644
--- a/sys/dev/fdt/virtio_mmio.c
+++ b/sys/dev/fdt/virtio_mmio.c
@@ -101,6 +101,7 @@ void		virtio_mmio_set_status(struct virtio_softc *, int);
 int		virtio_mmio_negotiate_features(struct virtio_softc *,
     const struct virtio_feature_name *);
 int		virtio_mmio_intr(void *);
+void		virtio_mmio_intr_barrier(struct virtio_softc *vsc, int idx);
 
 struct virtio_mmio_softc {
 	struct virtio_softc	sc_sc;
@@ -147,6 +148,7 @@ struct virtio_ops virtio_mmio_ops = {
 	virtio_mmio_set_status,
 	virtio_mmio_negotiate_features,
 	virtio_mmio_intr,
+	virtio_mmio_intr_barrier,
 };
 
 uint16_t
@@ -496,6 +498,15 @@ virtio_mmio_intr(void *arg)
 	return r;
 }
 
+void
+virtio_mmio_intr_barrier(struct virtio_softc *vsc, int idx)
+{
+	struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc;
+
+	if (sc->sc_ih)
+		intr_barrier(sc->sc_ih);
+}
+
 void
 virtio_mmio_kick(struct virtio_softc *vsc, uint16_t idx)
 {
diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
index 36a62c13bd1..30b20029c27 100644
--- a/sys/dev/pci/virtio_pci.c
+++ b/sys/dev/pci/virtio_pci.c
@@ -81,6 +81,7 @@ int		virtio_pci_msix_establish(struct virtio_pci_softc *, struct pci_attach_args
 int		virtio_pci_setup_msix(struct virtio_pci_softc *, struct pci_attach_args *, int);
 void		virtio_pci_free_irqs(struct virtio_pci_softc *);
 int		virtio_pci_poll_intr(void *);
+void		virtio_pci_intr_barrier(struct virtio_softc *, int);
 int		virtio_pci_legacy_intr(void *);
 int		virtio_pci_legacy_intr_mpsafe(void *);
 int		virtio_pci_config_intr(void *);
@@ -158,6 +159,7 @@ struct virtio_ops virtio_pci_ops = {
 	virtio_pci_set_status,
 	virtio_pci_negotiate_features,
 	virtio_pci_poll_intr,
+	virtio_pci_intr_barrier,
 };
 
 static inline
@@ -1112,6 +1114,27 @@ virtio_pci_poll_intr(void *arg)
 	return r;
 }
 
+/*
+ * Call intr_barrier for interrupt of vqueue idx.
+ * idx == -1 for config interrupt.
+ */
+void
+virtio_pci_intr_barrier(struct virtio_softc *vsc, int idx)
+{
+	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+	void *ih = NULL;
+
+	if (idx == -1 || sc->sc_irq_type == IRQ_NO_MSIX)
+		ih = sc->sc_ih[0];
+	else if (sc->sc_irq_type == IRQ_MSIX_SHARED)
+		ih = sc->sc_ih[1];
+	else if (sc->sc_irq_type == IRQ_MSIX_PER_VQ)
+		ih = sc->sc_ih[1 + idx];
+
+	if (ih)
+		intr_barrier(ih);
+}
+
 void
 virtio_pci_kick(struct virtio_softc *vsc, uint16_t idx)
 {
diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
index 9ad8e36e346..b0278051884 100644
--- a/sys/dev/pv/if_vio.c
+++ b/sys/dev/pv/if_vio.c
@@ -193,9 +193,6 @@ struct virtio_net_ctrl_vlan {
 /*
  * if_viovar.h:
  */
-enum vio_ctrl_state {
-	FREE, INUSE, DONE, RESET
-};
 
 struct vio_softc {
 	struct device		sc_dev;
@@ -234,7 +231,8 @@ struct vio_softc {
 	struct mbuf		**sc_tx_mbufs;
 	struct if_rxring	sc_rx_ring;
 
-	enum vio_ctrl_state	sc_ctrl_inuse;
+	int			sc_ctrl_inuse;
+	struct rwlock		sc_cfg_lock;
 
 	struct timeout		sc_txtick, sc_rxtick;
 };
@@ -264,7 +262,7 @@ void	vio_attach(struct device *, struct device *, void *);
 
 /* ifnet interface functions */
 int	vio_init(struct ifnet *);
-void	vio_stop(struct ifnet *, int);
+void	vio_stop(struct ifnet *);
 void	vio_start(struct ifnet *);
 int	vio_ioctl(struct ifnet *, u_long, caddr_t);
 void	vio_get_lladdr(struct arpcom *ac, struct virtio_softc *vsc);
@@ -295,9 +293,7 @@ void	vio_iff(struct vio_softc *);
 int	vio_media_change(struct ifnet *);
 void	vio_media_status(struct ifnet *, struct ifmediareq *);
 int	vio_ctrleof(struct virtqueue *);
-int	vio_wait_ctrl(struct vio_softc *sc);
 int	vio_wait_ctrl_done(struct vio_softc *sc);
-void	vio_ctrl_wakeup(struct vio_softc *, enum vio_ctrl_state);
 int	vio_alloc_mem(struct vio_softc *);
 int	vio_alloc_dmamem(struct vio_softc *);
 void	vio_free_dmamem(struct vio_softc *);
@@ -379,7 +375,7 @@ vio_free_dmamem(struct vio_softc *sc)
  *   sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
  *			 class command (WRITE)
  * sc_ctrl_* structures are allocated only one each; they are protected by
- * sc_ctrl_inuse, which must only be accessed at splnet
+ * sc_cfg_lock
  *
  * metadata headers for received frames are stored at the start of the
  * rx mbufs.
@@ -590,6 +586,7 @@ vio_attach(struct device *parent, struct device *self, void *aux)
 		goto err;
 
 	strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
+	rw_init(&sc->sc_cfg_lock, "viocfg");
 	ifp->if_softc = sc;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_start = vio_start;
@@ -678,7 +675,9 @@ vio_init(struct ifnet *ifp)
 {
 	struct vio_softc *sc = ifp->if_softc;
 
-	vio_stop(ifp, 0);
+	rw_assert_wrlock(&sc->sc_cfg_lock);
+
+	vio_stop(ifp);
 	if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
 	    sc->sc_vq[VQRX].vq_num);
 	vio_populate_rx_mbufs(sc);
@@ -690,23 +689,28 @@ vio_init(struct ifnet *ifp)
 }
 
 void
-vio_stop(struct ifnet *ifp, int disable)
+vio_stop(struct ifnet *ifp)
 {
 	struct vio_softc *sc = ifp->if_softc;
 	struct virtio_softc *vsc = sc->sc_virtio;
 
+	rw_assert_wrlock(&sc->sc_cfg_lock);
+
 	timeout_del(&sc->sc_txtick);
 	timeout_del(&sc->sc_rxtick);
 	ifp->if_flags &= ~IFF_RUNNING;
 	ifq_clr_oactive(&ifp->if_snd);
+
+	/* Avoid lock order issues with barriers. */
+        NET_UNLOCK();
+
 	/* only way to stop I/O and DMA is resetting... */
 	virtio_reset(vsc);
-	vio_rxeof(sc);
-	if (vsc->sc_nvqs >= 3)
-		vio_ctrleof(&sc->sc_vq[VQCTL]);
+	virtio_intr_barrier(vsc, VQRX);
+	virtio_intr_barrier(vsc, VQTX);
+	virtio_intr_barrier(vsc, VQCTL);
 	vio_tx_drain(sc);
-	if (disable)
-		vio_rx_drain(sc);
+	vio_rx_drain(sc);
 
 	virtio_reinit_start(vsc);
 	virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
@@ -714,11 +718,7 @@ vio_stop(struct ifnet *ifp, int disable)
 	if (vsc->sc_nvqs >= 3)
 		virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
 	virtio_reinit_end(vsc);
-	if (vsc->sc_nvqs >= 3) {
-		if (sc->sc_ctrl_inuse != FREE)
-			sc->sc_ctrl_inuse = RESET;
-		wakeup(&sc->sc_ctrl_inuse);
-	}
+	NET_LOCK();
 }
 
 static inline uint16_t
@@ -916,9 +916,23 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
 	struct vio_softc *sc = ifp->if_softc;
 	struct ifreq *ifr = (struct ifreq *)data;
-	int s, r = 0;
+	int s, r = 0, netlock_held = 1;
+
+	switch (cmd) {
+	case SIOCGIFMEDIA:
+	case SIOCSIFMEDIA:
+	case SIOCGIFSFFPAGE:
+		netlock_held = 0;
+		break;
+	}
 
+	if (netlock_held)
+		NET_UNLOCK();
+	rw_enter_write(&sc->sc_cfg_lock);
+	if (netlock_held)
+		NET_LOCK();
 	s = splnet();
+
 	switch (cmd) {
 	case SIOCSIFADDR:
 		ifp->if_flags |= IFF_UP;
@@ -937,7 +951,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 				vio_init(ifp);
 		} else {
 			if (ifp->if_flags & IFF_RUNNING)
-				vio_stop(ifp, 1);
+				vio_stop(ifp);
 		}
 		break;
 	case SIOCGIFMEDIA:
@@ -958,6 +972,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		r = 0;
 	}
 	splx(s);
+	rw_exit_write(&sc->sc_cfg_lock);
 	return r;
 }
 
@@ -1174,7 +1189,7 @@ vio_rxtick(void *arg)
 	splx(s);
 }
 
-/* free all the mbufs; called from if_stop(disable) */
+/* free all the mbufs; called from vio_stop() */
 void
 vio_rx_drain(struct vio_softc *sc)
 {
@@ -1312,9 +1327,6 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
 
 	splassert(IPL_NET);
 
-	if ((r = vio_wait_ctrl(sc)) != 0)
-		return r;
-
 	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX;
 	sc->sc_ctrl_cmd->command = cmd;
 	sc->sc_ctrl_rx->onoff = onoff;
@@ -1338,6 +1350,7 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
 	    sizeof(*sc->sc_ctrl_rx), 1);
 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
 	    sizeof(*sc->sc_ctrl_status), 0);
+	sc->sc_ctrl_inuse = 1;
 	virtio_enqueue_commit(vsc, vq, slot, 1);
 
 	if ((r = vio_wait_ctrl_done(sc)) != 0)
@@ -1359,7 +1372,6 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
 
 	DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, (int)onoff, r);
 out:
-	vio_ctrl_wakeup(sc, FREE);
 	return r;
 }
 
@@ -1372,6 +1384,7 @@ vio_sleep(struct vio_softc *sc, const char *wmesg)
 {
 	int status = rw_status(&netlock);
 
+	rw_assert_wrlock(&sc->sc_cfg_lock);
 	if (status != RW_WRITE && status != RW_READ)
 		return tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg,
 		    INFSLP);
@@ -1380,31 +1393,12 @@ vio_sleep(struct vio_softc *sc, const char *wmesg)
 	    INFSLP);
 }
 
-int
-vio_wait_ctrl(struct vio_softc *sc)
-{
-	int r = 0;
-
-	while (sc->sc_ctrl_inuse != FREE) {
-		r = vio_sleep(sc, "viowait");
-		if (r == EINTR)
-			return r;
-	}
-	sc->sc_ctrl_inuse = INUSE;
-
-	return r;
-}
-
 int
 vio_wait_ctrl_done(struct vio_softc *sc)
 {
 	int r = 0;
 
-	while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) {
-		if (sc->sc_ctrl_inuse == RESET) {
-			r = 1;
-			break;
-		}
+	while (sc->sc_ctrl_inuse) {
 		r = vio_sleep(sc, "viodone");
 		if (r == EINTR)
 			break;
@@ -1412,13 +1406,6 @@ vio_wait_ctrl_done(struct vio_softc *sc)
 	return r;
 }
 
-void
-vio_ctrl_wakeup(struct vio_softc *sc, enum vio_ctrl_state new)
-{
-	sc->sc_ctrl_inuse = new;
-	wakeup(&sc->sc_ctrl_inuse);
-}
-
 int
 vio_ctrleof(struct virtqueue *vq)
 {
@@ -1432,7 +1419,8 @@ again:
 		return r;
 	virtio_dequeue_commit(vq, slot);
 	r++;
-	vio_ctrl_wakeup(sc, DONE);
+	sc->sc_ctrl_inuse = 0;
+	wakeup(&sc->sc_ctrl_inuse);
 	if (virtio_start_vq_intr(vsc, vq))
 		goto again;
 
@@ -1450,9 +1438,6 @@ vio_set_rx_filter(struct vio_softc *sc)
 
 	splassert(IPL_NET);
 
-	if ((r = vio_wait_ctrl(sc)) != 0)
-		return r;
-
 	sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
 	sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;
 
@@ -1479,6 +1464,7 @@ vio_set_rx_filter(struct vio_softc *sc)
 	    sc->sc_ctrl_mac_tbl_mc->nentries * ETHER_ADDR_LEN, 1);
 	VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
 	    sizeof(*sc->sc_ctrl_status), 0);
+	sc->sc_ctrl_inuse = 1;
 	virtio_enqueue_commit(vsc, vq, slot, 1);
 
 	if ((r = vio_wait_ctrl_done(sc)) != 0)
@@ -1500,7 +1486,6 @@ vio_set_rx_filter(struct vio_softc *sc)
 	}
 
 out:
-	vio_ctrl_wakeup(sc, FREE);
 	return r;
 }
 
@@ -1516,6 +1501,7 @@ vio_iff(struct vio_softc *sc)
 	int promisc = 0, allmulti = 0, rxfilter = 0;
 	int r;
 
+	rw_assert_wrlock(&sc->sc_cfg_lock);
 	splassert(IPL_NET);
 
 	ifp->if_flags &= ~IFF_ALLMULTI;
diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h
index d12cb2c59b8..8cd53714afc 100644
--- a/sys/dev/pv/virtiovar.h
+++ b/sys/dev/pv/virtiovar.h
@@ -157,6 +157,7 @@ struct virtio_ops {
 	void		(*set_status)(struct virtio_softc *, int);
 	int		(*neg_features)(struct virtio_softc *, const struct virtio_feature_name *);
 	int		(*poll_intr)(void *);
+	void		(*intr_barrier)(struct virtio_softc *, int);
 };
 
 #define VIRTIO_CHILD_ERROR	((void*)1)
@@ -197,6 +198,7 @@ struct virtio_softc {
 #define	virtio_setup_queue(sc, i, v)		(sc)->sc_ops->setup_queue(sc, i, v)
 #define	virtio_negotiate_features(sc, n)	(sc)->sc_ops->neg_features(sc, n)
 #define	virtio_poll_intr(sc)			(sc)->sc_ops->poll_intr(sc)
+#define	virtio_intr_barrier(sc, i)		(sc)->sc_ops->intr_barrier(sc, i)
 
 /* only for transport drivers */
 #define	virtio_set_status(sc, i)		(sc)->sc_ops->set_status(sc, i)