From: Stefan Fritsch Subject: Re: Don't release NET_LOCK in vio(4) ioctl To: Vitaliy Makkoveev Cc: Alexander Bluhm , tech@openbsd.org Date: Thu, 18 Apr 2024 08:24:33 +0200 On Wed, 17 Apr 2024, Vitaliy Makkoveev wrote: > >>> Sleep with netlock held stops packet processing and operations in inet > >>> sockets. This is much worse when use another lock with netlock release. > >>> pflowioctl() could be good example. > >> > >> Holding net lock for a long time is not good. But kernel crashes > >> are worse. I prefer short stop of packet processing during ifconfig > >> over a crash in netinet code. Here is the original bug report > >> > >> https://marc.info/?l=openbsd-bugs&m=166124228103568&w=2 > >> > >> I would try get this in and work on fine grained vio(4) locking > >> afterwards. > > > > I agree. I have tried with a separate lock that serializes the whole > > vio_ioctl function but I still get crashes with that: > > > > db_command_loop() at db_command_loop+0xea > > db_trap(4,0) at db_trap+0x129 > > db_ktrap(4,0,ffff8000239d0f60) at db_ktrap+0x111 > > kerntrap(ffff8000239d0f60) at kerntrap+0xa7 > > alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b > > rt_ifa_addlocal(ffff8000008d7b00) at rt_ifa_addlocal+0x2a > > in6_update_ifa(ffff8000000af2a8,ffff8000239d1230,0) at > > in6_update_ifa+0x581 > > in6_ifattach_linklocal(ffff8000000af2a8,0) at in6_ifattach_linklocal+0x204 > > in6_ifattach(ffff8000000af2a8) at in6_ifattach+0x103 > > ifioctl(fffffd8051ac7dd8,801169ab,ffff8000239d13f0,ffff8000239834a8) at > > ifioctl+0xc99 > > sys_ioctl(ffff8000239834a8,ffff8000239d15a0,ffff8000239d1510) at > > sys_ioctl+0x2af > > syscall(ffff8000239d15a0) at syscall+0x588 > > Xsyscall() at Xsyscall+0x128 > > end of kernel > > end trace frame: 0x773f17f14c30, count: -13 > > > > Just for curiosity, can I see your diff? Sure, attached below. It is inspired by cad(4). It has not seen much testing, though. > > > I suspect that the global ifioctl() function needs some changes in order > > to allow NIC drivers' ioctl functions to drop the NET_LOCK. > > Periodically, I think about that. At least for some pseudo drivers > netlock could be avoided. diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c index 4d92508f72e..24568e0348b 100644 --- a/sys/dev/fdt/virtio_mmio.c +++ b/sys/dev/fdt/virtio_mmio.c @@ -101,6 +101,7 @@ void virtio_mmio_set_status(struct virtio_softc *, int); int virtio_mmio_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *); int virtio_mmio_intr(void *); +void virtio_mmio_intr_barrier(struct virtio_softc *vsc, int idx); struct virtio_mmio_softc { struct virtio_softc sc_sc; @@ -147,6 +148,7 @@ struct virtio_ops virtio_mmio_ops = { virtio_mmio_set_status, virtio_mmio_negotiate_features, virtio_mmio_intr, + virtio_mmio_intr_barrier, }; uint16_t @@ -496,6 +498,15 @@ virtio_mmio_intr(void *arg) return r; } +void +virtio_mmio_intr_barrier(struct virtio_softc *vsc, int idx) +{ + struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc; + + if (sc->sc_ih) + intr_barrier(sc->sc_ih); +} + void virtio_mmio_kick(struct virtio_softc *vsc, uint16_t idx) { diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c index 36a62c13bd1..30b20029c27 100644 --- a/sys/dev/pci/virtio_pci.c +++ b/sys/dev/pci/virtio_pci.c @@ -81,6 +81,7 @@ int virtio_pci_msix_establish(struct virtio_pci_softc *, struct pci_attach_args int virtio_pci_setup_msix(struct virtio_pci_softc *, struct pci_attach_args *, int); void virtio_pci_free_irqs(struct virtio_pci_softc *); int virtio_pci_poll_intr(void *); +void virtio_pci_intr_barrier(struct virtio_softc *, int); int virtio_pci_legacy_intr(void *); int virtio_pci_legacy_intr_mpsafe(void *); int virtio_pci_config_intr(void *); @@ -158,6 +159,7 @@ struct virtio_ops virtio_pci_ops = { virtio_pci_set_status, virtio_pci_negotiate_features, virtio_pci_poll_intr, + virtio_pci_intr_barrier, }; static inline @@ -1112,6 +1114,27 @@ virtio_pci_poll_intr(void *arg) return r; } +/* + * Call intr_barrier for interrupt of vqueue idx. + * idx == -1 for config interrupt. + */ +void +virtio_pci_intr_barrier(struct virtio_softc *vsc, int idx) +{ + struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc; + void *ih = NULL; + + if (idx == -1 || sc->sc_irq_type == IRQ_NO_MSIX) + ih = sc->sc_ih[0]; + else if (sc->sc_irq_type == IRQ_MSIX_SHARED) + ih = sc->sc_ih[1]; + else if (sc->sc_irq_type == IRQ_MSIX_PER_VQ) + ih = sc->sc_ih[1 + idx]; + + if (ih) + intr_barrier(ih); +} + void virtio_pci_kick(struct virtio_softc *vsc, uint16_t idx) { diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c index 9ad8e36e346..b0278051884 100644 --- a/sys/dev/pv/if_vio.c +++ b/sys/dev/pv/if_vio.c @@ -193,9 +193,6 @@ struct virtio_net_ctrl_vlan { /* * if_viovar.h: */ -enum vio_ctrl_state { - FREE, INUSE, DONE, RESET -}; struct vio_softc { struct device sc_dev; @@ -234,7 +231,8 @@ struct vio_softc { struct mbuf **sc_tx_mbufs; struct if_rxring sc_rx_ring; - enum vio_ctrl_state sc_ctrl_inuse; + int sc_ctrl_inuse; + struct rwlock sc_cfg_lock; struct timeout sc_txtick, sc_rxtick; }; @@ -264,7 +262,7 @@ void vio_attach(struct device *, struct device *, void *); /* ifnet interface functions */ int vio_init(struct ifnet *); -void vio_stop(struct ifnet *, int); +void vio_stop(struct ifnet *); void vio_start(struct ifnet *); int vio_ioctl(struct ifnet *, u_long, caddr_t); void vio_get_lladdr(struct arpcom *ac, struct virtio_softc *vsc); @@ -295,9 +293,7 @@ void vio_iff(struct vio_softc *); int vio_media_change(struct ifnet *); void vio_media_status(struct ifnet *, struct ifmediareq *); int vio_ctrleof(struct virtqueue *); -int vio_wait_ctrl(struct vio_softc *sc); int vio_wait_ctrl_done(struct vio_softc *sc); -void vio_ctrl_wakeup(struct vio_softc *, enum vio_ctrl_state); int vio_alloc_mem(struct vio_softc *); int vio_alloc_dmamem(struct vio_softc *); void vio_free_dmamem(struct vio_softc *); @@ -379,7 +375,7 @@ vio_free_dmamem(struct vio_softc *sc) * sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC * class command (WRITE) * sc_ctrl_* structures are allocated only one each; they are protected by - * sc_ctrl_inuse, which must only be accessed at splnet + * sc_cfg_lock * * metadata headers for received frames are stored at the start of the * rx mbufs. @@ -590,6 +586,7 @@ vio_attach(struct device *parent, struct device *self, void *aux) goto err; strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ); + rw_init(&sc->sc_cfg_lock, "viocfg"); ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_start = vio_start; @@ -678,7 +675,9 @@ vio_init(struct ifnet *ifp) { struct vio_softc *sc = ifp->if_softc; - vio_stop(ifp, 0); + rw_assert_wrlock(&sc->sc_cfg_lock); + + vio_stop(ifp); if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1), sc->sc_vq[VQRX].vq_num); vio_populate_rx_mbufs(sc); @@ -690,23 +689,28 @@ vio_init(struct ifnet *ifp) } void -vio_stop(struct ifnet *ifp, int disable) +vio_stop(struct ifnet *ifp) { struct vio_softc *sc = ifp->if_softc; struct virtio_softc *vsc = sc->sc_virtio; + rw_assert_wrlock(&sc->sc_cfg_lock); + timeout_del(&sc->sc_txtick); timeout_del(&sc->sc_rxtick); ifp->if_flags &= ~IFF_RUNNING; ifq_clr_oactive(&ifp->if_snd); + + /* Avoid lock order issues with barriers. */ + NET_UNLOCK(); + /* only way to stop I/O and DMA is resetting... */ virtio_reset(vsc); - vio_rxeof(sc); - if (vsc->sc_nvqs >= 3) - vio_ctrleof(&sc->sc_vq[VQCTL]); + virtio_intr_barrier(vsc, VQRX); + virtio_intr_barrier(vsc, VQTX); + virtio_intr_barrier(vsc, VQCTL); vio_tx_drain(sc); - if (disable) - vio_rx_drain(sc); + vio_rx_drain(sc); virtio_reinit_start(vsc); virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]); @@ -714,11 +718,7 @@ vio_stop(struct ifnet *ifp, int disable) if (vsc->sc_nvqs >= 3) virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]); virtio_reinit_end(vsc); - if (vsc->sc_nvqs >= 3) { - if (sc->sc_ctrl_inuse != FREE) - sc->sc_ctrl_inuse = RESET; - wakeup(&sc->sc_ctrl_inuse); - } + NET_LOCK(); } static inline uint16_t @@ -916,9 +916,23 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct vio_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; - int s, r = 0; + int s, r = 0, netlock_held = 1; + + switch (cmd) { + case SIOCGIFMEDIA: + case SIOCSIFMEDIA: + case SIOCGIFSFFPAGE: + netlock_held = 0; + break; + } + if (netlock_held) + NET_UNLOCK(); + rw_enter_write(&sc->sc_cfg_lock); + if (netlock_held) + NET_LOCK(); s = splnet(); + switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; @@ -937,7 +951,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) vio_init(ifp); } else { if (ifp->if_flags & IFF_RUNNING) - vio_stop(ifp, 1); + vio_stop(ifp); } break; case SIOCGIFMEDIA: @@ -958,6 +972,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) r = 0; } splx(s); + rw_exit_write(&sc->sc_cfg_lock); return r; } @@ -1174,7 +1189,7 @@ vio_rxtick(void *arg) splx(s); } -/* free all the mbufs; called from if_stop(disable) */ +/* free all the mbufs; called from vio_stop() */ void vio_rx_drain(struct vio_softc *sc) { @@ -1312,9 +1327,6 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff) splassert(IPL_NET); - if ((r = vio_wait_ctrl(sc)) != 0) - return r; - sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX; sc->sc_ctrl_cmd->command = cmd; sc->sc_ctrl_rx->onoff = onoff; @@ -1338,6 +1350,7 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff) sizeof(*sc->sc_ctrl_rx), 1); VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status, sizeof(*sc->sc_ctrl_status), 0); + sc->sc_ctrl_inuse = 1; virtio_enqueue_commit(vsc, vq, slot, 1); if ((r = vio_wait_ctrl_done(sc)) != 0) @@ -1359,7 +1372,6 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff) DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, (int)onoff, r); out: - vio_ctrl_wakeup(sc, FREE); return r; } @@ -1372,6 +1384,7 @@ vio_sleep(struct vio_softc *sc, const char *wmesg) { int status = rw_status(&netlock); + rw_assert_wrlock(&sc->sc_cfg_lock); if (status != RW_WRITE && status != RW_READ) return tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg, INFSLP); @@ -1380,31 +1393,12 @@ vio_sleep(struct vio_softc *sc, const char *wmesg) INFSLP); } -int -vio_wait_ctrl(struct vio_softc *sc) -{ - int r = 0; - - while (sc->sc_ctrl_inuse != FREE) { - r = vio_sleep(sc, "viowait"); - if (r == EINTR) - return r; - } - sc->sc_ctrl_inuse = INUSE; - - return r; -} - int vio_wait_ctrl_done(struct vio_softc *sc) { int r = 0; - while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) { - if (sc->sc_ctrl_inuse == RESET) { - r = 1; - break; - } + while (sc->sc_ctrl_inuse) { r = vio_sleep(sc, "viodone"); if (r == EINTR) break; @@ -1412,13 +1406,6 @@ vio_wait_ctrl_done(struct vio_softc *sc) return r; } -void -vio_ctrl_wakeup(struct vio_softc *sc, enum vio_ctrl_state new) -{ - sc->sc_ctrl_inuse = new; - wakeup(&sc->sc_ctrl_inuse); -} - int vio_ctrleof(struct virtqueue *vq) { @@ -1432,7 +1419,8 @@ again: return r; virtio_dequeue_commit(vq, slot); r++; - vio_ctrl_wakeup(sc, DONE); + sc->sc_ctrl_inuse = 0; + wakeup(&sc->sc_ctrl_inuse); if (virtio_start_vq_intr(vsc, vq)) goto again; @@ -1450,9 +1438,6 @@ vio_set_rx_filter(struct vio_softc *sc) splassert(IPL_NET); - if ((r = vio_wait_ctrl(sc)) != 0) - return r; - sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC; sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET; @@ -1479,6 +1464,7 @@ vio_set_rx_filter(struct vio_softc *sc) sc->sc_ctrl_mac_tbl_mc->nentries * ETHER_ADDR_LEN, 1); VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status, sizeof(*sc->sc_ctrl_status), 0); + sc->sc_ctrl_inuse = 1; virtio_enqueue_commit(vsc, vq, slot, 1); if ((r = vio_wait_ctrl_done(sc)) != 0) @@ -1500,7 +1486,6 @@ vio_set_rx_filter(struct vio_softc *sc) } out: - vio_ctrl_wakeup(sc, FREE); return r; } @@ -1516,6 +1501,7 @@ vio_iff(struct vio_softc *sc) int promisc = 0, allmulti = 0, rxfilter = 0; int r; + rw_assert_wrlock(&sc->sc_cfg_lock); splassert(IPL_NET); ifp->if_flags &= ~IFF_ALLMULTI; diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h index d12cb2c59b8..8cd53714afc 100644 --- a/sys/dev/pv/virtiovar.h +++ b/sys/dev/pv/virtiovar.h @@ -157,6 +157,7 @@ struct virtio_ops { void (*set_status)(struct virtio_softc *, int); int (*neg_features)(struct virtio_softc *, const struct virtio_feature_name *); int (*poll_intr)(void *); + void (*intr_barrier)(struct virtio_softc *, int); }; #define VIRTIO_CHILD_ERROR ((void*)1) @@ -197,6 +198,7 @@ struct virtio_softc { #define virtio_setup_queue(sc, i, v) (sc)->sc_ops->setup_queue(sc, i, v) #define virtio_negotiate_features(sc, n) (sc)->sc_ops->neg_features(sc, n) #define virtio_poll_intr(sc) (sc)->sc_ops->poll_intr(sc) +#define virtio_intr_barrier(sc, i) (sc)->sc_ops->intr_barrier(sc, i) /* only for transport drivers */ #define virtio_set_status(sc, i) (sc)->sc_ops->set_status(sc, i)