Download raw body.
Don't release NET_LOCK in vio(4) ioctl
On Wed, 17 Apr 2024, Vitaliy Makkoveev wrote:
> >>> Sleep with netlock held stops packet processing and operations in inet
> >>> sockets. This is much worse when use another lock with netlock release.
> >>> pflowioctl() could be good example.
> >>
> >> Holding net lock for a long time is not good. But kernel crashes
> >> are worse. I prefer short stop of packet processing during ifconfig
> >> over a crash in netinet code. Here is the original bug report
> >>
> >> https://marc.info/?l=openbsd-bugs&m=166124228103568&w=2
> >>
> >> I would try get this in and work on fine grained vio(4) locking
> >> afterwards.
> >
> > I agree. I have tried with a separate lock that serializes the whole
> > vio_ioctl function but I still get crashes with that:
> >
> > db_command_loop() at db_command_loop+0xea
> > db_trap(4,0) at db_trap+0x129
> > db_ktrap(4,0,ffff8000239d0f60) at db_ktrap+0x111
> > kerntrap(ffff8000239d0f60) at kerntrap+0xa7
> > alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
> > rt_ifa_addlocal(ffff8000008d7b00) at rt_ifa_addlocal+0x2a
> > in6_update_ifa(ffff8000000af2a8,ffff8000239d1230,0) at
> > in6_update_ifa+0x581
> > in6_ifattach_linklocal(ffff8000000af2a8,0) at in6_ifattach_linklocal+0x204
> > in6_ifattach(ffff8000000af2a8) at in6_ifattach+0x103
> > ifioctl(fffffd8051ac7dd8,801169ab,ffff8000239d13f0,ffff8000239834a8) at
> > ifioctl+0xc99
> > sys_ioctl(ffff8000239834a8,ffff8000239d15a0,ffff8000239d1510) at
> > sys_ioctl+0x2af
> > syscall(ffff8000239d15a0) at syscall+0x588
> > Xsyscall() at Xsyscall+0x128
> > end of kernel
> > end trace frame: 0x773f17f14c30, count: -13
> >
>
> Just for curiosity, can I see your diff?
Sure, attached below. It is inspired by cad(4). It has not seen much
testing, though.
>
> > I suspect that the global ifioctl() function needs some changes in order
> > to allow NIC drivers' ioctl functions to drop the NET_LOCK.
>
> Periodically, I think about that. At least for some pseudo drivers
> netlock could be avoided.
diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
index 4d92508f72e..24568e0348b 100644
--- a/sys/dev/fdt/virtio_mmio.c
+++ b/sys/dev/fdt/virtio_mmio.c
@@ -101,6 +101,7 @@ void virtio_mmio_set_status(struct virtio_softc *, int);
int virtio_mmio_negotiate_features(struct virtio_softc *,
const struct virtio_feature_name *);
int virtio_mmio_intr(void *);
+void virtio_mmio_intr_barrier(struct virtio_softc *vsc, int idx);
struct virtio_mmio_softc {
struct virtio_softc sc_sc;
@@ -147,6 +148,7 @@ struct virtio_ops virtio_mmio_ops = {
virtio_mmio_set_status,
virtio_mmio_negotiate_features,
virtio_mmio_intr,
+ virtio_mmio_intr_barrier,
};
uint16_t
@@ -496,6 +498,15 @@ virtio_mmio_intr(void *arg)
return r;
}
+void
+virtio_mmio_intr_barrier(struct virtio_softc *vsc, int idx)
+{
+ struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc;
+
+ if (sc->sc_ih)
+ intr_barrier(sc->sc_ih);
+}
+
void
virtio_mmio_kick(struct virtio_softc *vsc, uint16_t idx)
{
diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
index 36a62c13bd1..30b20029c27 100644
--- a/sys/dev/pci/virtio_pci.c
+++ b/sys/dev/pci/virtio_pci.c
@@ -81,6 +81,7 @@ int virtio_pci_msix_establish(struct virtio_pci_softc *, struct pci_attach_args
int virtio_pci_setup_msix(struct virtio_pci_softc *, struct pci_attach_args *, int);
void virtio_pci_free_irqs(struct virtio_pci_softc *);
int virtio_pci_poll_intr(void *);
+void virtio_pci_intr_barrier(struct virtio_softc *, int);
int virtio_pci_legacy_intr(void *);
int virtio_pci_legacy_intr_mpsafe(void *);
int virtio_pci_config_intr(void *);
@@ -158,6 +159,7 @@ struct virtio_ops virtio_pci_ops = {
virtio_pci_set_status,
virtio_pci_negotiate_features,
virtio_pci_poll_intr,
+ virtio_pci_intr_barrier,
};
static inline
@@ -1112,6 +1114,27 @@ virtio_pci_poll_intr(void *arg)
return r;
}
+/*
+ * Call intr_barrier for interrupt of vqueue idx.
+ * idx == -1 for config interrupt.
+ */
+void
+virtio_pci_intr_barrier(struct virtio_softc *vsc, int idx)
+{
+ struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+ void *ih = NULL;
+
+ if (idx == -1 || sc->sc_irq_type == IRQ_NO_MSIX)
+ ih = sc->sc_ih[0];
+ else if (sc->sc_irq_type == IRQ_MSIX_SHARED)
+ ih = sc->sc_ih[1];
+ else if (sc->sc_irq_type == IRQ_MSIX_PER_VQ)
+ ih = sc->sc_ih[1 + idx];
+
+ if (ih)
+ intr_barrier(ih);
+}
+
void
virtio_pci_kick(struct virtio_softc *vsc, uint16_t idx)
{
diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
index 9ad8e36e346..b0278051884 100644
--- a/sys/dev/pv/if_vio.c
+++ b/sys/dev/pv/if_vio.c
@@ -193,9 +193,6 @@ struct virtio_net_ctrl_vlan {
/*
* if_viovar.h:
*/
-enum vio_ctrl_state {
- FREE, INUSE, DONE, RESET
-};
struct vio_softc {
struct device sc_dev;
@@ -234,7 +231,8 @@ struct vio_softc {
struct mbuf **sc_tx_mbufs;
struct if_rxring sc_rx_ring;
- enum vio_ctrl_state sc_ctrl_inuse;
+ int sc_ctrl_inuse;
+ struct rwlock sc_cfg_lock;
struct timeout sc_txtick, sc_rxtick;
};
@@ -264,7 +262,7 @@ void vio_attach(struct device *, struct device *, void *);
/* ifnet interface functions */
int vio_init(struct ifnet *);
-void vio_stop(struct ifnet *, int);
+void vio_stop(struct ifnet *);
void vio_start(struct ifnet *);
int vio_ioctl(struct ifnet *, u_long, caddr_t);
void vio_get_lladdr(struct arpcom *ac, struct virtio_softc *vsc);
@@ -295,9 +293,7 @@ void vio_iff(struct vio_softc *);
int vio_media_change(struct ifnet *);
void vio_media_status(struct ifnet *, struct ifmediareq *);
int vio_ctrleof(struct virtqueue *);
-int vio_wait_ctrl(struct vio_softc *sc);
int vio_wait_ctrl_done(struct vio_softc *sc);
-void vio_ctrl_wakeup(struct vio_softc *, enum vio_ctrl_state);
int vio_alloc_mem(struct vio_softc *);
int vio_alloc_dmamem(struct vio_softc *);
void vio_free_dmamem(struct vio_softc *);
@@ -379,7 +375,7 @@ vio_free_dmamem(struct vio_softc *sc)
* sc_ctrl_mac_tbl_mc: multicast MAC address filter for a VIRTIO_NET_CTRL_MAC
* class command (WRITE)
* sc_ctrl_* structures are allocated only one each; they are protected by
- * sc_ctrl_inuse, which must only be accessed at splnet
+ * sc_cfg_lock
*
* metadata headers for received frames are stored at the start of the
* rx mbufs.
@@ -590,6 +586,7 @@ vio_attach(struct device *parent, struct device *self, void *aux)
goto err;
strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
+ rw_init(&sc->sc_cfg_lock, "viocfg");
ifp->if_softc = sc;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_start = vio_start;
@@ -678,7 +675,9 @@ vio_init(struct ifnet *ifp)
{
struct vio_softc *sc = ifp->if_softc;
- vio_stop(ifp, 0);
+ rw_assert_wrlock(&sc->sc_cfg_lock);
+
+ vio_stop(ifp);
if_rxr_init(&sc->sc_rx_ring, 2 * ((ifp->if_hardmtu / MCLBYTES) + 1),
sc->sc_vq[VQRX].vq_num);
vio_populate_rx_mbufs(sc);
@@ -690,23 +689,28 @@ vio_init(struct ifnet *ifp)
}
void
-vio_stop(struct ifnet *ifp, int disable)
+vio_stop(struct ifnet *ifp)
{
struct vio_softc *sc = ifp->if_softc;
struct virtio_softc *vsc = sc->sc_virtio;
+ rw_assert_wrlock(&sc->sc_cfg_lock);
+
timeout_del(&sc->sc_txtick);
timeout_del(&sc->sc_rxtick);
ifp->if_flags &= ~IFF_RUNNING;
ifq_clr_oactive(&ifp->if_snd);
+
+ /* Avoid lock order issues with barriers. */
+ NET_UNLOCK();
+
/* only way to stop I/O and DMA is resetting... */
virtio_reset(vsc);
- vio_rxeof(sc);
- if (vsc->sc_nvqs >= 3)
- vio_ctrleof(&sc->sc_vq[VQCTL]);
+ virtio_intr_barrier(vsc, VQRX);
+ virtio_intr_barrier(vsc, VQTX);
+ virtio_intr_barrier(vsc, VQCTL);
vio_tx_drain(sc);
- if (disable)
- vio_rx_drain(sc);
+ vio_rx_drain(sc);
virtio_reinit_start(vsc);
virtio_start_vq_intr(vsc, &sc->sc_vq[VQRX]);
@@ -714,11 +718,7 @@ vio_stop(struct ifnet *ifp, int disable)
if (vsc->sc_nvqs >= 3)
virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
virtio_reinit_end(vsc);
- if (vsc->sc_nvqs >= 3) {
- if (sc->sc_ctrl_inuse != FREE)
- sc->sc_ctrl_inuse = RESET;
- wakeup(&sc->sc_ctrl_inuse);
- }
+ NET_LOCK();
}
static inline uint16_t
@@ -916,9 +916,23 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct vio_softc *sc = ifp->if_softc;
struct ifreq *ifr = (struct ifreq *)data;
- int s, r = 0;
+ int s, r = 0, netlock_held = 1;
+
+ switch (cmd) {
+ case SIOCGIFMEDIA:
+ case SIOCSIFMEDIA:
+ case SIOCGIFSFFPAGE:
+ netlock_held = 0;
+ break;
+ }
+ if (netlock_held)
+ NET_UNLOCK();
+ rw_enter_write(&sc->sc_cfg_lock);
+ if (netlock_held)
+ NET_LOCK();
s = splnet();
+
switch (cmd) {
case SIOCSIFADDR:
ifp->if_flags |= IFF_UP;
@@ -937,7 +951,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
vio_init(ifp);
} else {
if (ifp->if_flags & IFF_RUNNING)
- vio_stop(ifp, 1);
+ vio_stop(ifp);
}
break;
case SIOCGIFMEDIA:
@@ -958,6 +972,7 @@ vio_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
r = 0;
}
splx(s);
+ rw_exit_write(&sc->sc_cfg_lock);
return r;
}
@@ -1174,7 +1189,7 @@ vio_rxtick(void *arg)
splx(s);
}
-/* free all the mbufs; called from if_stop(disable) */
+/* free all the mbufs; called from vio_stop() */
void
vio_rx_drain(struct vio_softc *sc)
{
@@ -1312,9 +1327,6 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
splassert(IPL_NET);
- if ((r = vio_wait_ctrl(sc)) != 0)
- return r;
-
sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_RX;
sc->sc_ctrl_cmd->command = cmd;
sc->sc_ctrl_rx->onoff = onoff;
@@ -1338,6 +1350,7 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
sizeof(*sc->sc_ctrl_rx), 1);
VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
sizeof(*sc->sc_ctrl_status), 0);
+ sc->sc_ctrl_inuse = 1;
virtio_enqueue_commit(vsc, vq, slot, 1);
if ((r = vio_wait_ctrl_done(sc)) != 0)
@@ -1359,7 +1372,6 @@ vio_ctrl_rx(struct vio_softc *sc, int cmd, int onoff)
DPRINTF("%s: cmd %d %d: %d\n", __func__, cmd, (int)onoff, r);
out:
- vio_ctrl_wakeup(sc, FREE);
return r;
}
@@ -1372,6 +1384,7 @@ vio_sleep(struct vio_softc *sc, const char *wmesg)
{
int status = rw_status(&netlock);
+ rw_assert_wrlock(&sc->sc_cfg_lock);
if (status != RW_WRITE && status != RW_READ)
return tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg,
INFSLP);
@@ -1380,31 +1393,12 @@ vio_sleep(struct vio_softc *sc, const char *wmesg)
INFSLP);
}
-int
-vio_wait_ctrl(struct vio_softc *sc)
-{
- int r = 0;
-
- while (sc->sc_ctrl_inuse != FREE) {
- r = vio_sleep(sc, "viowait");
- if (r == EINTR)
- return r;
- }
- sc->sc_ctrl_inuse = INUSE;
-
- return r;
-}
-
int
vio_wait_ctrl_done(struct vio_softc *sc)
{
int r = 0;
- while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) {
- if (sc->sc_ctrl_inuse == RESET) {
- r = 1;
- break;
- }
+ while (sc->sc_ctrl_inuse) {
r = vio_sleep(sc, "viodone");
if (r == EINTR)
break;
@@ -1412,13 +1406,6 @@ vio_wait_ctrl_done(struct vio_softc *sc)
return r;
}
-void
-vio_ctrl_wakeup(struct vio_softc *sc, enum vio_ctrl_state new)
-{
- sc->sc_ctrl_inuse = new;
- wakeup(&sc->sc_ctrl_inuse);
-}
-
int
vio_ctrleof(struct virtqueue *vq)
{
@@ -1432,7 +1419,8 @@ again:
return r;
virtio_dequeue_commit(vq, slot);
r++;
- vio_ctrl_wakeup(sc, DONE);
+ sc->sc_ctrl_inuse = 0;
+ wakeup(&sc->sc_ctrl_inuse);
if (virtio_start_vq_intr(vsc, vq))
goto again;
@@ -1450,9 +1438,6 @@ vio_set_rx_filter(struct vio_softc *sc)
splassert(IPL_NET);
- if ((r = vio_wait_ctrl(sc)) != 0)
- return r;
-
sc->sc_ctrl_cmd->class = VIRTIO_NET_CTRL_MAC;
sc->sc_ctrl_cmd->command = VIRTIO_NET_CTRL_MAC_TABLE_SET;
@@ -1479,6 +1464,7 @@ vio_set_rx_filter(struct vio_softc *sc)
sc->sc_ctrl_mac_tbl_mc->nentries * ETHER_ADDR_LEN, 1);
VIO_DMAMEM_ENQUEUE(sc, vq, slot, sc->sc_ctrl_status,
sizeof(*sc->sc_ctrl_status), 0);
+ sc->sc_ctrl_inuse = 1;
virtio_enqueue_commit(vsc, vq, slot, 1);
if ((r = vio_wait_ctrl_done(sc)) != 0)
@@ -1500,7 +1486,6 @@ vio_set_rx_filter(struct vio_softc *sc)
}
out:
- vio_ctrl_wakeup(sc, FREE);
return r;
}
@@ -1516,6 +1501,7 @@ vio_iff(struct vio_softc *sc)
int promisc = 0, allmulti = 0, rxfilter = 0;
int r;
+ rw_assert_wrlock(&sc->sc_cfg_lock);
splassert(IPL_NET);
ifp->if_flags &= ~IFF_ALLMULTI;
diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h
index d12cb2c59b8..8cd53714afc 100644
--- a/sys/dev/pv/virtiovar.h
+++ b/sys/dev/pv/virtiovar.h
@@ -157,6 +157,7 @@ struct virtio_ops {
void (*set_status)(struct virtio_softc *, int);
int (*neg_features)(struct virtio_softc *, const struct virtio_feature_name *);
int (*poll_intr)(void *);
+ void (*intr_barrier)(struct virtio_softc *, int);
};
#define VIRTIO_CHILD_ERROR ((void*)1)
@@ -197,6 +198,7 @@ struct virtio_softc {
#define virtio_setup_queue(sc, i, v) (sc)->sc_ops->setup_queue(sc, i, v)
#define virtio_negotiate_features(sc, n) (sc)->sc_ops->neg_features(sc, n)
#define virtio_poll_intr(sc) (sc)->sc_ops->poll_intr(sc)
+#define virtio_intr_barrier(sc, i) (sc)->sc_ops->intr_barrier(sc, i)
/* only for transport drivers */
#define virtio_set_status(sc, i) (sc)->sc_ops->set_status(sc, i)
Don't release NET_LOCK in vio(4) ioctl