Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: OpenBSD 7.5 kernel panic: backtrace: rt_ifa_del, in6_unlink_ifa, in6_purgeaddr, nd6_expire
To:
Hiltjo Posthuma <hiltjo@codemadness.org>
Cc:
tech@openbsd.org
Date:
Thu, 16 May 2024 21:58:34 +0200

Download raw body.

Thread
On Thu, May 16, 2024 at 09:11:12PM +0200, Hiltjo Posthuma wrote:
> Today I noticed a kernel panic on a server running OpenBSD 7.5 stable on a VPS.
> It only occurred once so far, but maybe this report is useful, because the
> network/lock code is being worked on.
> 
> ddb backtrace and registers:
> https://codemadness.org/tmp/obsd_panic_75/bt.png
> https://codemadness.org/tmp/obsd_panic_75/reg.png

I think this is the bug that Stefan Fritsch is currently fixing.
https://marc.info/?l=openbsd-tech&m=171429251108477&w=2

I guess you are using KVM, there is a race in vio(4).
Try his diff below.

bluhm

diff --git a/sys/dev/fdt/virtio_mmio.c b/sys/dev/fdt/virtio_mmio.c
index 4d92508f72e..b5746e7ab3b 100644
--- dev/fdt/virtio_mmio.c
+++ dev/fdt/virtio_mmio.c
@@ -97,6 +97,7 @@ void		virtio_mmio_write_device_config_4(struct virtio_softc *, int, uint32_t);
 void		virtio_mmio_write_device_config_8(struct virtio_softc *, int, uint64_t);
 uint16_t	virtio_mmio_read_queue_size(struct virtio_softc *, uint16_t);
 void		virtio_mmio_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t);
+int		virtio_mmio_get_status(struct virtio_softc *);
 void		virtio_mmio_set_status(struct virtio_softc *, int);
 int		virtio_mmio_negotiate_features(struct virtio_softc *,
     const struct virtio_feature_name *);
@@ -144,6 +145,7 @@ struct virtio_ops virtio_mmio_ops = {
 	virtio_mmio_write_device_config_8,
 	virtio_mmio_read_queue_size,
 	virtio_mmio_setup_queue,
+	virtio_mmio_get_status,
 	virtio_mmio_set_status,
 	virtio_mmio_negotiate_features,
 	virtio_mmio_intr,
@@ -194,6 +196,15 @@ virtio_mmio_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
 	}
 }
 
+int
+virtio_mmio_get_status(struct virtio_softc *vsc)
+{
+	struct virtio_mmio_softc *sc = (struct virtio_mmio_softc *)vsc;
+
+	return bus_space_read_4(sc->sc_iot, sc->sc_ioh,
+	    VIRTIO_MMIO_STATUS);
+}
+
 void
 virtio_mmio_set_status(struct virtio_softc *vsc, int status)
 {
diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
index 36a62c13bd1..b60fe740587 100644
--- dev/pci/virtio_pci.c
+++ dev/pci/virtio_pci.c
@@ -72,6 +72,7 @@ void		virtio_pci_write_device_config_4(struct virtio_softc *, int, uint32_t);
 void		virtio_pci_write_device_config_8(struct virtio_softc *, int, uint64_t);
 uint16_t	virtio_pci_read_queue_size(struct virtio_softc *, uint16_t);
 void		virtio_pci_setup_queue(struct virtio_softc *, struct virtqueue *, uint64_t);
+int		virtio_pci_get_status(struct virtio_softc *);
 void		virtio_pci_set_status(struct virtio_softc *, int);
 int		virtio_pci_negotiate_features(struct virtio_softc *, const struct virtio_feature_name *);
 int		virtio_pci_negotiate_features_10(struct virtio_softc *, const struct virtio_feature_name *);
@@ -155,6 +156,7 @@ struct virtio_ops virtio_pci_ops = {
 	virtio_pci_write_device_config_8,
 	virtio_pci_read_queue_size,
 	virtio_pci_setup_queue,
+	virtio_pci_get_status,
 	virtio_pci_set_status,
 	virtio_pci_negotiate_features,
 	virtio_pci_poll_intr,
@@ -275,6 +277,18 @@ virtio_pci_setup_queue(struct virtio_softc *vsc, struct virtqueue *vq,
 	}
 }
 
+int
+virtio_pci_get_status(struct virtio_softc *vsc)
+{
+	struct virtio_pci_softc *sc = (struct virtio_pci_softc *)vsc;
+
+	if (sc->sc_sc.sc_version_1)
+		return CREAD(sc, device_status);
+	else
+		return bus_space_read_1(sc->sc_iot, sc->sc_ioh,
+		    VIRTIO_CONFIG_DEVICE_STATUS);
+}
+
 void
 virtio_pci_set_status(struct virtio_softc *vsc, int status)
 {
diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
index 9ad8e36e346..456e332890a 100644
--- dev/pv/if_vio.c
+++ dev/pv/if_vio.c
@@ -252,6 +252,7 @@ struct vio_softc {
 #define VIRTIO_NET_TX_MAXNSEGS		16 /* for larger chains, defrag */
 #define VIRTIO_NET_CTRL_MAC_MC_ENTRIES	64 /* for more entries, use ALLMULTI */
 #define VIRTIO_NET_CTRL_MAC_UC_ENTRIES	 1 /* one entry for own unicast addr */
+#define VIRTIO_NET_CTRL_TIMEOUT		(5*1000*1000*1000ULL) /* 5 seconds */
 
 #define VIO_CTRL_MAC_INFO_SIZE					\
 	(2*sizeof(struct virtio_net_ctrl_mac_tbl) +		\
@@ -512,6 +513,17 @@ vio_put_lladdr(struct arpcom *ac, struct virtio_softc *vsc)
 	}
 }
 
+static int vio_needs_reset(struct vio_softc *sc)
+{
+	if (virtio_get_status(sc->sc_virtio) &
+	    VIRTIO_CONFIG_DEVICE_STATUS_DEVICE_NEEDS_RESET) {
+		printf("%s: device needs reset", sc->sc_dev.dv_xname);
+		vio_ctrl_wakeup(sc, RESET);
+		return 1;
+	}
+	return 0;
+}
+
 void
 vio_attach(struct device *parent, struct device *self, void *aux)
 {
@@ -649,6 +661,7 @@ vio_config_change(struct virtio_softc *vsc)
 {
 	struct vio_softc *sc = (struct vio_softc *)vsc->sc_child;
 	vio_link_state(&sc->sc_ac.ac_if);
+	vio_needs_reset(sc);
 	return 1;
 }
 
@@ -703,7 +716,7 @@ vio_stop(struct ifnet *ifp, int disable)
 	virtio_reset(vsc);
 	vio_rxeof(sc);
 	if (vsc->sc_nvqs >= 3)
-		vio_ctrleof(&sc->sc_vq[VQCTL]);
+		vio_ctrl_wakeup(sc, RESET);
 	vio_tx_drain(sc);
 	if (disable)
 		vio_rx_drain(sc);
@@ -714,11 +727,8 @@ vio_stop(struct ifnet *ifp, int disable)
 	if (vsc->sc_nvqs >= 3)
 		virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
 	virtio_reinit_end(vsc);
-	if (vsc->sc_nvqs >= 3) {
-		if (sc->sc_ctrl_inuse != FREE)
-			sc->sc_ctrl_inuse = RESET;
-		wakeup(&sc->sc_ctrl_inuse);
-	}
+	if (vsc->sc_nvqs >= 3)
+		vio_ctrl_wakeup(sc, FREE);
 }
 
 static inline uint16_t
@@ -1230,6 +1240,9 @@ vio_txeof(struct virtqueue *vq)
 	int r = 0;
 	int slot, len;
 
+	if (!ISSET(ifp->if_flags, IFF_RUNNING))
+		return 0;
+
 	while (virtio_dequeue(vsc, vq, &slot, &len) == 0) {
 		struct virtio_net_hdr *hdr = &sc->sc_tx_hdrs[slot];
 		r++;
@@ -1363,32 +1376,15 @@ out:
 	return r;
 }
 
-/*
- * XXXSMP As long as some per-ifp ioctl(2)s are executed with the
- * NET_LOCK() deadlocks are possible.  So release it here.
- */
-static inline int
-vio_sleep(struct vio_softc *sc, const char *wmesg)
-{
-	int status = rw_status(&netlock);
-
-	if (status != RW_WRITE && status != RW_READ)
-		return tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO|PCATCH, wmesg,
-		    INFSLP);
-
-	return rwsleep_nsec(&sc->sc_ctrl_inuse, &netlock, PRIBIO|PCATCH, wmesg,
-	    INFSLP);
-}
-
 int
 vio_wait_ctrl(struct vio_softc *sc)
 {
 	int r = 0;
 
 	while (sc->sc_ctrl_inuse != FREE) {
-		r = vio_sleep(sc, "viowait");
-		if (r == EINTR)
-			return r;
+		if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc))
+			return ENXIO;
+		r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viowait", INFSLP);
 	}
 	sc->sc_ctrl_inuse = INUSE;
 
@@ -1400,14 +1396,16 @@ vio_wait_ctrl_done(struct vio_softc *sc)
 {
 	int r = 0;
 
-	while (sc->sc_ctrl_inuse != DONE && sc->sc_ctrl_inuse != RESET) {
-		if (sc->sc_ctrl_inuse == RESET) {
-			r = 1;
-			break;
+	while (sc->sc_ctrl_inuse != DONE) {
+		if (sc->sc_ctrl_inuse == RESET || vio_needs_reset(sc))
+			return ENXIO;
+		r = tsleep_nsec(&sc->sc_ctrl_inuse, PRIBIO, "viodone",
+		    VIRTIO_NET_CTRL_TIMEOUT);
+		if (r == EWOULDBLOCK) {
+			printf("%s: ctrl queue timeout", sc->sc_dev.dv_xname);
+			vio_ctrl_wakeup(sc, RESET);
+			return ENXIO;
 		}
-		r = vio_sleep(sc, "viodone");
-		if (r == EINTR)
-			break;
 	}
 	return r;
 }
diff --git a/sys/dev/pv/virtiovar.h b/sys/dev/pv/virtiovar.h
index d12cb2c59b8..ee9dc877bae 100644
--- dev/pv/virtiovar.h
+++ dev/pv/virtiovar.h
@@ -154,6 +154,7 @@ struct virtio_ops {
 	void		(*write_dev_cfg_8)(struct virtio_softc *, int, uint64_t);
 	uint16_t	(*read_queue_size)(struct virtio_softc *, uint16_t);
 	void		(*setup_queue)(struct virtio_softc *, struct virtqueue *, uint64_t);
+	int		(*get_status)(struct virtio_softc *);
 	void		(*set_status)(struct virtio_softc *, int);
 	int		(*neg_features)(struct virtio_softc *, const struct virtio_feature_name *);
 	int		(*poll_intr)(void *);
@@ -197,9 +198,10 @@ struct virtio_softc {
 #define	virtio_setup_queue(sc, i, v)		(sc)->sc_ops->setup_queue(sc, i, v)
 #define	virtio_negotiate_features(sc, n)	(sc)->sc_ops->neg_features(sc, n)
 #define	virtio_poll_intr(sc)			(sc)->sc_ops->poll_intr(sc)
+#define	virtio_get_status(sc)			(sc)->sc_ops->get_status(sc)
+#define	virtio_set_status(sc, i)		(sc)->sc_ops->set_status(sc, i)
 
 /* only for transport drivers */
-#define	virtio_set_status(sc, i)		(sc)->sc_ops->set_status(sc, i)
 #define	virtio_device_reset(sc)			virtio_set_status((sc), 0)
 
 static inline int