Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
interface multiqueue timout race
To:
tech@openbsd.org
Date:
Fri, 27 Sep 2024 15:11:59 +0200

Download raw body.

Thread
Hi,

From time to time I see strange hangs or crashes in my network test
lab.  Like this one

ddb{0}> trace
db_enter() at db_enter+0x19
intr_handler(ffff80005bfab210,ffff80000009e780) at intr_handler+0x91
Xintr_ioapic_edge16_untramp() at Xintr_ioapic_edge16_untramp+0x18f
Xspllower() at Xspllower+0x1d
pool_multi_alloc(ffffffff827a2a48,2,ffff80005bfab504) at pool_multi_alloc+0xcb
m_pool_alloc(ffffffff827a2a48,2,ffff80005bfab504) at m_pool_alloc+0x4b
pool_p_alloc(ffffffff827a2a48,2,ffff80005bfab504) at pool_p_alloc+0x68
pool_do_get(ffffffff827a2a48,2,ffff80005bfab504) at pool_do_get+0xe5
pool_get(ffffffff827a2a48,2) at pool_get+0xad
m_clget(0,2,802) at m_clget+0x1cf
igc_get_buf(ffff8000004ff8e8,109) at igc_get_buf+0xb8
igc_rxfill(ffff8000004ff8e8) at igc_rxfill+0xad
igc_rxrefill(ffff8000004ff8e8) at igc_rxrefill+0x27
softclock_process_tick_timeout(ffff8000004ff970,1) at softclock_process_tick_timeout+0x103
softclock(0) at softclock+0x11e
softintr_dispatch(0) at softintr_dispatch+0xe6
Xsoftclock() at Xsoftclock+0x27
acpicpu_idle() at acpicpu_idle+0x131
sched_idle(ffffffff8277aff0) at sched_idle+0x298
end trace frame: 0x0, count: -19

igc_rxrefill() may be called from both, timeout or receive interrupt.
As interrupts are per CPU and timeout can be on any CPU there should
be some lock.  Easy fix is to put a mutex around igc_rxrefill().

Note that I have fixed similar problem for em(4) a while ago.  There
splnet() was enough as it is not multi threaded.

I have added receive mutex for bnxt, igc, ix, ixl as I have these
interfaces in my lab.

ok?

bluhm


Index: dev/pci/if_bnxt.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_bnxt.c,v
diff -u -p -r1.51 if_bnxt.c
--- dev/pci/if_bnxt.c	26 Jun 2024 01:40:49 -0000	1.51
+++ dev/pci/if_bnxt.c	26 Sep 2024 13:10:17 -0000
@@ -198,6 +198,7 @@ struct bnxt_rx_queue {
 	int			rx_cons;
 	int			rx_ag_prod;
 	int			rx_ag_cons;
+	struct mutex		rx_mtx;
 	struct timeout		rx_refill;
 };
 
@@ -671,6 +672,7 @@ bnxt_attach(struct device *parent, struc
 
 		rx->rx_softc = sc;
 		rx->rx_ifiq = ifiq;
+		mtx_init(&rx->rx_mtx, IPL_NET);
 		timeout_set(&rx->rx_refill, bnxt_refill, bq);
 		ifiq->ifiq_softc = rx;
 
@@ -1642,6 +1644,7 @@ bnxt_intr(void *xq)
 	    (cpr->commit_cons+1) % cpr->ring.ring_size, 1);
 
 	if (rxfree != 0) {
+		mtx_enter(&rx->rx_mtx);
 		rx->rx_cons += rxfree;
 		if (rx->rx_cons >= rx->rx_ring.ring_size)
 			rx->rx_cons -= rx->rx_ring.ring_size;
@@ -1662,6 +1665,7 @@ bnxt_intr(void *xq)
 		if ((rx->rx_cons == rx->rx_prod) ||
 		    (rx->rx_ag_cons == rx->rx_ag_prod))
 			timeout_add(&rx->rx_refill, 0);
+		mtx_leave(&rx->rx_mtx);
 	}
 	if (txfree != 0) {
 		if (ifq_is_oactive(tx->tx_ifq))
@@ -2253,10 +2257,11 @@ bnxt_refill(void *xq)
 	struct bnxt_queue *q = xq;
 	struct bnxt_rx_queue *rx = &q->q_rx;
 
+	mtx_enter(&rx->rx_mtx);
 	bnxt_rx_fill(q);
-
 	if (rx->rx_cons == rx->rx_prod)
 		timeout_add(&rx->rx_refill, 1);
+	mtx_leave(&rx->rx_mtx);
 }
 
 int
Index: dev/pci/if_igc.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_igc.c,v
diff -u -p -r1.27 if_igc.c
--- dev/pci/if_igc.c	11 Aug 2024 01:02:10 -0000	1.27
+++ dev/pci/if_igc.c	26 Sep 2024 12:59:43 -0000
@@ -428,6 +428,7 @@ igc_allocate_queues(struct igc_softc *sc
 		rxr = &sc->rx_rings[i];
 		rxr->sc = sc;
 		rxr->me = i;
+		mtx_init(&rxr->rx_mtx, IPL_NET);
 		timeout_set(&rxr->rx_refill, igc_rxrefill, rxr);
 
 		if (igc_dma_malloc(sc, rsize, &rxr->rxdma)) {
@@ -1281,12 +1282,13 @@ igc_rxrefill(void *xrxr)
 	struct igc_rxring *rxr = xrxr;
 	struct igc_softc *sc = rxr->sc;
 
+	mtx_enter(&rxr->rx_mtx);
 	if (igc_rxfill(rxr)) {
 		IGC_WRITE_REG(&sc->hw, IGC_RDT(rxr->me),
 		    (rxr->last_desc_filled + 1) % sc->num_rx_desc);
-	}
-	else if (if_rxr_inuse(&rxr->rx_ring) == 0)
+	} else if (if_rxr_inuse(&rxr->rx_ring) == 0)
 		timeout_add(&rxr->rx_refill, 1);
+	mtx_leave(&rxr->rx_mtx);
 }
 
 /*********************************************************************
Index: dev/pci/if_igc.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_igc.h,v
diff -u -p -r1.4 if_igc.h
--- dev/pci/if_igc.h	21 May 2024 11:19:39 -0000	1.4
+++ dev/pci/if_igc.h	26 Sep 2024 12:52:04 -0000
@@ -282,6 +282,7 @@ struct igc_rxring {
 	struct igc_dma_alloc	rxdma;
 	uint32_t		last_desc_filled;
 	uint32_t		next_to_check;
+	struct mutex		rx_mtx;
 	struct timeout		rx_refill;
 	struct if_rxring	rx_ring;
 };
Index: dev/pci/if_ix.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v
diff -u -p -r1.217 if_ix.c
--- dev/pci/if_ix.c	4 Sep 2024 07:54:52 -0000	1.217
+++ dev/pci/if_ix.c	26 Sep 2024 13:11:19 -0000
@@ -2200,6 +2200,7 @@ ixgbe_allocate_queues(struct ix_softc *s
 		/* Set up some basics */
 		rxr->sc = sc;
 		rxr->me = i;
+		mtx_init(&rxr->rx_mtx, IPL_NET);
 		timeout_set(&rxr->rx_refill, ixgbe_rxrefill, rxr);
 
 		if (ixgbe_dma_malloc(sc, rsize,
@@ -2842,12 +2843,14 @@ ixgbe_rxrefill(void *xrxr)
 	struct ix_rxring *rxr = xrxr;
 	struct ix_softc *sc = rxr->sc;
 
+	mtx_enter(&rxr->rx_mtx);
 	if (ixgbe_rxfill(rxr)) {
 		/* Advance the Rx Queue "Tail Pointer" */
 		IXGBE_WRITE_REG(&sc->hw, IXGBE_RDT(rxr->me),
 		    rxr->last_desc_filled);
 	} else if (if_rxr_inuse(&rxr->rx_ring) == 0)
 		timeout_add(&rxr->rx_refill, 1);
+	mtx_leave(&rxr->rx_mtx);
 
 }
 
Index: dev/pci/if_ix.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.h,v
diff -u -p -r1.47 if_ix.h
--- dev/pci/if_ix.h	21 May 2024 11:19:39 -0000	1.47
+++ dev/pci/if_ix.h	26 Sep 2024 13:11:57 -0000
@@ -205,6 +205,7 @@ struct ix_rxring {
 	uint			next_to_refresh;
 	uint			next_to_check;
 	uint			last_desc_filled;
+	struct mutex		rx_mtx;
 	struct timeout		rx_refill;
 	struct if_rxring	rx_ring;
 	struct ixgbe_rx_buf	*rx_buffers;
Index: dev/pci/if_ixl.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ixl.c,v
diff -u -p -r1.101 if_ixl.c
--- dev/pci/if_ixl.c	24 May 2024 06:02:53 -0000	1.101
+++ dev/pci/if_ixl.c	26 Sep 2024 13:28:19 -0000
@@ -1216,6 +1216,7 @@ struct ixl_rx_ring {
 	struct ifiqueue		*rxr_ifiq;
 
 	struct if_rxring	 rxr_acct;
+	struct mutex		 rxr_mtx;
 	struct timeout		 rxr_refill;
 
 	unsigned int		 rxr_prod;
@@ -1408,7 +1409,7 @@ static void	ixl_rxr_unconfig(struct ixl_
 static void	ixl_rxr_clean(struct ixl_softc *, struct ixl_rx_ring *);
 static void	ixl_rxr_free(struct ixl_softc *, struct ixl_rx_ring *);
 static int	ixl_rxeof(struct ixl_softc *, struct ixl_rx_ring *);
-static void	ixl_rxfill(struct ixl_softc *, struct ixl_rx_ring *);
+static int	ixl_rxfill(struct ixl_softc *, struct ixl_rx_ring *);
 static void	ixl_rxrefill(void *);
 static int	ixl_rxrinfo(struct ixl_softc *, struct if_rxrinfo *);
 static void	ixl_rx_checksum(struct mbuf *, uint64_t);
@@ -2323,6 +2324,7 @@ ixl_up(struct ixl_softc *sc)
 
 		ixl_wr(sc, rxr->rxr_tail, 0);
 		ixl_rxfill(sc, rxr);
+		ixl_wr(sc, rxr->rxr_tail, rxr->rxr_prod);
 
 		reg = ixl_rd(sc, I40E_QRX_ENA(i));
 		SET(reg, I40E_QRX_ENA_QENA_REQ_MASK);
@@ -3084,6 +3086,7 @@ ixl_rxr_alloc(struct ixl_softc *sc, unsi
 
 	rxr->rxr_sc = sc;
 	if_rxr_init(&rxr->rxr_acct, 17, sc->sc_rx_ring_ndescs - 1);
+	mtx_init(&rxr->rxr_mtx, IPL_NET);
 	timeout_set(&rxr->rxr_refill, ixl_rxrefill, rxr);
 	rxr->rxr_cons = rxr->rxr_prod = 0;
 	rxr->rxr_m_head = NULL;
@@ -3336,7 +3339,7 @@ ixl_rxeof(struct ixl_softc *sc, struct i
 		rxr->rxr_cons = cons;
 		if (ifiq_input(ifiq, &ml))
 			if_rxr_livelocked(&rxr->rxr_acct);
-		ixl_rxfill(sc, rxr);
+		ixl_rxrefill(rxr);
 	}
 
 	bus_dmamap_sync(sc->sc_dmat, IXL_DMA_MAP(&rxr->rxr_mem),
@@ -3346,7 +3349,7 @@ ixl_rxeof(struct ixl_softc *sc, struct i
 	return (done);
 }
 
-static void
+static int
 ixl_rxfill(struct ixl_softc *sc, struct ixl_rx_ring *rxr)
 {
 	struct ixl_rx_rd_desc_16 *ring, *rxd;
@@ -3360,7 +3363,7 @@ ixl_rxfill(struct ixl_softc *sc, struct 
 
 	slots = if_rxr_get(&rxr->rxr_acct, sc->sc_rx_ring_ndescs);
 	if (slots == 0)
-		return;
+		return (0);
 
 	prod = rxr->rxr_prod;
 
@@ -3400,14 +3403,10 @@ ixl_rxfill(struct ixl_softc *sc, struct 
 		post = 1;
 	} while (--slots);
 
+	rxr->rxr_prod = prod;
 	if_rxr_put(&rxr->rxr_acct, slots);
 
-	if (if_rxr_inuse(&rxr->rxr_acct) == 0)
-		timeout_add(&rxr->rxr_refill, 1);
-	else if (post) {
-		rxr->rxr_prod = prod;
-		ixl_wr(sc, rxr->rxr_tail, prod);
-	}
+	return (post);
 }
 
 void
@@ -3416,7 +3415,12 @@ ixl_rxrefill(void *arg)
 	struct ixl_rx_ring *rxr = arg;
 	struct ixl_softc *sc = rxr->rxr_sc;
 
-	ixl_rxfill(sc, rxr);
+	mtx_enter(&rxr->rxr_mtx);
+	if (ixl_rxfill(sc, rxr)) {
+		ixl_wr(sc, rxr->rxr_tail, rxr->rxr_prod);
+	} else if (if_rxr_inuse(&rxr->rxr_acct) == 0)
+		timeout_add(&rxr->rxr_refill, 1);
+	mtx_leave(&rxr->rxr_mtx);
 }
 
 static int