Download raw body.
bge/bnx/iavf/igc/ix/ixl/ngbe/pcn: ifq_restart() fix
A bug has been fixed by yasuaok@ in vmx(4) where the driver was
calling ifq_restart() without actually having made any space on
a full Tx ring. Calling ifq_restart() in this case can lead to
a condition where the interface gets stuck in OACTIVE until the
interface is reset with ifconfig.
I could trigger the same bug on ice(4) with iperf as follows:
for i in `seq 5`; do (iperf -l0 -t 0 -c 2001:db8::1 -u &) ; done
This needs another machine which runs iperf -u -s (and -V if using IPv6).
I have checked all ethernet drivers in dev/pci which call ifq_restart().
A few already avoid calling ifq_restart() unnecessarily.
The same bug affects bge, bnx, iavf, igc, ix, ixl, ngbe, and pcn.
The bad pattern looks like:
while (cons != prod) {
descriptor = &ring[cons];
if (descriptor is not done)
break;
free descriptor;
}
if (ifq_is_oactive(ifq))
ifq_restart(ifq);
If we break out of the loop without freeing any descriptor, we will
call ifq_restart() without having made any space on the ring.
For example, this can happen when drivers invoke both Tx and Rx interrupt
handlers during the same MSIX interrupt cause, An Rx interrupt can then
lead to processing of a Tx ring which has made no progress.
Fixed code looks like:
done = 0;
while (cons != prod) {
desc = &ring[cons];
if (desc is not done)
break;
free descriptor;
done = 1;
}
if (done && ifq_is_oactive(ifq))
ifq_restart(ifq);
I am looking for tests and OKs.
I myself don't have any hardware for these drivers to test with.
M sys/dev/pci/if_bge.c | 4+ 3-
M sys/dev/pci/if_bnx.c | 1+ 1-
M sys/dev/pci/if_iavf.c | 1+ 1-
M sys/dev/pci/if_igc.c | 1+ 1-
M sys/dev/pci/if_ix.c | 3+ 1-
M sys/dev/pci/if_ixl.c | 1+ 1-
M sys/dev/pci/if_ngbe.c | 3+ 1-
M sys/dev/pci/if_pcn.c | 3+ 1-
8 files changed, 17 insertions(+), 10 deletions(-)
commit - a89c75a2984a37717d6fffa3850bd269e697c3c7
commit + b6ed26d3094a0bd6d44e939b604136db06b1c0db
blob - 5b022a8bf67db05f03aecfd699cc8fc1701191e4
blob + c3112a656819b5b109ca032d610ff24f9fe743d6
--- sys/dev/pci/if_bge.c
+++ sys/dev/pci/if_bge.c
@@ -3739,9 +3739,10 @@ bge_txeof(struct bge_softc *sc)
sc->bge_tx_saved_considx = cons;
- if (ifq_is_oactive(&ifp->if_snd))
- ifq_restart(&ifp->if_snd);
- else if (txcnt == 0)
+ if (ifq_is_oactive(&ifp->if_snd)) {
+ if (freed)
+ ifq_restart(&ifp->if_snd);
+ } else if (txcnt == 0)
ifp->if_timer = 0;
}
blob - 6d18e3a22795dede966a95c3f81d74f71fd20b12
blob + 79382274aee75c3d51f54d747f6eda4ae84f0d99
--- sys/dev/pci/if_bnx.c
+++ sys/dev/pci/if_bnx.c
@@ -4579,7 +4579,7 @@ bnx_tx_intr(struct bnx_softc *sc)
if (used == 0)
ifp->if_timer = 0;
- if (ifq_is_oactive(&ifp->if_snd))
+ if (freed && ifq_is_oactive(&ifp->if_snd))
ifq_restart(&ifp->if_snd);
}
blob - bdfeefe85158bea04895190c8e151bd17b9cb6a4
blob + 9ae28af22a0fdbc6fb47db73f7df8db4d1fa400a
--- sys/dev/pci/if_iavf.c
+++ sys/dev/pci/if_iavf.c
@@ -2178,7 +2178,7 @@ iavf_txeof(struct iavf_softc *sc, struct ifqueue *ifq)
//ixl_enable(sc, txr->txr_msix);
- if (ifq_is_oactive(ifq))
+ if (done && ifq_is_oactive(ifq))
ifq_restart(ifq);
return (done);
blob - 6f9aaed06ed82859719b420c8198e9f5042fe823
blob + 69800e703841c3635185849dabe61aa430d5f32a
--- sys/dev/pci/if_igc.c
+++ sys/dev/pci/if_igc.c
@@ -1119,7 +1119,7 @@ igc_txeof(struct igc_txring *txr)
txr->next_to_clean = cons;
- if (ifq_is_oactive(ifq))
+ if (done && ifq_is_oactive(ifq))
ifq_restart(ifq);
return (done);
blob - eece3848ec6deeaf57a0b3da4412119979c5a74f
blob + e35807fa84586257960ff739743e4c1a05ed65de
--- sys/dev/pci/if_ix.c
+++ sys/dev/pci/if_ix.c
@@ -2641,6 +2641,7 @@ ixgbe_txeof(struct ix_txring *txr)
unsigned int head, tail, last;
struct ixgbe_tx_buf *tx_buffer;
struct ixgbe_legacy_tx_desc *tx_desc;
+ int done = 0;
if (!ISSET(ifp->if_flags, IFF_RUNNING))
return FALSE;
@@ -2673,6 +2674,7 @@ ixgbe_txeof(struct ix_txring *txr)
tx_buffer->m_head = NULL;
tx_buffer->eop_index = -1;
+ done = 1;
tail = last + 1;
if (tail == sc->num_tx_desc)
tail = 0;
@@ -2691,7 +2693,7 @@ ixgbe_txeof(struct ix_txring *txr)
txr->next_to_clean = tail;
- if (ifq_is_oactive(ifq))
+ if (done && ifq_is_oactive(ifq))
ifq_restart(ifq);
return TRUE;
blob - c0c958c59581caac0e1fca5261fd1956e152f193
blob + e338ee24ecda59a9155a02fabaafc7f9f79b2173
--- sys/dev/pci/if_ixl.c
+++ sys/dev/pci/if_ixl.c
@@ -3011,7 +3011,7 @@ ixl_txeof(struct ixl_softc *sc, struct ixl_tx_ring *tx
//ixl_enable(sc, txr->txr_msix);
- if (ifq_is_oactive(ifq))
+ if (done && ifq_is_oactive(ifq))
ifq_restart(ifq);
return (done);
blob - 7c7a6eb24b69070c80ee4feff9847168188a30fb
blob + 3e5e939ddba83973bf8df4893206ddd5aad5f1e7
--- sys/dev/pci/if_ngbe.c
+++ sys/dev/pci/if_ngbe.c
@@ -4461,6 +4461,7 @@ ngbe_txeof(struct tx_ring *txr)
struct ngbe_tx_buf *tx_buffer;
union ngbe_tx_desc *tx_desc;
unsigned int prod, cons, last;
+ int done = 0;
if (!ISSET(ifp->if_flags, IFF_RUNNING))
return;
@@ -4486,6 +4487,7 @@ ngbe_txeof(struct tx_ring *txr)
0, tx_buffer->map->dm_mapsize, BUS_DMASYNC_POSTWRITE);
bus_dmamap_unload(txr->txdma.dma_tag, tx_buffer->map);
m_freem(tx_buffer->m_head);
+ done = 1;
tx_buffer->m_head = NULL;
tx_buffer->eop_index = -1;
@@ -4505,7 +4507,7 @@ ngbe_txeof(struct tx_ring *txr)
txr->next_to_clean = cons;
- if (ifq_is_oactive(ifq))
+ if (done && ifq_is_oactive(ifq))
ifq_restart(ifq);
}
blob - 3d93a269ba97b398505e353329db00db6cf0e937
blob + 97780dba62a5b32a6f7eacccc003f9e0097ab5da
--- sys/dev/pci/if_pcn.c
+++ sys/dev/pci/if_pcn.c
@@ -1143,6 +1143,7 @@ pcn_txintr(struct pcn_softc *sc)
struct pcn_txsoft *txs;
uint32_t tmd1, tmd2, tmd;
int i, j;
+ int done = 0;
/*
* Go through our Tx list and free mbufs for those
@@ -1219,6 +1220,7 @@ pcn_txintr(struct pcn_softc *sc)
bus_dmamap_unload(sc->sc_dmat, txs->txs_dmamap);
m_freem(txs->txs_mbuf);
txs->txs_mbuf = NULL;
+ done = 1;
}
/* Update the dirty transmit buffer pointer. */
@@ -1231,7 +1233,7 @@ pcn_txintr(struct pcn_softc *sc)
if (sc->sc_txsfree == PCN_TXQUEUELEN)
ifp->if_timer = 0;
- if (ifq_is_oactive(&ifp->if_snd))
+ if (done && ifq_is_oactive(&ifp->if_snd))
ifq_restart(&ifp->if_snd);
}
bge/bnx/iavf/igc/ix/ixl/ngbe/pcn: ifq_restart() fix