From: Dmitry Bogdan Subject: Re: bge/bnx/iavf/igc/ix/ixl/ngbe/pcn: ifq_restart() fix To: tech@openbsd.org Date: Thu, 26 Jun 2025 22:57:41 +0300 Hi Stephan and tech@ I have such a great NIC as AMD Lance FastEthernet (and another one, much more ancient 10mbit with RJ-45 and AUI interface). > pcn0 at pci4 dev 6 function 0 "AMD 79c970 PCnet-PCI" rev 0x25, Am79c971, rev 5: apic 24 int 21, address 00:60:b0:eb:73:c5 > lxtphy0 at pcn0 phy 1: LXT970, rev. 0 > ukphy0 at pcn0 phy 31: Generic IEEE 802.3u media interface, rev. 1: OUI 0x00001a, model 0x0001 I can not manage the interface to get stuck forever. Yes, when I run a thing such as > iperf -u -l0 -t0 -c ${serveraddr} I see a tons of qdrops in kstat: >pcn0:0:txq:0 > packets: 36982186 packets > bytes: 1675836505 bytes > qdrops: 48256453 packets > errors: 0 packets > qlen: 0 packets > maxqlen: 511 packets > oactive: false > oactives: 4607637 and oactivee is set to true. Also I can see ping drops (no buffer space avail) and delays. But when I stop the iperf client everything is going just fine. oactive in kstat becomes false again. Pings are ok, browser works fine. Tested with 7.7 syspatch from 04 may AND with -current kernel + old 7.7 user space + you modifications with done variable in pcn_txintr func. Behavior is the same in both cases. If one is interested in more deep investigations, drop me a line privately. On Fri, Jun 20, 2025 at 1:14 PM Stefan Sperling wrote: > A bug has been fixed by yasuaok@ in vmx(4) where the driver was > calling ifq_restart() without actually having made any space on > a full Tx ring. Calling ifq_restart() in this case can lead to > a condition where the interface gets stuck in OACTIVE until the > interface is reset with ifconfig. > > I could trigger the same bug on ice(4) with iperf as follows: > for i in `seq 5`; do (iperf -l0 -t 0 -c 2001:db8::1 -u &) ; done > This needs another machine which runs iperf -u -s (and -V if using IPv6). > > I have checked all ethernet drivers in dev/pci which call ifq_restart(). > A few already avoid calling ifq_restart() unnecessarily. > The same bug affects bge, bnx, iavf, igc, ix, ixl, ngbe, and pcn. > > The bad pattern looks like: > > while (cons != prod) { > descriptor = &ring[cons]; > > if (descriptor is not done) > break; > > free descriptor; > } > > if (ifq_is_oactive(ifq)) > ifq_restart(ifq); > > If we break out of the loop without freeing any descriptor, we will > call ifq_restart() without having made any space on the ring. > For example, this can happen when drivers invoke both Tx and Rx interrupt > handlers during the same MSIX interrupt cause, An Rx interrupt can then > lead to processing of a Tx ring which has made no progress. > > Fixed code looks like: > > done = 0; > > while (cons != prod) { > desc = &ring[cons]; > > if (desc is not done) > break; > > free descriptor; > done = 1; > } > > if (done && ifq_is_oactive(ifq)) > ifq_restart(ifq); > > > I am looking for tests and OKs. > I myself don't have any hardware for these drivers to test with. > > > M sys/dev/pci/if_bge.c | 4+ 3- > M sys/dev/pci/if_bnx.c | 1+ 1- > M sys/dev/pci/if_iavf.c | 1+ 1- > M sys/dev/pci/if_igc.c | 1+ 1- > M sys/dev/pci/if_ix.c | 3+ 1- > M sys/dev/pci/if_ixl.c | 1+ 1- > M sys/dev/pci/if_ngbe.c | 3+ 1- > M sys/dev/pci/if_pcn.c | 3+ 1- > > 8 files changed, 17 insertions(+), 10 deletions(-) > > commit - a89c75a2984a37717d6fffa3850bd269e697c3c7 > commit + b6ed26d3094a0bd6d44e939b604136db06b1c0db > blob - 5b022a8bf67db05f03aecfd699cc8fc1701191e4 > blob + c3112a656819b5b109ca032d610ff24f9fe743d6 > --- sys/dev/pci/if_bge.c > +++ sys/dev/pci/if_bge.c > @@ -3739,9 +3739,10 @@ bge_txeof(struct bge_softc *sc) > > sc->bge_tx_saved_considx = cons; > > - if (ifq_is_oactive(&ifp->if_snd)) > - ifq_restart(&ifp->if_snd); > - else if (txcnt == 0) > + if (ifq_is_oactive(&ifp->if_snd)) { > + if (freed) > + ifq_restart(&ifp->if_snd); > + } else if (txcnt == 0) > ifp->if_timer = 0; > } > > blob - 6d18e3a22795dede966a95c3f81d74f71fd20b12 > blob + 79382274aee75c3d51f54d747f6eda4ae84f0d99 > --- sys/dev/pci/if_bnx.c > +++ sys/dev/pci/if_bnx.c > @@ -4579,7 +4579,7 @@ bnx_tx_intr(struct bnx_softc *sc) > if (used == 0) > ifp->if_timer = 0; > > - if (ifq_is_oactive(&ifp->if_snd)) > + if (freed && ifq_is_oactive(&ifp->if_snd)) > ifq_restart(&ifp->if_snd); > } > > blob - bdfeefe85158bea04895190c8e151bd17b9cb6a4 > blob + 9ae28af22a0fdbc6fb47db73f7df8db4d1fa400a > --- sys/dev/pci/if_iavf.c > +++ sys/dev/pci/if_iavf.c > @@ -2178,7 +2178,7 @@ iavf_txeof(struct iavf_softc *sc, struct ifqueue > *ifq) > > //ixl_enable(sc, txr->txr_msix); > > - if (ifq_is_oactive(ifq)) > + if (done && ifq_is_oactive(ifq)) > ifq_restart(ifq); > > return (done); > blob - 6f9aaed06ed82859719b420c8198e9f5042fe823 > blob + 69800e703841c3635185849dabe61aa430d5f32a > --- sys/dev/pci/if_igc.c > +++ sys/dev/pci/if_igc.c > @@ -1119,7 +1119,7 @@ igc_txeof(struct igc_txring *txr) > > txr->next_to_clean = cons; > > - if (ifq_is_oactive(ifq)) > + if (done && ifq_is_oactive(ifq)) > ifq_restart(ifq); > > return (done); > blob - eece3848ec6deeaf57a0b3da4412119979c5a74f > blob + e35807fa84586257960ff739743e4c1a05ed65de > --- sys/dev/pci/if_ix.c > +++ sys/dev/pci/if_ix.c > @@ -2641,6 +2641,7 @@ ixgbe_txeof(struct ix_txring *txr) > unsigned int head, tail, last; > struct ixgbe_tx_buf *tx_buffer; > struct ixgbe_legacy_tx_desc *tx_desc; > + int done = 0; > > if (!ISSET(ifp->if_flags, IFF_RUNNING)) > return FALSE; > @@ -2673,6 +2674,7 @@ ixgbe_txeof(struct ix_txring *txr) > tx_buffer->m_head = NULL; > tx_buffer->eop_index = -1; > > + done = 1; > tail = last + 1; > if (tail == sc->num_tx_desc) > tail = 0; > @@ -2691,7 +2693,7 @@ ixgbe_txeof(struct ix_txring *txr) > > txr->next_to_clean = tail; > > - if (ifq_is_oactive(ifq)) > + if (done && ifq_is_oactive(ifq)) > ifq_restart(ifq); > > return TRUE; > blob - c0c958c59581caac0e1fca5261fd1956e152f193 > blob + e338ee24ecda59a9155a02fabaafc7f9f79b2173 > --- sys/dev/pci/if_ixl.c > +++ sys/dev/pci/if_ixl.c > @@ -3011,7 +3011,7 @@ ixl_txeof(struct ixl_softc *sc, struct ixl_tx_ring > *tx > > //ixl_enable(sc, txr->txr_msix); > > - if (ifq_is_oactive(ifq)) > + if (done && ifq_is_oactive(ifq)) > ifq_restart(ifq); > > return (done); > blob - 7c7a6eb24b69070c80ee4feff9847168188a30fb > blob + 3e5e939ddba83973bf8df4893206ddd5aad5f1e7 > --- sys/dev/pci/if_ngbe.c > +++ sys/dev/pci/if_ngbe.c > @@ -4461,6 +4461,7 @@ ngbe_txeof(struct tx_ring *txr) > struct ngbe_tx_buf *tx_buffer; > union ngbe_tx_desc *tx_desc; > unsigned int prod, cons, last; > + int done = 0; > > if (!ISSET(ifp->if_flags, IFF_RUNNING)) > return; > @@ -4486,6 +4487,7 @@ ngbe_txeof(struct tx_ring *txr) > 0, tx_buffer->map->dm_mapsize, BUS_DMASYNC_POSTWRITE); > bus_dmamap_unload(txr->txdma.dma_tag, tx_buffer->map); > m_freem(tx_buffer->m_head); > + done = 1; > > tx_buffer->m_head = NULL; > tx_buffer->eop_index = -1; > @@ -4505,7 +4507,7 @@ ngbe_txeof(struct tx_ring *txr) > > txr->next_to_clean = cons; > > - if (ifq_is_oactive(ifq)) > + if (done && ifq_is_oactive(ifq)) > ifq_restart(ifq); > } > > blob - 3d93a269ba97b398505e353329db00db6cf0e937 > blob + 97780dba62a5b32a6f7eacccc003f9e0097ab5da > --- sys/dev/pci/if_pcn.c > +++ sys/dev/pci/if_pcn.c > @@ -1143,6 +1143,7 @@ pcn_txintr(struct pcn_softc *sc) > struct pcn_txsoft *txs; > uint32_t tmd1, tmd2, tmd; > int i, j; > + int done = 0; > > /* > * Go through our Tx list and free mbufs for those > @@ -1219,6 +1220,7 @@ pcn_txintr(struct pcn_softc *sc) > bus_dmamap_unload(sc->sc_dmat, txs->txs_dmamap); > m_freem(txs->txs_mbuf); > txs->txs_mbuf = NULL; > + done = 1; > } > > /* Update the dirty transmit buffer pointer. */ > @@ -1231,7 +1233,7 @@ pcn_txintr(struct pcn_softc *sc) > if (sc->sc_txsfree == PCN_TXQUEUELEN) > ifp->if_timer = 0; > > - if (ifq_is_oactive(&ifp->if_snd)) > + if (done && ifq_is_oactive(&ifp->if_snd)) > ifq_restart(&ifp->if_snd); > } > > >