make qwx(4) flush Rx rings when the interface goes down
On Thu Apr 24, 2025 at 08:26:13PM +0200, Stefan Sperling wrote:
> On Thu, Apr 24, 2025 at 11:38:37AM +0200, Stefan Sperling wrote:
> > There is a known issue where qwx(4) triggers a pool corruption check:
> >
> > panic: pool_cache_item_magic_check: mcl2k cpu free list modified
> >
> > In an attempt to fix this, the patch below makes the driver discard
> > pending packets on all Rx rings.
> >
> > My theory is that the hardware is doing a DMA write to an mbuf which is
> > on one of these rings. While the interface goes down we are freeing mbufs
> > but we are not updating the ring state from the device's point of view
> > before doing so.
> > The panic happens later when qwx or another network driver attempts to
> > allocate a new mbuf from the 2K cluster pool.
> >
> > The issue has been observed during suspend/resume, though if my theory
> > is correct it might just as well happen during ifconfig down/up.
> >
> > Also, port over qwx_dp_mon_link_free() from ath11k. I noticed that this
> > was missing while searching the Linux driver code for clues.
>
> Theo pointed out that disabling interrupts won't necessarily stop DMA.
>
> So here's a new version which flushes the rings after the STOP command
> has been sent to (and acknowledged by) firmware. This will hopefully
> have the desired effect of stopping DMA.
>
> I've also renamed the "reap" function to "flush", to avoid an implication
> that rings were being completely destroyed/freed.
Thanks Stefan. I tested both your previous diff and the one below with
qwx0 at pci2 dev 0 function 0 "Qualcomm QCNFA765" rev 0x01: msi
No issues so far.
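
For anyone skimming the diff: the new qwx_flush_rx_rings() boils down to
calling each Rx ring handler with its new "purge" argument set, so that
reaped buffers are freed instead of being passed up the stack or
replenished, and looping until the handler reports nothing left to reap.
A condensed sketch of that pattern (all names taken from the diff below,
most rings omitted for brevity):

    int i, n;

    /* Drain the Rx error ring; purge == 1 makes the handler free
     * whatever it reaps and skip the buffer replenish step. */
    do {
        n = qwx_dp_process_rx_err(sc, 1);
    } while (n > 0);

    /* Drain each REO destination ring the same way. */
    for (i = 0; i < DP_REO_DST_RING_MAX; i++) {
        do {
            n = qwx_dp_process_rx(sc, i, 1);
        } while (n > 0);
    }
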
>
> M sys/dev/ic/qwx.c | 112+ 21-
>
> 1 file changed, 112 insertions(+), 21 deletions(-)
>
> commit - 31338944480d0345b0ff1635ba18a96228bd37ce
> commit + 733c622d5b43cf595e1a2ddfe07326238879463a
> blob - fdafd249b8801186ec0c12b2d0acb26131dcd963
> blob + fcb17633d81d7f0734e5f38ac655d446d0659a33
> --- sys/dev/ic/qwx.c
> +++ sys/dev/ic/qwx.c
> @@ -15454,6 +15454,16 @@ config_refill_ring:
> }
>
> void
> +qwx_dp_mon_link_free(struct qwx_softc *sc)
> +{
> + struct qwx_pdev_dp *dp = &sc->pdev_dp;
> + struct qwx_mon_data *pmon = &dp->mon_data;
> +
> + qwx_dp_link_desc_cleanup(sc, pmon->link_desc_banks,
> + HAL_RXDMA_MONITOR_DESC, &dp->rxdma_mon_desc_ring);
> +}
> +
> +void
> qwx_dp_pdev_free(struct qwx_softc *sc)
> {
> int i;
> @@ -15462,6 +15472,8 @@ qwx_dp_pdev_free(struct qwx_softc *sc)
>
> for (i = 0; i < sc->num_radios; i++)
> qwx_dp_rx_pdev_free(sc, i);
> +
> + qwx_dp_mon_link_free(sc);
> }
>
> int
> @@ -16058,7 +16070,7 @@ qwx_dp_process_rx_err_buf(struct qwx_softc *sc, uint32
> }
>
> int
> -qwx_dp_process_rx_err(struct qwx_softc *sc)
> +qwx_dp_process_rx_err(struct qwx_softc *sc, int purge)
> {
> struct ieee80211com *ic = &sc->sc_ic;
> struct ifnet *ifp = &ic->ic_if;
> @@ -16077,7 +16089,7 @@ qwx_dp_process_rx_err(struct qwx_softc *sc)
> uint64_t paddr;
> uint32_t *desc;
> int is_frag;
> - uint8_t drop = 0;
> + uint8_t drop = purge ? 1 : 0;
>
> tot_n_bufs_reaped = 0;
>
> @@ -16149,14 +16161,17 @@ qwx_dp_process_rx_err(struct qwx_softc *sc)
> #ifdef notyet
> spin_unlock_bh(&srng->lock);
> #endif
> - for (i = 0; i < sc->num_radios; i++) {
> - if (!n_bufs_reaped[i])
> - continue;
> + if (!purge) {
> + for (i = 0; i < sc->num_radios; i++) {
> + if (!n_bufs_reaped[i])
> + continue;
>
> - rx_ring = &sc->pdev_dp.rx_refill_buf_ring;
> + rx_ring = &sc->pdev_dp.rx_refill_buf_ring;
>
> - qwx_dp_rxbufs_replenish(sc, i, rx_ring, n_bufs_reaped[i],
> - sc->hw_params.hal_params->rx_buf_rbm);
> + qwx_dp_rxbufs_replenish(sc, i, rx_ring,
> + n_bufs_reaped[i],
> + sc->hw_params.hal_params->rx_buf_rbm);
> + }
> }
>
> ifp->if_ierrors += tot_n_bufs_reaped;
> @@ -16306,7 +16321,7 @@ qwx_dp_rx_wbm_err(struct qwx_softc *sc, struct qwx_rx_
> }
>
> int
> -qwx_dp_rx_process_wbm_err(struct qwx_softc *sc)
> +qwx_dp_rx_process_wbm_err(struct qwx_softc *sc, int purge)
> {
> struct ieee80211com *ic = &sc->sc_ic;
> struct ifnet *ifp = &ic->ic_if;
> @@ -16382,6 +16397,18 @@ qwx_dp_rx_process_wbm_err(struct qwx_softc *sc)
> if (!total_num_buffs_reaped)
> goto done;
>
> + if (purge) {
> + for (i = 0; i < sc->num_radios; i++) {
> + while ((msdu = TAILQ_FIRST(&msdu_list[i]))) {
> + TAILQ_REMOVE(&msdu_list[i], msdu, entry);
> + m_freem(msdu->m);
> + msdu->m = NULL;
> + }
> + }
> +
> + goto done;
> + }
> +
> for (i = 0; i < sc->num_radios; i++) {
> if (!num_buffs_reaped[i])
> continue;
> @@ -16894,7 +16921,7 @@ qwx_dp_rx_process_received_packets(struct qwx_softc *s
> }
>
> int
> -qwx_dp_process_rx(struct qwx_softc *sc, int ring_id)
> +qwx_dp_process_rx(struct qwx_softc *sc, int ring_id, int purge)
> {
> struct qwx_dp *dp = &sc->dp;
> struct qwx_pdev_dp *pdev_dp = &sc->pdev_dp;
> @@ -17006,6 +17033,16 @@ try_again:
> if (!num_buffs_reaped[i])
> continue;
>
> + if (purge) {
> + while ((msdu = TAILQ_FIRST(&msdu_list[i]))) {
> + TAILQ_REMOVE(&msdu_list[i], msdu, entry);
> + m_freem(msdu->m);
> + msdu->m = NULL;
> + }
> +
> + continue;
> + }
> +
> qwx_dp_rx_process_received_packets(sc, &msdu_list[i], i);
>
> rx_ring = &sc->pdev_dp.rx_refill_buf_ring;
> @@ -17073,7 +17110,7 @@ fail_free_mbuf:
>
> int
> qwx_dp_rx_reap_mon_status_ring(struct qwx_softc *sc, int mac_id,
> - struct mbuf_list *ml)
> + struct mbuf_list *ml, int purge)
> {
> const struct ath11k_hw_hal_params *hal_params;
> struct qwx_pdev_dp *dp;
> @@ -17154,6 +17191,15 @@ qwx_dp_rx_reap_mon_status_ring(struct qwx_softc *sc, i
> pmon->buf_state = DP_MON_STATUS_REPLINISH;
> }
> move_next:
> + if (purge) {
> + hal_params = sc->hw_params.hal_params;
> + qwx_hal_rx_buf_addr_info_set(rx_mon_status_desc, 0, 0,
> + hal_params->rx_buf_rbm);
> + qwx_hal_srng_src_get_next_entry(sc, srng);
> + num_buffs_reaped++;
> + continue;
> + }
> +
> m = qwx_dp_rx_alloc_mon_status_buf(sc, rx_ring, &buf_idx);
> if (!m) {
> hal_params = sc->hw_params.hal_params;
> @@ -17209,7 +17255,7 @@ qwx_dp_rx_process_mon_status(struct qwx_softc *sc, int
> #endif
> struct hal_rx_mon_ppdu_info *ppdu_info = &pmon->mon_ppdu_info;
>
> - num_buffs_reaped = qwx_dp_rx_reap_mon_status_ring(sc, mac_id, &ml);
> + num_buffs_reaped = qwx_dp_rx_reap_mon_status_ring(sc, mac_id, &ml, 0);
> if (!num_buffs_reaped)
> goto exit;
>
> @@ -17310,7 +17356,7 @@ qwx_dp_service_mon_ring(void *arg)
> }
>
> int
> -qwx_dp_process_rxdma_err(struct qwx_softc *sc, int mac_id)
> +qwx_dp_process_rxdma_err(struct qwx_softc *sc, int mac_id, int purge)
> {
> struct ieee80211com *ic = &sc->sc_ic;
> struct ifnet *ifp = &ic->ic_if;
> @@ -17382,7 +17428,7 @@ qwx_dp_process_rxdma_err(struct qwx_softc *sc, int mac
> #ifdef notyet
> spin_unlock_bh(&srng->lock);
> #endif
> - if (num_buf_freed)
> + if (num_buf_freed && !purge)
> qwx_dp_rxbufs_replenish(sc, mac_id, rx_ring, num_buf_freed,
> sc->hw_params.hal_params->rx_buf_rbm);
>
> @@ -17605,7 +17651,7 @@ qwx_hal_reo_update_rx_reo_queue_status(struct qwx_soft
> }
>
> int
> -qwx_dp_process_reo_status(struct qwx_softc *sc)
> +qwx_dp_process_reo_status(struct qwx_softc *sc, int purge)
> {
> struct qwx_dp *dp = &sc->dp;
> struct hal_srng *srng;
> @@ -17623,8 +17669,11 @@ qwx_dp_process_reo_status(struct qwx_softc *sc)
> qwx_hal_srng_access_begin(sc, srng);
>
> while ((reo_desc = qwx_hal_srng_dst_get_next_entry(sc, srng))) {
> - ret = 1;
> + ret++;
>
> + if (purge)
> + continue;
> +
> tag = FIELD_GET(HAL_SRNG_TLV_HDR_TAG, *reo_desc);
> switch (tag) {
> case HAL_REO_GET_QUEUE_STATS_STATUS:
> @@ -17705,16 +17754,16 @@ qwx_dp_service_srng(struct qwx_softc *sc, int grp_id)
> }
>
> if (sc->hw_params.ring_mask->rx_err[grp_id] &&
> - qwx_dp_process_rx_err(sc))
> + qwx_dp_process_rx_err(sc, 0))
> ret = 1;
>
> if (sc->hw_params.ring_mask->rx_wbm_rel[grp_id] &&
> - qwx_dp_rx_process_wbm_err(sc))
> + qwx_dp_rx_process_wbm_err(sc, 0))
> ret = 1;
>
> if (sc->hw_params.ring_mask->rx[grp_id]) {
> i = fls(sc->hw_params.ring_mask->rx[grp_id]) - 1;
> - if (qwx_dp_process_rx(sc, i))
> + if (qwx_dp_process_rx(sc, i, 0))
> ret = 1;
> }
>
> @@ -17732,7 +17781,7 @@ qwx_dp_service_srng(struct qwx_softc *sc, int grp_id)
> }
>
> if (sc->hw_params.ring_mask->reo_status[grp_id] &&
> - qwx_dp_process_reo_status(sc))
> + qwx_dp_process_reo_status(sc, 0))
> ret = 1;
>
> for (i = 0; i < sc->num_radios; i++) {
> @@ -17741,7 +17790,7 @@ qwx_dp_service_srng(struct qwx_softc *sc, int grp_id)
>
> if (sc->hw_params.ring_mask->rxdma2host[grp_id] &
> (1 << (id))) {
> - if (qwx_dp_process_rxdma_err(sc, id))
> + if (qwx_dp_process_rxdma_err(sc, id, 0))
> ret = 1;
> }
>
> @@ -19860,11 +19909,53 @@ err_wmi_detach:
> }
>
> void
> +qwx_flush_rx_rings(struct qwx_softc *sc)
> +{
> + struct mbuf_list ml = MBUF_LIST_INITIALIZER();
> + int i, j, n;
> +
> + do {
> + n = qwx_dp_process_rx_err(sc, 1);
> + } while (n > 0);
> +
> + do {
> + n = qwx_dp_rx_process_wbm_err(sc, 1);
> + } while (n > 0);
> +
> + do {
> + n = qwx_dp_process_reo_status(sc, 1);
> + } while (n > 0);
> +
> + for (i = 0; i < DP_REO_DST_RING_MAX; i++) {
> + do {
> + n = qwx_dp_process_rx(sc, i, 1);
> + } while (n > 0);
> + }
> +
> + for (i = 0; i < sc->num_radios; i++) {
> + for (j = 0; j < sc->hw_params.num_rxmda_per_pdev; j++) {
> + int mac_id = i * sc->hw_params.num_rxmda_per_pdev + j;
> +
> + do {
> + n = qwx_dp_process_rxdma_err(sc, mac_id, 1);
> + } while (n > 0);
> + do {
> + n = qwx_dp_rx_reap_mon_status_ring(sc,
> + mac_id, &ml, 1);
> + ml_purge(&ml);
> + } while (n > 0);
> + }
> + }
> +}
> +
> +void
> qwx_core_stop(struct qwx_softc *sc)
> {
> if (!test_bit(ATH11K_FLAG_CRASH_FLUSH, sc->sc_flags))
> qwx_qmi_firmware_stop(sc);
>
> + qwx_flush_rx_rings(sc);
> +
> sc->ops.stop(sc);
> qwx_wmi_detach(sc);
> qwx_dp_pdev_reo_cleanup(sc);
>