From: Stefan Sperling Subject: Re: make qwx(4) flush Rx rings when the interface goes down To: tech@openbsd.org Date: Thu, 24 Apr 2025 20:26:13 +0200 On Thu, Apr 24, 2025 at 11:38:37AM +0200, Stefan Sperling wrote: > There is a known issue where qwx(4) triggers a pool corruption check: > > panic: pool_cache_item_magic_check: mcl2k cpu free list modified > > As an attempt to fix this, the patch below makes the driver discard > pending packets on all Rx rings. > > My theory is that the hardware is doing a DMA write to an mbuf which is > on one of these rings. While the interface goes down we are freeing mbufs > but we are not updating the ring state from the device's point of view > before doing so. > The panic happens later when qwx or another network driver attempts to > allocate a new mbuf from the 2K cluster pool. > > The issue has been observed during suspend/resume, though if my theory > is correct it might just as well happen during ifconfig down/up. > > Also, port over qwx_dp_mon_link_free() from ath11k. I noticed that this > was missing while searching the Linux driver code for clues. Theo pointed out that disabling interrupts won't necessarily stop DMA. So here's a new version which flushes the rings after the STOP command has been sent to (and acknowledged by) firmware. This will hopefully have the desired effect of stopping DMA. I've also renamed the "reap" function to "flush", to avoid an implication that rings were being completely destroyed/freed. 
M sys/dev/ic/qwx.c | 112+ 21- 1 file changed, 112 insertions(+), 21 deletions(-) commit - 31338944480d0345b0ff1635ba18a96228bd37ce commit + 733c622d5b43cf595e1a2ddfe07326238879463a blob - fdafd249b8801186ec0c12b2d0acb26131dcd963 blob + fcb17633d81d7f0734e5f38ac655d446d0659a33 --- sys/dev/ic/qwx.c +++ sys/dev/ic/qwx.c @@ -15454,6 +15454,16 @@ config_refill_ring: } void +qwx_dp_mon_link_free(struct qwx_softc *sc) +{ + struct qwx_pdev_dp *dp = &sc->pdev_dp; + struct qwx_mon_data *pmon = &dp->mon_data; + + qwx_dp_link_desc_cleanup(sc, pmon->link_desc_banks, + HAL_RXDMA_MONITOR_DESC, &dp->rxdma_mon_desc_ring); +} + +void qwx_dp_pdev_free(struct qwx_softc *sc) { int i; @@ -15462,6 +15472,8 @@ qwx_dp_pdev_free(struct qwx_softc *sc) for (i = 0; i < sc->num_radios; i++) qwx_dp_rx_pdev_free(sc, i); + + qwx_dp_mon_link_free(sc); } int @@ -16058,7 +16070,7 @@ qwx_dp_process_rx_err_buf(struct qwx_softc *sc, uint32 } int -qwx_dp_process_rx_err(struct qwx_softc *sc) +qwx_dp_process_rx_err(struct qwx_softc *sc, int purge) { struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &ic->ic_if; @@ -16077,7 +16089,7 @@ qwx_dp_process_rx_err(struct qwx_softc *sc) uint64_t paddr; uint32_t *desc; int is_frag; - uint8_t drop = 0; + uint8_t drop = purge ? 
1 : 0; tot_n_bufs_reaped = 0; @@ -16149,14 +16161,17 @@ qwx_dp_process_rx_err(struct qwx_softc *sc) #ifdef notyet spin_unlock_bh(&srng->lock); #endif - for (i = 0; i < sc->num_radios; i++) { - if (!n_bufs_reaped[i]) - continue; + if (!purge) { + for (i = 0; i < sc->num_radios; i++) { + if (!n_bufs_reaped[i]) + continue; - rx_ring = &sc->pdev_dp.rx_refill_buf_ring; + rx_ring = &sc->pdev_dp.rx_refill_buf_ring; - qwx_dp_rxbufs_replenish(sc, i, rx_ring, n_bufs_reaped[i], - sc->hw_params.hal_params->rx_buf_rbm); + qwx_dp_rxbufs_replenish(sc, i, rx_ring, + n_bufs_reaped[i], + sc->hw_params.hal_params->rx_buf_rbm); + } } ifp->if_ierrors += tot_n_bufs_reaped; @@ -16306,7 +16321,7 @@ qwx_dp_rx_wbm_err(struct qwx_softc *sc, struct qwx_rx_ } int -qwx_dp_rx_process_wbm_err(struct qwx_softc *sc) +qwx_dp_rx_process_wbm_err(struct qwx_softc *sc, int purge) { struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &ic->ic_if; @@ -16382,6 +16397,18 @@ qwx_dp_rx_process_wbm_err(struct qwx_softc *sc) if (!total_num_buffs_reaped) goto done; + if (purge) { + for (i = 0; i < sc->num_radios; i++) { + while ((msdu = TAILQ_FIRST(msdu_list))) { + TAILQ_REMOVE(msdu_list, msdu, entry); + m_freem(msdu->m); + msdu->m = NULL; + } + } + + goto done; + } + for (i = 0; i < sc->num_radios; i++) { if (!num_buffs_reaped[i]) continue; @@ -16894,7 +16921,7 @@ qwx_dp_rx_process_received_packets(struct qwx_softc *s } int -qwx_dp_process_rx(struct qwx_softc *sc, int ring_id) +qwx_dp_process_rx(struct qwx_softc *sc, int ring_id, int purge) { struct qwx_dp *dp = &sc->dp; struct qwx_pdev_dp *pdev_dp = &sc->pdev_dp; @@ -17006,6 +17033,16 @@ try_again: if (!num_buffs_reaped[i]) continue; + if (purge) { + while ((msdu = TAILQ_FIRST(&msdu_list[i]))) { + TAILQ_REMOVE(&msdu_list[i], msdu, entry); + m_freem(msdu->m); + msdu->m = NULL; + } + + continue; + } + qwx_dp_rx_process_received_packets(sc, &msdu_list[i], i); rx_ring = &sc->pdev_dp.rx_refill_buf_ring; @@ -17073,7 +17110,7 @@ fail_free_mbuf: int 
qwx_dp_rx_reap_mon_status_ring(struct qwx_softc *sc, int mac_id, - struct mbuf_list *ml) + struct mbuf_list *ml, int purge) { const struct ath11k_hw_hal_params *hal_params; struct qwx_pdev_dp *dp; @@ -17154,6 +17191,15 @@ qwx_dp_rx_reap_mon_status_ring(struct qwx_softc *sc, i pmon->buf_state = DP_MON_STATUS_REPLINISH; } move_next: + if (purge) { + hal_params = sc->hw_params.hal_params; + qwx_hal_rx_buf_addr_info_set(rx_mon_status_desc, 0, 0, + hal_params->rx_buf_rbm); + qwx_hal_srng_src_get_next_entry(sc, srng); + num_buffs_reaped++; + continue; + } + m = qwx_dp_rx_alloc_mon_status_buf(sc, rx_ring, &buf_idx); if (!m) { hal_params = sc->hw_params.hal_params; @@ -17209,7 +17255,7 @@ qwx_dp_rx_process_mon_status(struct qwx_softc *sc, int #endif struct hal_rx_mon_ppdu_info *ppdu_info = &pmon->mon_ppdu_info; - num_buffs_reaped = qwx_dp_rx_reap_mon_status_ring(sc, mac_id, &ml); + num_buffs_reaped = qwx_dp_rx_reap_mon_status_ring(sc, mac_id, &ml, 0); if (!num_buffs_reaped) goto exit; @@ -17310,7 +17356,7 @@ qwx_dp_service_mon_ring(void *arg) } int -qwx_dp_process_rxdma_err(struct qwx_softc *sc, int mac_id) +qwx_dp_process_rxdma_err(struct qwx_softc *sc, int mac_id, int purge) { struct ieee80211com *ic = &sc->sc_ic; struct ifnet *ifp = &ic->ic_if; @@ -17382,7 +17428,7 @@ qwx_dp_process_rxdma_err(struct qwx_softc *sc, int mac #ifdef notyet spin_unlock_bh(&srng->lock); #endif - if (num_buf_freed) + if (num_buf_freed && !purge) qwx_dp_rxbufs_replenish(sc, mac_id, rx_ring, num_buf_freed, sc->hw_params.hal_params->rx_buf_rbm); @@ -17605,7 +17651,7 @@ qwx_hal_reo_update_rx_reo_queue_status(struct qwx_soft } int -qwx_dp_process_reo_status(struct qwx_softc *sc) +qwx_dp_process_reo_status(struct qwx_softc *sc, int purge) { struct qwx_dp *dp = &sc->dp; struct hal_srng *srng; @@ -17623,8 +17669,11 @@ qwx_dp_process_reo_status(struct qwx_softc *sc) qwx_hal_srng_access_begin(sc, srng); while ((reo_desc = qwx_hal_srng_dst_get_next_entry(sc, srng))) { - ret = 1; + ret++; + if (purge) + 
continue; + tag = FIELD_GET(HAL_SRNG_TLV_HDR_TAG, *reo_desc); switch (tag) { case HAL_REO_GET_QUEUE_STATS_STATUS: @@ -17705,16 +17754,16 @@ qwx_dp_service_srng(struct qwx_softc *sc, int grp_id) } if (sc->hw_params.ring_mask->rx_err[grp_id] && - qwx_dp_process_rx_err(sc)) + qwx_dp_process_rx_err(sc, 0)) ret = 1; if (sc->hw_params.ring_mask->rx_wbm_rel[grp_id] && - qwx_dp_rx_process_wbm_err(sc)) + qwx_dp_rx_process_wbm_err(sc, 0)) ret = 1; if (sc->hw_params.ring_mask->rx[grp_id]) { i = fls(sc->hw_params.ring_mask->rx[grp_id]) - 1; - if (qwx_dp_process_rx(sc, i)) + if (qwx_dp_process_rx(sc, i, 0)) ret = 1; } @@ -17732,7 +17781,7 @@ qwx_dp_service_srng(struct qwx_softc *sc, int grp_id) } if (sc->hw_params.ring_mask->reo_status[grp_id] && - qwx_dp_process_reo_status(sc)) + qwx_dp_process_reo_status(sc, 0)) ret = 1; for (i = 0; i < sc->num_radios; i++) { @@ -17741,7 +17790,7 @@ qwx_dp_service_srng(struct qwx_softc *sc, int grp_id) if (sc->hw_params.ring_mask->rxdma2host[grp_id] & (1 << (id))) { - if (qwx_dp_process_rxdma_err(sc, id)) + if (qwx_dp_process_rxdma_err(sc, id, 0)) ret = 1; } @@ -19860,11 +19909,53 @@ err_wmi_detach: } void +qwx_flush_rx_rings(struct qwx_softc *sc) +{ + struct mbuf_list ml = MBUF_LIST_INITIALIZER(); + int i, j, n; + + do { + n = qwx_dp_process_rx_err(sc, 1); + } while (n > 0); + + do { + n = qwx_dp_rx_process_wbm_err(sc, 1); + } while (n > 0); + + do { + n = qwx_dp_process_reo_status(sc, 1); + } while (n > 0); + + for (i = 0; i < DP_REO_DST_RING_MAX; i++) { + do { + n = qwx_dp_process_rx(sc, i, 1); + } while (n > 0); + } + + for (i = 0; i < sc->num_radios; i++) { + for (j = 0; j < sc->hw_params.num_rxmda_per_pdev; j++) { + int mac_id = i * sc->hw_params.num_rxmda_per_pdev + j; + + do { + n = qwx_dp_process_rxdma_err(sc, mac_id, 1); + } while (n > 0); + do { + n = qwx_dp_rx_reap_mon_status_ring(sc, + mac_id, &ml, 1); + ml_purge(&ml); + } while (n > 0); + } + } +} + +void qwx_core_stop(struct qwx_softc *sc) { if 
(!test_bit(ATH11K_FLAG_CRASH_FLUSH, sc->sc_flags)) qwx_qmi_firmware_stop(sc); + qwx_flush_rx_rings(sc); + sc->ops.stop(sc); qwx_wmi_detach(sc); qwx_dp_pdev_reo_cleanup(sc);