From: Mark Kettenis Subject: Re: qwz: enable WPA2 association on WCN7850 To: Stefan Sperling Cc: marcus@nazgul.ch, tech@openbsd.org, kirill@korins.ky, mail@patrick-wildt.de Date: Sun, 26 Apr 2026 14:23:10 +0200 > Date: Sun, 26 Apr 2026 07:56:10 +0200 > From: Stefan Sperling > On Sat, Apr 25, 2026 at 11:56:07PM +0200, Marcus Glocker wrote: > > Bring the qwz driver up to a working WPA2 client connection on the > > Qualcomm WCN7850 chip. Tested on the Samsung Galaxy Book4 Edge. > > Thanks for making progress on this. It looks like there are potential > fixes in here for qwx as well, in particular: ... > > 5. Add non-coherent DMA cache sync on RX and TX. > > Without explicit flushes the CPU and FW see different bytes for > > the same buffer. This was the root cause of "garbage RX frames": > > they were always real EAPOL Msg 1 frames torn by stale CPU cache > > lines. > > Again, this sounds like something which would apply to qwx as well. So here is a diff I wrote in early March while working on DMA bounce buffers. With this diff, qwx(4) works when the DMA constraint on mbufs is removed. The one-liner to do that is included in the diff. This could use a 2nd pair of eyes. 
Index: dev/ic/qwx.c =================================================================== RCS file: /cvs/src/sys/dev/ic/qwx.c,v diff -u -p -r1.103 qwx.c --- dev/ic/qwx.c 29 Mar 2026 05:29:02 -0000 1.103 +++ dev/ic/qwx.c 26 Apr 2026 12:14:31 -0000 @@ -14317,6 +14317,9 @@ qwx_htc_send(struct qwx_htc *htc, enum a goto err_credits; } + bus_dmamap_sync(sc->sc_dmat, tx_data->map, 0, + tx_data->map->dm_mapsize, BUS_DMASYNC_PREWRITE); + DNPRINTF(QWX_D_HTC, "%s: tx mbuf %p eid %d paddr %lx\n", __func__, m, tx_data->eid, tx_data->map->dm_segs[0].ds_addr); #ifdef QWX_DEBUG @@ -16367,6 +16370,8 @@ qwx_dp_process_rx_err_buf(struct qwx_sof return; rx_data = &rx_ring->rx_data[buf_id]; + bus_dmamap_sync(sc->sc_dmat, rx_data->map, 0, + rx_data->map->dm_mapsize, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_dmat, rx_data->map); m = rx_data->m; rx_data->m = NULL; @@ -16706,6 +16711,8 @@ qwx_dp_rx_process_wbm_err(struct qwx_sof continue; rx_data = &rx_ring->rx_data[idx]; + bus_dmamap_sync(sc->sc_dmat, rx_data->map, 0, + rx_data->map->dm_mapsize, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_dmat, rx_data->map); m = rx_data->m; rx_data->m = NULL; @@ -17491,6 +17498,8 @@ try_again: continue; rx_data = &rx_ring->rx_data[idx]; + bus_dmamap_sync(sc->sc_dmat, rx_data->map, 0, + rx_data->map->dm_mapsize, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_dmat, rx_data->map); m = rx_data->m; rx_data->m = NULL; @@ -17684,7 +17693,7 @@ qwx_dp_rx_reap_mon_status_ring(struct qw rx_data = &rx_ring->rx_data[buf_idx]; bus_dmamap_sync(sc->sc_dmat, rx_data->map, 0, - rx_data->m->m_pkthdr.len, BUS_DMASYNC_POSTREAD); + rx_data->map->dm_mapsize, BUS_DMASYNC_POSTREAD); tlv = mtod(rx_data->m, struct hal_tlv_hdr *); if (FIELD_GET(HAL_TLV_HDR_TAG, tlv->tl) != @@ -22768,6 +22777,8 @@ qwx_ce_recv_process_cb(struct qwx_ce_pip &nbytes) == 0) { struct qwx_rx_data *rx_data = transfer_context; + bus_dmamap_sync(sc->sc_dmat, rx_data->map, 0, + rx_data->map->dm_mapsize, BUS_DMASYNC_POSTREAD); 
bus_dmamap_unload(sc->sc_dmat, rx_data->map); m = rx_data->m; rx_data->m = NULL; @@ -25330,6 +25341,9 @@ qwx_dp_tx(struct qwx_softc *sc, struct q return ENOMEM; } + bus_dmamap_sync(sc->sc_dmat, tx_data->map, 0, + tx_data->map->dm_mapsize, BUS_DMASYNC_PREWRITE); + tx_data->m = m; tx_data->ni = ni; @@ -25490,6 +25504,9 @@ qwx_mac_mgmt_tx_wmi(struct qwx_softc *sc return ret; } } + + bus_dmamap_sync(sc->sc_dmat, tx_data->map, 0, + tx_data->map->dm_mapsize, BUS_DMASYNC_PREWRITE); ret = qwx_wmi_mgmt_send(sc, arvif, pdev_id, buf_id, m, tx_data); if (ret) { Index: dev/pci/if_qwx_pci.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_qwx_pci.c,v diff -u -p -r1.31 if_qwx_pci.c --- dev/pci/if_qwx_pci.c 20 Jan 2026 11:19:50 -0000 1.31 +++ dev/pci/if_qwx_pci.c 26 Apr 2026 12:14:31 -0000 @@ -2524,7 +2524,8 @@ qwx_mhi_init_event_rings(struct qwx_pci_ } bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->event_ctxt), 0, - QWX_DMA_LEN(psc->event_ctxt), BUS_DMASYNC_PREWRITE); + QWX_DMA_LEN(psc->event_ctxt), + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); } void @@ -2590,7 +2591,7 @@ qwx_mhi_cmd_ring_submit(struct qwx_pci_s ring->wp += sizeof(struct qwx_mhi_ring_element); bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->cmd_ctxt), 0, - QWX_DMA_LEN(psc->cmd_ctxt), BUS_DMASYNC_POSTREAD); + QWX_DMA_LEN(psc->cmd_ctxt), BUS_DMASYNC_POSTWRITE); c = (struct qwx_mhi_cmd_ctxt *)QWX_DMA_KVA(psc->cmd_ctxt); c->wp = htole64(ring->wp); @@ -2625,6 +2626,9 @@ qwx_mhi_send_cmd(struct qwx_pci_softc *p ((chan << MHI_TRE_CMD_CHID_SHFT) & MHI_TRE_CMD_CHID_MASK) | ((cmd << MHI_TRE_CMD_CMDID_SHFT) & MHI_TRE_CMD_CMDID_MASK)); + bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(ring->dmamem), + 0, QWX_DMA_LEN(ring->dmamem), BUS_DMASYNC_PREWRITE); + return qwx_mhi_cmd_ring_submit(psc, ring); } @@ -2709,7 +2713,7 @@ qwx_mhi_submit_xfer(struct qwx_softc *sc } } - bus_dmamap_sync(sc->sc_dmat, xfer->map, 0, m->m_pkthdr.len, + bus_dmamap_sync(sc->sc_dmat, xfer->map, 0, 
xfer->map->dm_mapsize, BUS_DMASYNC_PREWRITE); xfer->m = m; @@ -2731,6 +2735,9 @@ qwx_mhi_submit_xfer(struct qwx_softc *sc ring->wp += sizeof(struct qwx_mhi_ring_element); ring->queued++; + bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->chan_ctxt), 0, + QWX_DMA_LEN(psc->chan_ctxt), BUS_DMASYNC_POSTWRITE); + ring->chan_ctxt->wp = htole64(ring->wp); bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->chan_ctxt), 0, @@ -2755,6 +2762,9 @@ qwx_mhi_start_channel(struct qwx_pci_sof c = ring->chan_ctxt; + bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->chan_ctxt), 0, + QWX_DMA_LEN(psc->chan_ctxt), BUS_DMASYNC_POSTWRITE); + chcfg = le32toh(c->chcfg); chcfg &= ~MHI_CHAN_CTX_CHSTATE_MASK; chcfg |= MHI_CHAN_CTX_CHSTATE_ENABLED; @@ -3708,6 +3718,7 @@ void qwx_pci_intr_ctrl_event_cmd_complete(struct qwx_pci_softc *psc, uint64_t ptr, uint32_t cmd_status) { + struct qwx_softc *sc = &psc->sc_sc; struct qwx_pci_cmd_ring *cmd_ring = &psc->cmd_ring; uint64_t base = QWX_DMA_DVA(cmd_ring->dmamem); struct qwx_pci_xfer_ring *xfer_ring = NULL; @@ -3719,6 +3730,9 @@ qwx_pci_intr_ctrl_event_cmd_complete(str if (e == NULL) return; + bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(cmd_ring->dmamem), + 0, QWX_DMA_LEN(cmd_ring->dmamem), BUS_DMASYNC_POSTREAD); + tre1 = le32toh(e->dword[1]); chid = (tre1 & MHI_TRE1_EV_CHID_MASK) >> MHI_TRE1_EV_CHID_SHFT; @@ -3762,7 +3776,8 @@ qwx_pci_intr_ctrl_event(struct qwx_pci_s } bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->event_ctxt), 0, - QWX_DMA_LEN(psc->event_ctxt), BUS_DMASYNC_POSTREAD); + QWX_DMA_LEN(psc->event_ctxt), + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); rp = le64toh(c->rp); wp = le64toh(c->wp); @@ -3827,7 +3842,8 @@ qwx_pci_intr_ctrl_event(struct qwx_pci_s c->wp = htole64(ring->wp); bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->event_ctxt), 0, - QWX_DMA_LEN(psc->event_ctxt), BUS_DMASYNC_PREWRITE); + QWX_DMA_LEN(psc->event_ctxt), + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); qwx_mhi_ring_doorbell(sc, ring->db_addr, ring->wp); return 1; @@ -3890,7 +3906,7 
@@ qwx_pci_intr_data_event_tx(struct qwx_pc len = xfer->m->m_pkthdr.len; bus_dmamap_sync(sc->sc_dmat, xfer->map, 0, - xfer->m->m_pkthdr.len, BUS_DMASYNC_POSTREAD); + xfer->map->dm_mapsize, BUS_DMASYNC_POSTREAD); #ifdef QWX_DEBUG { int i; @@ -3950,6 +3966,11 @@ qwx_pci_intr_data_event_tx(struct qwx_pc } if (ring->mhi_chan_direction == MHI_CHAN_TYPE_INBOUND) { + bus_dmamap_sync(sc->sc_dmat, + QWX_DMA_MAP(psc->chan_ctxt), 0, + QWX_DMA_LEN(psc->chan_ctxt), + BUS_DMASYNC_POSTWRITE); + ring->chan_ctxt->wp = htole64(ring->wp); bus_dmamap_sync(sc->sc_dmat, @@ -3985,7 +4006,8 @@ qwx_pci_intr_data_event(struct qwx_pci_s } bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->event_ctxt), 0, - QWX_DMA_LEN(psc->event_ctxt), BUS_DMASYNC_POSTREAD); + QWX_DMA_LEN(psc->event_ctxt), + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); rp = le64toh(c->rp); wp = le64toh(c->wp); @@ -4038,10 +4060,14 @@ qwx_pci_intr_data_event(struct qwx_pci_s ring->wp += sizeof(*e); } + bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(ring->dmamem), + 0, QWX_DMA_LEN(ring->dmamem), BUS_DMASYNC_PREWRITE); + c->wp = htole64(ring->wp); bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->event_ctxt), 0, - QWX_DMA_LEN(psc->event_ctxt), BUS_DMASYNC_PREWRITE); + QWX_DMA_LEN(psc->event_ctxt), + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); qwx_mhi_ring_doorbell(sc, ring->db_addr, ring->wp); return 1; Index: kern/uipc_mbuf.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v diff -u -p -r1.305 uipc_mbuf.c --- kern/uipc_mbuf.c 22 Apr 2026 21:58:53 -0000 1.305 +++ kern/uipc_mbuf.c 26 Apr 2026 12:14:33 -0000 @@ -1477,7 +1477,7 @@ void m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg) { pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator); - pool_set_constraints(pp, &kp_dma_contig); +// pool_set_constraints(pp, &kp_dma_contig); } u_int