Index | Thread | Search

From:
Mark Kettenis <mark.kettenis@xs4all.nl>
Subject:
Re: qwz: enable WPA2 association on WCN7850
To:
Stefan Sperling <stsp@stsp.name>
Cc:
marcus@nazgul.ch, tech@openbsd.org, kirill@korins.ky, mail@patrick-wildt.de
Date:
Sun, 26 Apr 2026 14:23:10 +0200

Download raw body.

Thread
> Date: Sun, 26 Apr 2026 07:56:10 +0200
> From: Stefan Sperling <stsp@stsp.name>

> On Sat, Apr 25, 2026 at 11:56:07PM +0200, Marcus Glocker wrote:
> > Bring the qwz driver up to a working WPA2 client connection on the
> > Qualcomm WCN7850 chip.  Tested on the Samsung Galaxy Book4 Edge.
> 
> Thanks for making progress on this. It looks like there are potential
> fixes in here for qwx as well, in particular:

...

> > 5. Add non-coherent DMA cache sync on RX and TX.
> >    Without explicit flushes the CPU and FW see different bytes for
> >    the same buffer.  This was the root cause of "garbage RX frames":
> >    they were always real EAPOL Msg 1 frames torn by stale CPU cache
> >    lines.
> 
> Again, this sounds like something which would apply to qwx as well.

So here is a diff I wrote in early March while working on DMA bounce
buffers.  With this diff, qwx(4) works when the DMA constraint on
mbufs is removed.  The one-liner to do that is included in the diff.
This could use a 2nd pair of eyes.


Index: dev/ic/qwx.c
===================================================================
RCS file: /cvs/src/sys/dev/ic/qwx.c,v
diff -u -p -r1.103 qwx.c
--- dev/ic/qwx.c	29 Mar 2026 05:29:02 -0000	1.103
+++ dev/ic/qwx.c	26 Apr 2026 12:14:31 -0000
@@ -14317,6 +14317,9 @@ qwx_htc_send(struct qwx_htc *htc, enum a
 		goto err_credits;
 	}
 
+	bus_dmamap_sync(sc->sc_dmat, tx_data->map, 0,
+	    tx_data->map->dm_mapsize, BUS_DMASYNC_PREWRITE);
+
 	DNPRINTF(QWX_D_HTC, "%s: tx mbuf %p eid %d paddr %lx\n",
 	    __func__, m, tx_data->eid, tx_data->map->dm_segs[0].ds_addr);
 #ifdef QWX_DEBUG
@@ -16367,6 +16370,8 @@ qwx_dp_process_rx_err_buf(struct qwx_sof
 		return;
 
 	rx_data = &rx_ring->rx_data[buf_id];
+	bus_dmamap_sync(sc->sc_dmat, rx_data->map, 0,
+	    rx_data->map->dm_mapsize, BUS_DMASYNC_POSTREAD);
 	bus_dmamap_unload(sc->sc_dmat, rx_data->map);
 	m = rx_data->m;
 	rx_data->m = NULL;
@@ -16706,6 +16711,8 @@ qwx_dp_rx_process_wbm_err(struct qwx_sof
 			continue;
 
 		rx_data = &rx_ring->rx_data[idx];
+		bus_dmamap_sync(sc->sc_dmat, rx_data->map, 0,
+		    rx_data->map->dm_mapsize, BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc->sc_dmat, rx_data->map);
 		m = rx_data->m;
 		rx_data->m = NULL;
@@ -17491,6 +17498,8 @@ try_again:
 			continue;
 
 		rx_data = &rx_ring->rx_data[idx];
+		bus_dmamap_sync(sc->sc_dmat, rx_data->map, 0,
+		    rx_data->map->dm_mapsize, BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc->sc_dmat, rx_data->map);
 		m = rx_data->m;
 		rx_data->m = NULL;
@@ -17684,7 +17693,7 @@ qwx_dp_rx_reap_mon_status_ring(struct qw
 			rx_data = &rx_ring->rx_data[buf_idx];
 
 			bus_dmamap_sync(sc->sc_dmat, rx_data->map, 0,
-			    rx_data->m->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
+			    rx_data->map->dm_mapsize, BUS_DMASYNC_POSTREAD);
 
 			tlv = mtod(rx_data->m, struct hal_tlv_hdr *);
 			if (FIELD_GET(HAL_TLV_HDR_TAG, tlv->tl) !=
@@ -22768,6 +22777,8 @@ qwx_ce_recv_process_cb(struct qwx_ce_pip
 	    &nbytes) == 0) {
 		struct qwx_rx_data *rx_data = transfer_context;
 
+		bus_dmamap_sync(sc->sc_dmat, rx_data->map, 0,
+		    rx_data->map->dm_mapsize, BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc->sc_dmat, rx_data->map);
 		m = rx_data->m;
 		rx_data->m = NULL;
@@ -25330,6 +25341,9 @@ qwx_dp_tx(struct qwx_softc *sc, struct q
 		return ENOMEM;
 	}
 
+	bus_dmamap_sync(sc->sc_dmat, tx_data->map, 0,
+	    tx_data->map->dm_mapsize, BUS_DMASYNC_PREWRITE);
+
 	tx_data->m = m;
 	tx_data->ni = ni;
 
@@ -25490,6 +25504,9 @@ qwx_mac_mgmt_tx_wmi(struct qwx_softc *sc
 			return ret;
 		}
 	}
+
+	bus_dmamap_sync(sc->sc_dmat, tx_data->map, 0,
+	    tx_data->map->dm_mapsize, BUS_DMASYNC_PREWRITE);
 
 	ret = qwx_wmi_mgmt_send(sc, arvif, pdev_id, buf_id, m, tx_data);
 	if (ret) {
Index: dev/pci/if_qwx_pci.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_qwx_pci.c,v
diff -u -p -r1.31 if_qwx_pci.c
--- dev/pci/if_qwx_pci.c	20 Jan 2026 11:19:50 -0000	1.31
+++ dev/pci/if_qwx_pci.c	26 Apr 2026 12:14:31 -0000
@@ -2524,7 +2524,8 @@ qwx_mhi_init_event_rings(struct qwx_pci_
 	}
 
 	bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->event_ctxt), 0,
-	    QWX_DMA_LEN(psc->event_ctxt), BUS_DMASYNC_PREWRITE);
+	    QWX_DMA_LEN(psc->event_ctxt),
+	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 }
 
 void
@@ -2590,7 +2591,7 @@ qwx_mhi_cmd_ring_submit(struct qwx_pci_s
 		ring->wp += sizeof(struct qwx_mhi_ring_element);
 
 	bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->cmd_ctxt), 0,
-	    QWX_DMA_LEN(psc->cmd_ctxt), BUS_DMASYNC_POSTREAD);
+	    QWX_DMA_LEN(psc->cmd_ctxt), BUS_DMASYNC_POSTWRITE);
 
 	c = (struct qwx_mhi_cmd_ctxt *)QWX_DMA_KVA(psc->cmd_ctxt);
 	c->wp = htole64(ring->wp);
@@ -2625,6 +2626,9 @@ qwx_mhi_send_cmd(struct qwx_pci_softc *p
 	    ((chan << MHI_TRE_CMD_CHID_SHFT) & MHI_TRE_CMD_CHID_MASK) |
 	    ((cmd << MHI_TRE_CMD_CMDID_SHFT) & MHI_TRE_CMD_CMDID_MASK));
 
+	bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(ring->dmamem),
+	    0, QWX_DMA_LEN(ring->dmamem), BUS_DMASYNC_PREWRITE);
+
 	return qwx_mhi_cmd_ring_submit(psc, ring);
 }
 
@@ -2709,7 +2713,7 @@ qwx_mhi_submit_xfer(struct qwx_softc *sc
 		}
 	}
 
-	bus_dmamap_sync(sc->sc_dmat, xfer->map, 0, m->m_pkthdr.len,
+	bus_dmamap_sync(sc->sc_dmat, xfer->map, 0, xfer->map->dm_mapsize,
 	    BUS_DMASYNC_PREWRITE);
 
 	xfer->m = m;
@@ -2731,6 +2735,9 @@ qwx_mhi_submit_xfer(struct qwx_softc *sc
 		ring->wp += sizeof(struct qwx_mhi_ring_element);
 	ring->queued++;
 
+	bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->chan_ctxt), 0,
+	    QWX_DMA_LEN(psc->chan_ctxt), BUS_DMASYNC_POSTWRITE);
+
 	ring->chan_ctxt->wp = htole64(ring->wp);
 
 	bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->chan_ctxt), 0,
@@ -2755,6 +2762,9 @@ qwx_mhi_start_channel(struct qwx_pci_sof
 
 	c = ring->chan_ctxt;
 
+	bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->chan_ctxt), 0,
+	    QWX_DMA_LEN(psc->chan_ctxt), BUS_DMASYNC_POSTWRITE);
+
 	chcfg = le32toh(c->chcfg);
 	chcfg &= ~MHI_CHAN_CTX_CHSTATE_MASK;
 	chcfg |= MHI_CHAN_CTX_CHSTATE_ENABLED;
@@ -3708,6 +3718,7 @@ void
 qwx_pci_intr_ctrl_event_cmd_complete(struct qwx_pci_softc *psc,
     uint64_t ptr, uint32_t cmd_status)
 {
+	struct qwx_softc *sc = &psc->sc_sc;
 	struct qwx_pci_cmd_ring	*cmd_ring = &psc->cmd_ring;
 	uint64_t base = QWX_DMA_DVA(cmd_ring->dmamem);
 	struct qwx_pci_xfer_ring *xfer_ring = NULL;
@@ -3719,6 +3730,9 @@ qwx_pci_intr_ctrl_event_cmd_complete(str
 	if (e == NULL)
 		return;
 
+	bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(cmd_ring->dmamem),
+	    0, QWX_DMA_LEN(cmd_ring->dmamem), BUS_DMASYNC_POSTREAD);
+
 	tre1 = le32toh(e->dword[1]);
 	chid = (tre1 & MHI_TRE1_EV_CHID_MASK) >> MHI_TRE1_EV_CHID_SHFT;
 
@@ -3762,7 +3776,8 @@ qwx_pci_intr_ctrl_event(struct qwx_pci_s
 	}
 
 	bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->event_ctxt), 0,
-	    QWX_DMA_LEN(psc->event_ctxt), BUS_DMASYNC_POSTREAD);
+	    QWX_DMA_LEN(psc->event_ctxt),
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 	rp = le64toh(c->rp);
 	wp = le64toh(c->wp);
@@ -3827,7 +3842,8 @@ qwx_pci_intr_ctrl_event(struct qwx_pci_s
 	c->wp = htole64(ring->wp);
 
 	bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->event_ctxt), 0,
-	    QWX_DMA_LEN(psc->event_ctxt), BUS_DMASYNC_PREWRITE);
+	    QWX_DMA_LEN(psc->event_ctxt),
+	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	qwx_mhi_ring_doorbell(sc, ring->db_addr, ring->wp);
 	return 1;
@@ -3890,7 +3906,7 @@ qwx_pci_intr_data_event_tx(struct qwx_pc
 				len = xfer->m->m_pkthdr.len;
 
 			bus_dmamap_sync(sc->sc_dmat, xfer->map, 0,
-			    xfer->m->m_pkthdr.len, BUS_DMASYNC_POSTREAD);
+			    xfer->map->dm_mapsize, BUS_DMASYNC_POSTREAD);
 #ifdef QWX_DEBUG
 			{
 			int i;
@@ -3950,6 +3966,11 @@ qwx_pci_intr_data_event_tx(struct qwx_pc
 		}
 
 		if (ring->mhi_chan_direction == MHI_CHAN_TYPE_INBOUND) {
+			bus_dmamap_sync(sc->sc_dmat,
+			    QWX_DMA_MAP(psc->chan_ctxt), 0,
+			    QWX_DMA_LEN(psc->chan_ctxt),
+			    BUS_DMASYNC_POSTWRITE);
+
 			ring->chan_ctxt->wp = htole64(ring->wp);
 
 			bus_dmamap_sync(sc->sc_dmat,
@@ -3985,7 +4006,8 @@ qwx_pci_intr_data_event(struct qwx_pci_s
 	}
 
 	bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->event_ctxt), 0,
-	    QWX_DMA_LEN(psc->event_ctxt), BUS_DMASYNC_POSTREAD);
+	    QWX_DMA_LEN(psc->event_ctxt),
+	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 	rp = le64toh(c->rp);
 	wp = le64toh(c->wp);
@@ -4038,10 +4060,14 @@ qwx_pci_intr_data_event(struct qwx_pci_s
 			ring->wp += sizeof(*e);
 	}
 
+	bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(ring->dmamem),
+	    0, QWX_DMA_LEN(ring->dmamem), BUS_DMASYNC_PREWRITE);
+
 	c->wp = htole64(ring->wp);
 
 	bus_dmamap_sync(sc->sc_dmat, QWX_DMA_MAP(psc->event_ctxt), 0,
-	    QWX_DMA_LEN(psc->event_ctxt), BUS_DMASYNC_PREWRITE);
+	    QWX_DMA_LEN(psc->event_ctxt),
+	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	qwx_mhi_ring_doorbell(sc, ring->db_addr, ring->wp);
 	return 1;
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
diff -u -p -r1.305 uipc_mbuf.c
--- kern/uipc_mbuf.c	22 Apr 2026 21:58:53 -0000	1.305
+++ kern/uipc_mbuf.c	26 Apr 2026 12:14:33 -0000
@@ -1477,7 +1477,7 @@ void
 m_pool_init(struct pool *pp, u_int size, u_int align, const char *wmesg)
 {
 	pool_init(pp, size, align, IPL_NET, 0, wmesg, &m_pool_allocator);
-	pool_set_constraints(pp, &kp_dma_contig);
+//	pool_set_constraints(pp, &kp_dma_contig);
 }
 
 u_int