Index | Thread | Search

From:
Patrick Wildt <patrick@blueri.se>
Subject:
Re: smmu(4): don't always set DMA coherent tag
To:
Landry Breuil <landry@openbsd.org>
Cc:
tech@openbsd.org, kettenis@openbsd.org
Date:
Sun, 21 Sep 2025 10:29:15 +0200

Download raw body.

Thread
On Thu, Aug 28, 2025 at 08:28:31AM +0200, Landry Breuil wrote:
> Le Wed, Aug 27, 2025 at 02:03:39PM +0200, Patrick Wildt a ?crit :
> > Hi,
> > 
> > SMMUs don't necessarily influence device DMA coherency attributes.
> > I assume we have been lucky so far that our machines that have an
> > SMMUv2 usually have devices with DMA coherency.  On the RK3588
> > this is not the case, and us always adding the COHERENT flag makes
> > devices fail to work when used with smmu(4) enabled.
> > 
> > Please give this a run on machines where "dmesg | grep ^smmu" shows
> > some output.
> 
> i dont have regressions on the omnibook x14 with that patch and the
> previous, but smmu is still disabled. should i test with it enabled ?
> that's only adding 'early 1' to the 'smmu* at fdt?' line right ?

Needs slightly more on top than just 'early 1':

diff --git a/sys/arch/arm64/conf/GENERIC b/sys/arch/arm64/conf/GENERIC
index 93273767ec7..9998b4ad8c7 100644
--- a/sys/arch/arm64/conf/GENERIC
+++ b/sys/arch/arm64/conf/GENERIC
@@ -93,7 +93,7 @@ pci*		at pciecam?
 sdhc*		at fdt?
 sdmmc*		at sdhc?
 bwfm*		at sdmmc?	# Broadcom FullMAC
-smmu*		at fdt?
+smmu*		at fdt? early 1
 xhci*		at fdt?
 ccp*		at fdt?		# AMD Cryptographic Co-processor
 ipmi*		at fdt?
diff --git a/sys/arch/arm64/dev/smmu.c b/sys/arch/arm64/dev/smmu.c
index fafbe3189b5..68deb31f382 100644
--- a/sys/arch/arm64/dev/smmu.c
+++ b/sys/arch/arm64/dev/smmu.c
@@ -73,6 +73,7 @@ uint64_t smmu_cb_read_8(struct smmu_softc *, int, bus_size_t);
 void smmu_cb_write_8(struct smmu_softc *, int, bus_size_t, uint64_t);
 
 int smmu_v2_domain_create(struct smmu_domain *);
+void smmu_v2_domain_enable(struct smmu_domain *);
 void smmu_v2_tlbi_va(struct smmu_domain *, vaddr_t);
 void smmu_v2_tlb_sync_global(struct smmu_softc *);
 void smmu_v2_tlb_sync_context(struct smmu_domain *);
@@ -123,6 +124,7 @@ int smmu_v3_write_ack(struct smmu_softc *, bus_size_t, bus_size_t,
      uint32_t);
 
 int smmu_v3_domain_create(struct smmu_domain *);
+void smmu_v3_domain_enable(struct smmu_domain *);
 void smmu_v3_cfgi_all(struct smmu_softc *);
 void smmu_v3_cfgi_cd(struct smmu_domain *);
 void smmu_v3_cfgi_ste(struct smmu_domain *);
@@ -379,6 +381,7 @@ smmu_v2_attach(struct smmu_softc *sc)
 	smmu_gr0_write_4(sc, SMMU_SCR0, reg);
 
 	sc->sc_domain_create = smmu_v2_domain_create;
+	sc->sc_domain_enable = smmu_v2_domain_enable;
 	sc->sc_tlbi_va = smmu_v2_tlbi_va;
 	sc->sc_tlb_sync_context = smmu_v2_tlb_sync_context;
 	return 0;
@@ -814,6 +817,24 @@ smmu_v2_domain_create(struct smmu_domain *dom)
 		reg |= SMMU_CB_SCTLR_ASIDPNE;
 	smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_SCTLR, reg);
 
+	snprintf(dom->sd_exname, sizeof(dom->sd_exname), "%s:%x",
+	    sc->sc_dev.dv_xname, dom->sd_sid);
+	dom->sd_iovamap = extent_create(dom->sd_exname, 0,
+	    (1LL << iovabits) - 1, M_DEVBUF, NULL, 0, EX_WAITOK |
+	    EX_NOCOALESCE);
+
+	return 0;
+}
+
+void
+smmu_v2_domain_enable(struct smmu_domain *dom)
+{
+	struct smmu_softc *sc = dom->sd_sc;
+	uint32_t reg;
+
+	if (dom->sd_enabled)
+		return;
+
 	/* Point stream to context block */
 	reg = SMMU_S2CR_TYPE_TRANS | dom->sd_cb_idx;
 	if (sc->sc_has_exids && sc->sc_smr)
@@ -829,13 +850,7 @@ smmu_v2_domain_create(struct smmu_domain *dom)
 		smmu_gr0_write_4(sc, SMMU_SMR(dom->sd_smr_idx), reg);
 	}
 
-	snprintf(dom->sd_exname, sizeof(dom->sd_exname), "%s:%x",
-	    sc->sc_dev.dv_xname, dom->sd_sid);
-	dom->sd_iovamap = extent_create(dom->sd_exname, 0,
-	    (1LL << iovabits) - 1, M_DEVBUF, NULL, 0, EX_WAITOK |
-	    EX_NOCOALESCE);
-
-	return 0;
+	dom->sd_enabled = 1;
 }
 
 void
@@ -1348,6 +1363,8 @@ smmu_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
 	u_long dva, len;
 	int error;
 
+	sc->sc_domain_enable(dom);
+
 	error = sc->sc_dmat->_dmamap_create(sc->sc_dmat, size,
 	    nsegments, maxsegsz, boundary, flags, &map);
 	if (error)
@@ -1781,6 +1798,7 @@ smmu_v3_attach(struct smmu_softc *sc)
 	printf("\n");
 
 	sc->sc_domain_create = smmu_v3_domain_create;
+	sc->sc_domain_enable = smmu_v3_domain_enable;
 	sc->sc_tlbi_va = smmu_v3_tlbi_va;
 	sc->sc_tlb_sync_context = smmu_v3_tlb_sync_context;
 	return 0;
@@ -2113,6 +2131,15 @@ smmu_v3_domain_create(struct smmu_domain *dom)
 	return 0;
 }
 
+void
+smmu_v3_domain_enable(struct smmu_domain *dom)
+{
+	if (dom->sd_enabled)
+		return;
+
+	dom->sd_enabled = 1;
+}
+
 int
 smmu_v3_sync(struct smmu_softc *sc)
 {
diff --git a/sys/arch/arm64/dev/smmu_fdt.c b/sys/arch/arm64/dev/smmu_fdt.c
index 77bc7132055..2d55ee3c82a 100644
--- a/sys/arch/arm64/dev/smmu_fdt.c
+++ b/sys/arch/arm64/dev/smmu_fdt.c
@@ -129,11 +129,6 @@ smmu_v2_fdt_attach(struct smmu_fdt_softc *fsc, int node)
 	if (OF_getproplen(node, "dma-coherent") == 0)
 		sc->sc_coherent = 1;
 
-	if (sc->sc_is_qcom) {
-		printf(": disabled\n");
-		return ENXIO;
-	}
-
 	if (smmu_v2_attach(sc) != 0)
 		return ENXIO;
 
diff --git a/sys/arch/arm64/dev/smmuvar.h b/sys/arch/arm64/dev/smmuvar.h
index 045dfbd1fd9..b3fb3cfc590 100644
--- a/sys/arch/arm64/dev/smmuvar.h
+++ b/sys/arch/arm64/dev/smmuvar.h
@@ -25,6 +25,7 @@ struct smmu_dmamem {
 struct smmu_softc;
 struct smmu_domain {
 	struct smmu_softc		*sd_sc;
+	uint32_t			 sd_enabled;
 	uint32_t			 sd_sid;
 	bus_dma_tag_t			 sd_dmat;
 
@@ -122,6 +123,7 @@ struct smmu_softc {
 	SIMPLEQ_HEAD(, smmu_domain) sc_domains;
 
 	int			 (*sc_domain_create)(struct smmu_domain *);
+	void			 (*sc_domain_enable)(struct smmu_domain *);
 	void			 (*sc_tlbi_va)(struct smmu_domain *, vaddr_t);
 	void			 (*sc_tlb_sync_context)(struct smmu_domain *);
 };