From: Patrick Wildt Subject: Re: smmu(4): don't always set DMA coherent tag To: Landry Breuil Cc: tech@openbsd.org, kettenis@openbsd.org Date: Sun, 21 Sep 2025 10:29:15 +0200 On Thu, Aug 28, 2025 at 08:28:31AM +0200, Landry Breuil wrote: > Le Wed, Aug 27, 2025 at 02:03:39PM +0200, Patrick Wildt a ?crit : > > Hi, > > > > SMMUs don't necessarily influence device DMA coherency attributes. > > I assume we have been lucky so far that our machines that have an > > SMMUv2 usually have devices with DMA coherency. On the RK3588 > > this is not the case, and us always adding the COHERENT flag makes > > devices fail to work when used with smmu(4) enabled. > > > > Please give this a run on machines where "dmesg | grep ^smmu" shows > > some output. > > i dont have regressions on the omnibook x14 with that patch and the > previous, but smmu is still disabled. should i test with it enabled ? > that's only adding 'early 1' to the 'smmu* at fdt?' line right ? Needs slightly more on top than just 'early 1': diff --git a/sys/arch/arm64/conf/GENERIC b/sys/arch/arm64/conf/GENERIC index 93273767ec7..9998b4ad8c7 100644 --- a/sys/arch/arm64/conf/GENERIC +++ b/sys/arch/arm64/conf/GENERIC @@ -93,7 +93,7 @@ pci* at pciecam? sdhc* at fdt? sdmmc* at sdhc? bwfm* at sdmmc? # Broadcom FullMAC -smmu* at fdt? +smmu* at fdt? early 1 xhci* at fdt? ccp* at fdt? # AMD Cryptographic Co-processor ipmi* at fdt? diff --git a/sys/arch/arm64/dev/smmu.c b/sys/arch/arm64/dev/smmu.c index fafbe3189b5..68deb31f382 100644 --- a/sys/arch/arm64/dev/smmu.c +++ b/sys/arch/arm64/dev/smmu.c @@ -73,6 +73,7 @@ uint64_t smmu_cb_read_8(struct smmu_softc *, int, bus_size_t); void smmu_cb_write_8(struct smmu_softc *, int, bus_size_t, uint64_t); int smmu_v2_domain_create(struct smmu_domain *); +void smmu_v2_domain_enable(struct smmu_domain *); void smmu_v2_tlbi_va(struct smmu_domain *, vaddr_t); void smmu_v2_tlb_sync_global(struct smmu_softc *); void smmu_v2_tlb_sync_context(struct smmu_domain *); @@ -123,6 +124,7 @@ int smmu_v3_write_ack(struct smmu_softc *, bus_size_t, bus_size_t, uint32_t); int smmu_v3_domain_create(struct smmu_domain *); +void smmu_v3_domain_enable(struct smmu_domain *); void smmu_v3_cfgi_all(struct smmu_softc *); void smmu_v3_cfgi_cd(struct smmu_domain *); void smmu_v3_cfgi_ste(struct smmu_domain *); @@ -379,6 +381,7 @@ smmu_v2_attach(struct smmu_softc *sc) smmu_gr0_write_4(sc, SMMU_SCR0, reg); sc->sc_domain_create = smmu_v2_domain_create; + sc->sc_domain_enable = smmu_v2_domain_enable; sc->sc_tlbi_va = smmu_v2_tlbi_va; sc->sc_tlb_sync_context = smmu_v2_tlb_sync_context; return 0; @@ -814,6 +817,24 @@ smmu_v2_domain_create(struct smmu_domain *dom) reg |= SMMU_CB_SCTLR_ASIDPNE; smmu_cb_write_4(sc, dom->sd_cb_idx, SMMU_CB_SCTLR, reg); + snprintf(dom->sd_exname, sizeof(dom->sd_exname), "%s:%x", + sc->sc_dev.dv_xname, dom->sd_sid); + dom->sd_iovamap = extent_create(dom->sd_exname, 0, + (1LL << iovabits) - 1, M_DEVBUF, NULL, 0, EX_WAITOK | + EX_NOCOALESCE); + + return 0; +} + +void +smmu_v2_domain_enable(struct smmu_domain *dom) +{ + struct smmu_softc *sc = dom->sd_sc; + uint32_t reg; + + if (dom->sd_enabled) + return; + /* Point stream to context block */ reg = SMMU_S2CR_TYPE_TRANS | dom->sd_cb_idx; if (sc->sc_has_exids && sc->sc_smr) @@ -829,13 +850,7 @@ smmu_v2_domain_create(struct smmu_domain *dom) smmu_gr0_write_4(sc, SMMU_SMR(dom->sd_smr_idx), reg); } - snprintf(dom->sd_exname, sizeof(dom->sd_exname), "%s:%x", - sc->sc_dev.dv_xname, dom->sd_sid); - dom->sd_iovamap = extent_create(dom->sd_exname, 0, - (1LL << iovabits) - 1, M_DEVBUF, NULL, 0, EX_WAITOK | - EX_NOCOALESCE); - - return 0; + dom->sd_enabled = 1; } void @@ -1348,6 +1363,8 @@ smmu_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments, u_long dva, len; int error; + sc->sc_domain_enable(dom); + error = sc->sc_dmat->_dmamap_create(sc->sc_dmat, size, nsegments, maxsegsz, boundary, flags, &map); if (error) @@ -1781,6 +1798,7 @@ smmu_v3_attach(struct smmu_softc *sc) printf("\n"); sc->sc_domain_create = smmu_v3_domain_create; + sc->sc_domain_enable = smmu_v3_domain_enable; sc->sc_tlbi_va = smmu_v3_tlbi_va; sc->sc_tlb_sync_context = smmu_v3_tlb_sync_context; return 0; @@ -2113,6 +2131,15 @@ smmu_v3_domain_create(struct smmu_domain *dom) return 0; } +void +smmu_v3_domain_enable(struct smmu_domain *dom) +{ + if (dom->sd_enabled) + return; + + dom->sd_enabled = 1; +} + int smmu_v3_sync(struct smmu_softc *sc) { diff --git a/sys/arch/arm64/dev/smmu_fdt.c b/sys/arch/arm64/dev/smmu_fdt.c index 77bc7132055..2d55ee3c82a 100644 --- a/sys/arch/arm64/dev/smmu_fdt.c +++ b/sys/arch/arm64/dev/smmu_fdt.c @@ -129,11 +129,6 @@ smmu_v2_fdt_attach(struct smmu_fdt_softc *fsc, int node) if (OF_getproplen(node, "dma-coherent") == 0) sc->sc_coherent = 1; - if (sc->sc_is_qcom) { - printf(": disabled\n"); - return ENXIO; - } - if (smmu_v2_attach(sc) != 0) return ENXIO; diff --git a/sys/arch/arm64/dev/smmuvar.h b/sys/arch/arm64/dev/smmuvar.h index 045dfbd1fd9..b3fb3cfc590 100644 --- a/sys/arch/arm64/dev/smmuvar.h +++ b/sys/arch/arm64/dev/smmuvar.h @@ -25,6 +25,7 @@ struct smmu_dmamem { struct smmu_softc; struct smmu_domain { struct smmu_softc *sd_sc; + uint32_t sd_enabled; uint32_t sd_sid; bus_dma_tag_t sd_dmat; @@ -122,6 +123,7 @@ struct smmu_softc { SIMPLEQ_HEAD(, smmu_domain) sc_domains; int (*sc_domain_create)(struct smmu_domain *); + void (*sc_domain_enable)(struct smmu_domain *); void (*sc_tlbi_va)(struct smmu_domain *, vaddr_t); void (*sc_tlb_sync_context)(struct smmu_domain *); };