From: Mark Kettenis Subject: Re: mbuf dma 64 bit To: Alexander Bluhm Cc: tech@openbsd.org Date: Thu, 12 Feb 2026 00:23:19 +0100 > Date: Wed, 11 Feb 2026 16:43:16 -0500 > From: Alexander Bluhm > > Hi, > > Allow to allocate mbufs with physical address above 4 GB. If any > network driver does not support 64 bit dma, the existing restrictions > are used. > > I have added the IFXF_MBUF_64BIT flag to all drivers I could test. > > There is no solution for hotplug devices yet. And I have not tested > USB network. > > Is this diff the right direction? Maybe, but I think you're running ahead a bit too fast here. The solution for hotplug devices is probably bounce buffers. But those still need a bit of work. We have a diff for amd64 (that still needs some work) but there are other 64-bit architectures with DMA constraints. And ideally we'd audit most of our network drivers before we go this route. Or at least the subset that is likely to be involved in hotplug scenarios. A few comments on (parts of) the diff down below. > Index: arch/amd64/amd64/autoconf.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/autoconf.c,v > diff -u -p -r1.59 autoconf.c > --- arch/amd64/amd64/autoconf.c 12 Nov 2025 10:00:27 -0000 1.59 > +++ arch/amd64/amd64/autoconf.c 11 Feb 2026 21:19:45 -0000 > @@ -108,6 +108,23 @@ unmap_startup(void) > } while (p < (vaddr_t)endboot); > } > > +void > +mbuf_dma_64bit_enable(void) > +{ > + struct ifnet *ifp; > + > + TAILQ_FOREACH(ifp, &ifnetlist, if_list) { > + if (!ISSET(ifp->if_xflags, IFXF_MBUF_64BIT)) { > + printf("%s: restrict all mbufs to low memory\n", > + ifp->if_xname); > + return; > + } > + } > + > + printf("enable mbufs in high memory\n"); > + m_pool_constraints(0, ULONG_MAX); > +} > + I hope those are just debug printfs. > /* > * Determine i/o configuration for a machine. 
> */ > @@ -123,6 +140,8 @@ cpu_configure(void) > panic("configure: mainbus not configured"); > > intr_printconfig(); > + > + mbuf_dma_64bit_enable(); > > #if NIOAPIC > 0 > lapic_set_lvt(); > Index: arch/amd64/amd64/bus_dma.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/bus_dma.c,v > diff -u -p -r1.60 bus_dma.c > --- arch/amd64/amd64/bus_dma.c 13 Mar 2025 13:24:04 -0000 1.60 > +++ arch/amd64/amd64/bus_dma.c 11 Feb 2026 21:19:27 -0000 > @@ -102,7 +102,8 @@ > #endif > > int _bus_dmamap_load_buffer(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t, > - struct proc *, int, paddr_t *, int *, int *, int); > + struct proc *, int, paddr_t *, int *, int *, int, > + struct uvm_constraint_range *); > > /* > * Common function for DMA map creation. May be called by bus-specific > @@ -269,7 +270,7 @@ _bus_dmamap_load(bus_dma_tag_t t, bus_dm > seg = 0; > used = 0; > error = _bus_dmamap_load_buffer(t, map, buf, buflen, p, flags, > - &lastaddr, &seg, &used, 1); > + &lastaddr, &seg, &used, 1, &dma_constraint); > if (error == 0) { > map->dm_mapsize = buflen; > map->dm_nsegs = seg + 1; > @@ -311,7 +312,8 @@ _bus_dmamap_load_mbuf(bus_dma_tag_t t, b > if (m->m_len == 0) > continue; > error = _bus_dmamap_load_buffer(t, map, m->m_data, m->m_len, > - NULL, flags, &lastaddr, &seg, &used, first); > + NULL, flags, &lastaddr, &seg, &used, first, > + &mbuf_constraint); > first = 0; > } > if (error == 0) { > @@ -366,7 +368,8 @@ _bus_dmamap_load_uio(bus_dma_tag_t t, bu > addr = (caddr_t)iov[i].iov_base; > > error = _bus_dmamap_load_buffer(t, map, addr, minlen, > - p, flags, &lastaddr, &seg, &used, first); > + p, flags, &lastaddr, &seg, &used, first, > + &dma_constraint); > first = 0; > > resid -= minlen; > @@ -721,7 +724,7 @@ _bus_dmamem_mmap(bus_dma_tag_t t, bus_dm > int > _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf, > bus_size_t buflen, struct proc *p, int flags, paddr_t *lastaddrp, > - 
int *segp, int *usedp, int first) > + int *segp, int *usedp, int first, struct uvm_constraint_range *constraint) > { > bus_size_t sgsize; > bus_addr_t curaddr, lastaddr, baddr, bmask; > @@ -746,7 +749,7 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, > */ > pmap_extract(pmap, vaddr, (paddr_t *)&curaddr); > > - if (curaddr > dma_constraint.ucr_high && > + if (curaddr > constraint->ucr_high && > (map->_dm_flags & BUS_DMA_64BIT) == 0) > panic("Non dma-reachable buffer at curaddr %#lx(raw)", > curaddr); This is wrong. If a driver gets an mbuf in high memory, but didn't set BUS_DMA_64BIT, we should still panic. I don't think you need this bit and it conflicts with the bounce buffer stuff. > Index: arch/amd64/amd64/machdep.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/machdep.c,v > diff -u -p -r1.306 machdep.c > --- arch/amd64/amd64/machdep.c 24 Nov 2025 17:20:40 -0000 1.306 > +++ arch/amd64/amd64/machdep.c 11 Feb 2026 21:19:27 -0000 > @@ -218,9 +218,11 @@ struct vm_map *phys_map = NULL; > /* UVM constraint ranges. */ > struct uvm_constraint_range isa_constraint = { 0x0, 0x00ffffffUL }; > struct uvm_constraint_range dma_constraint = { 0x0, 0xffffffffUL }; > +struct uvm_constraint_range mbuf_constraint = { 0x0, 0xffffffffUL }; > struct uvm_constraint_range *uvm_md_constraints[] = { > &isa_constraint, > &dma_constraint, > + &mbuf_constraint, > NULL, > }; The uvm_md_constraints[] array is used in uvm to divide up the memory into regions. The mbuf_constraint isn't a new region. It is either the same as dma_constraint or equivalent to no_constraint. One might argue that mbuf_constraint should actually just be a pointer to one of those. 
> Index: dev/pci/if_em.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_em.c,v > diff -u -p -r1.379 if_em.c > --- dev/pci/if_em.c 14 Jul 2025 11:52:43 -0000 1.379 > +++ dev/pci/if_em.c 11 Feb 2026 21:19:27 -0000 > @@ -1990,7 +1990,7 @@ em_setup_interface(struct em_softc *sc) > strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ); > ifp->if_softc = sc; > ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; > - ifp->if_xflags = IFXF_MPSAFE; > + ifp->if_xflags = IFXF_MPSAFE | IFXF_MBUF_64BIT; > ifp->if_ioctl = em_ioctl; > ifp->if_qstart = em_start; > ifp->if_watchdog = em_watchdog; > @@ -2158,7 +2158,8 @@ em_dma_malloc(struct em_softc *sc, bus_s > int r; > > r = bus_dmamap_create(sc->sc_dmat, size, 1, > - size, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map); > + size, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, > + &dma->dma_map); > if (r != 0) > return (r); > > @@ -2250,10 +2251,11 @@ em_setup_transmit_structures(struct em_s > pkt = &que->tx.sc_tx_pkts_ring[i]; > error = bus_dmamap_create(sc->sc_dmat, EM_TSO_SIZE, > EM_MAX_SCATTER / (sc->pcix_82544 ? 
2 : 1), > - EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map); > + EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT | BUS_DMA_64BIT, > + &pkt->pkt_map); > if (error != 0) { > - printf("%s: Unable to create TX DMA map\n", > - DEVNAME(sc)); > + printf("%s: Unable to create TX DMA map, " > + "error %d\n", DEVNAME(sc), error); > goto fail; > } > } > @@ -2772,11 +2774,11 @@ em_allocate_receive_structures(struct em > pkt = &que->rx.sc_rx_pkts_ring[i]; > > error = bus_dmamap_create(sc->sc_dmat, EM_MCLBYTES, 1, > - EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &pkt->pkt_map); > + EM_MCLBYTES, 0, BUS_DMA_NOWAIT | BUS_DMA_64BIT, > + &pkt->pkt_map); > if (error != 0) { > - printf("%s: em_allocate_receive_structures: " > - "bus_dmamap_create failed; error %u\n", > - DEVNAME(sc), error); > + printf("%s: Unable to create RX DMA map, " > + "error %d\n", DEVNAME(sc), error); > goto fail; > } Are we sure older em(4) variants do actually support 64-bit DMA? > Index: dev/pci/if_igc.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_igc.c,v > diff -u -p -r1.30 if_igc.c > --- dev/pci/if_igc.c 17 Dec 2025 01:14:42 -0000 1.30 > +++ dev/pci/if_igc.c 11 Feb 2026 21:19:27 -0000 > @@ -737,8 +737,8 @@ igc_dma_malloc(struct igc_softc *sc, bus > > dma->dma_tag = os->os_pa.pa_dmat; > > - if (bus_dmamap_create(dma->dma_tag, size, 1, size, 0, BUS_DMA_NOWAIT, > - &dma->dma_map)) > + if (bus_dmamap_create(dma->dma_tag, size, 1, size, 0, > + BUS_DMA_NOWAIT | BUS_DMA_64BIT, &dma->dma_map)) > return 1; > if (bus_dmamem_alloc(dma->dma_tag, size, PAGE_SIZE, 0, &dma->dma_seg, > 1, &dma->dma_nseg, BUS_DMA_NOWAIT)) > @@ -796,7 +796,7 @@ igc_setup_interface(struct igc_softc *sc > ifp->if_softc = sc; > strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ); > ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; > - ifp->if_xflags = IFXF_MPSAFE; > + ifp->if_xflags = IFXF_MPSAFE | IFXF_MBUF_64BIT; > ifp->if_ioctl = igc_ioctl; > ifp->if_qstart = igc_start; > 
ifp->if_watchdog = igc_watchdog; > @@ -1855,10 +1855,11 @@ igc_allocate_transmit_buffers(struct igc > for (i = 0; i < sc->num_tx_desc; i++) { > txbuf = &txr->tx_buffers[i]; > error = bus_dmamap_create(txr->txdma.dma_tag, IGC_TSO_SIZE, > - IGC_MAX_SCATTER, PAGE_SIZE, 0, BUS_DMA_NOWAIT, &txbuf->map); > + IGC_MAX_SCATTER, PAGE_SIZE, 0, > + BUS_DMA_NOWAIT | BUS_DMA_64BIT, &txbuf->map); > if (error != 0) { > - printf("%s: Unable to create TX DMA map\n", > - DEVNAME(sc)); > + printf("%s: Unable to create TX DMA map, error %d\n", > + DEVNAME(sc), error); > goto fail; > } > } > @@ -2161,10 +2162,10 @@ igc_allocate_receive_buffers(struct igc_ > for (i = 0; i < sc->num_rx_desc; i++, rxbuf++) { > error = bus_dmamap_create(rxr->rxdma.dma_tag, > sc->rx_mbuf_sz, 1, sc->rx_mbuf_sz, 0, > - BUS_DMA_NOWAIT, &rxbuf->map); > + BUS_DMA_NOWAIT | BUS_DMA_64BIT, &rxbuf->map); > if (error) { > - printf("%s: Unable to create RX DMA map\n", > - DEVNAME(sc)); > + printf("%s: Unable to create RX DMA map, error %d\n", > + DEVNAME(sc), error); > goto fail; > } > } I don't expect any issue with adding those BUS_DMA_64BIT to this driver. You should also add those to the bus_dmamem_alloc(9) calls though. Can you send this out as a separate diff? 
> Index: dev/pci/if_ix.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v > diff -u -p -r1.222 if_ix.c > --- dev/pci/if_ix.c 11 Nov 2025 17:43:18 -0000 1.222 > +++ dev/pci/if_ix.c 11 Feb 2026 21:19:27 -0000 > @@ -1929,7 +1929,7 @@ ixgbe_setup_interface(struct ix_softc *s > strlcpy(ifp->if_xname, sc->dev.dv_xname, IFNAMSIZ); > ifp->if_softc = sc; > ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; > - ifp->if_xflags = IFXF_MPSAFE; > + ifp->if_xflags = IFXF_MPSAFE | IFXF_MBUF_64BIT; > ifp->if_ioctl = ixgbe_ioctl; > ifp->if_qstart = ixgbe_start; > ifp->if_timer = 0; > @@ -2087,7 +2087,7 @@ ixgbe_dma_malloc(struct ix_softc *sc, bu > > dma->dma_tag = os->os_pa.pa_dmat; > r = bus_dmamap_create(dma->dma_tag, size, 1, > - size, 0, BUS_DMA_NOWAIT, &dma->dma_map); > + size, 0, BUS_DMA_NOWAIT | BUS_DMA_64BIT, &dma->dma_map); > if (r != 0) { > printf("%s: ixgbe_dma_malloc: bus_dmamap_create failed; " > "error %u\n", ifp->if_xname, r); > @@ -2293,11 +2293,11 @@ ixgbe_allocate_transmit_buffers(struct i > txbuf = &txr->tx_buffers[i]; > error = bus_dmamap_create(txr->txdma.dma_tag, MAXMCLBYTES, > sc->num_segs, PAGE_SIZE, 0, > - BUS_DMA_NOWAIT, &txbuf->map); > + BUS_DMA_NOWAIT | BUS_DMA_64BIT, &txbuf->map); > > if (error != 0) { > - printf("%s: Unable to create TX DMA map\n", > - ifp->if_xname); > + printf("%s: Unable to create TX DMA map, error %d\n", > + ifp->if_xname, error); > goto fail; > } > } > @@ -2776,10 +2776,10 @@ ixgbe_allocate_receive_buffers(struct ix > rxbuf = rxr->rx_buffers; > for (i = 0; i < sc->num_rx_desc; i++, rxbuf++) { > error = bus_dmamap_create(rxr->rxdma.dma_tag, 16 * 1024, 1, > - 16 * 1024, 0, BUS_DMA_NOWAIT, &rxbuf->map); > + 16 * 1024, 0, BUS_DMA_NOWAIT | BUS_DMA_64BIT, &rxbuf->map); > if (error) { > - printf("%s: Unable to create Pack DMA map\n", > - ifp->if_xname); > + printf("%s: Unable to create RX DMA map, error %d\n", > + ifp->if_xname, error); > goto 
fail; > } > } Same story as for igc(4).