Index | Thread | Search

From:
Mark Kettenis <mark.kettenis@xs4all.nl>
Subject:
Re: mbuf dma 64 bit
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Thu, 12 Feb 2026 00:23:19 +0100

Download raw body.

Thread
> Date: Wed, 11 Feb 2026 16:43:16 -0500
> From: Alexander Bluhm <bluhm@openbsd.org>
> 
> Hi,
> 
> Allow to allocate mbufs with physical address above 4 GB.  If any
> network driver does not support 64 bit dma, the existing restrictions
> are used.
> 
> I have added the IFXF_MBUF_64BIT flag to all drivers I could test.
> 
> There is no solution for hotplug devices yet.  And I have not tested
> USB network.
> 
> Is this diff the right direction?

Maybe, but I think you're running ahead a bit too fast here.  The
solution for hotplug devices is probably bounce buffers.  But those
still need a bit of work.  We have a diff for amd64 (that still needs
some work) but there are other 64-bit architectures with DMA
constraints.  And ideally we'd audit most of our network drivers
before we go this route.  Or at least the subset that is likely to be
involved in hotplug scenarios.

A few comments on (parts of) the diff down below.

> Index: arch/amd64/amd64/autoconf.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/autoconf.c,v
> diff -u -p -r1.59 autoconf.c
> --- arch/amd64/amd64/autoconf.c	12 Nov 2025 10:00:27 -0000	1.59
> +++ arch/amd64/amd64/autoconf.c	11 Feb 2026 21:19:45 -0000
> @@ -108,6 +108,23 @@ unmap_startup(void)
>  	} while (p < (vaddr_t)endboot);
>  }
>  
> +void
> +mbuf_dma_64bit_enable(void)
> +{
> +	struct ifnet *ifp;
> +
> +	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
> +		if (!ISSET(ifp->if_xflags, IFXF_MBUF_64BIT)) {
> +			printf("%s: restrict all mbufs to low memory\n",
> +			    ifp->if_xname);
> +			return;
> +		}
> +	}
> +
> +	printf("enable mbufs in high memory\n");
> +	m_pool_constraints(0, ULONG_MAX);
> +}
> +

I hope those are just debug printfs.

>  /*
>   * Determine i/o configuration for a machine.
>   */
> @@ -123,6 +140,8 @@ cpu_configure(void)
>  		panic("configure: mainbus not configured");
>  
>  	intr_printconfig();
> +
> +	mbuf_dma_64bit_enable();
>  
>  #if NIOAPIC > 0
>  	lapic_set_lvt();
> Index: arch/amd64/amd64/bus_dma.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/bus_dma.c,v
> diff -u -p -r1.60 bus_dma.c
> --- arch/amd64/amd64/bus_dma.c	13 Mar 2025 13:24:04 -0000	1.60
> +++ arch/amd64/amd64/bus_dma.c	11 Feb 2026 21:19:27 -0000
> @@ -102,7 +102,8 @@
>  #endif
>  
>  int _bus_dmamap_load_buffer(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
> -    struct proc *, int, paddr_t *, int *, int *, int);
> +    struct proc *, int, paddr_t *, int *, int *, int,
> +    struct uvm_constraint_range *);
>  
>  /*
>   * Common function for DMA map creation.  May be called by bus-specific
> @@ -269,7 +270,7 @@ _bus_dmamap_load(bus_dma_tag_t t, bus_dm
>  	seg = 0;
>  	used = 0;
>  	error = _bus_dmamap_load_buffer(t, map, buf, buflen, p, flags,
> -	    &lastaddr, &seg, &used, 1);
> +	    &lastaddr, &seg, &used, 1, &dma_constraint);
>  	if (error == 0) {
>  		map->dm_mapsize = buflen;
>  		map->dm_nsegs = seg + 1;
> @@ -311,7 +312,8 @@ _bus_dmamap_load_mbuf(bus_dma_tag_t t, b
>  		if (m->m_len == 0)
>  			continue;
>  		error = _bus_dmamap_load_buffer(t, map, m->m_data, m->m_len,
> -		    NULL, flags, &lastaddr, &seg, &used, first);
> +		    NULL, flags, &lastaddr, &seg, &used, first,
> +		    &mbuf_constraint);
>  		first = 0;
>  	}
>  	if (error == 0) {
> @@ -366,7 +368,8 @@ _bus_dmamap_load_uio(bus_dma_tag_t t, bu
>  		addr = (caddr_t)iov[i].iov_base;
>  
>  		error = _bus_dmamap_load_buffer(t, map, addr, minlen,
> -		    p, flags, &lastaddr, &seg, &used, first);
> +		    p, flags, &lastaddr, &seg, &used, first,
> +		    &dma_constraint);
>  		first = 0;
>  
>  		resid -= minlen;
> @@ -721,7 +724,7 @@ _bus_dmamem_mmap(bus_dma_tag_t t, bus_dm
>  int
>  _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
>      bus_size_t buflen, struct proc *p, int flags, paddr_t *lastaddrp,
> -    int *segp, int *usedp, int first)
> +    int *segp, int *usedp, int first, struct uvm_constraint_range *constraint)
>  {
>  	bus_size_t sgsize;
>  	bus_addr_t curaddr, lastaddr, baddr, bmask;
> @@ -746,7 +749,7 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t,
>  		 */
>  		pmap_extract(pmap, vaddr, (paddr_t *)&curaddr);
>  
> -		if (curaddr > dma_constraint.ucr_high &&
> +		if (curaddr > constraint->ucr_high &&
>  		    (map->_dm_flags & BUS_DMA_64BIT) == 0)
>  			panic("Non dma-reachable buffer at curaddr %#lx(raw)",
>  			    curaddr);

This is wrong.  If a driver gets an mbuf in high memory, but didn't
set BUS_DMA_64BIT, we should still panic.  I don't think you need this
bit and it conflicts with the bounce buffer stuff.

> Index: arch/amd64/amd64/machdep.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/machdep.c,v
> diff -u -p -r1.306 machdep.c
> --- arch/amd64/amd64/machdep.c	24 Nov 2025 17:20:40 -0000	1.306
> +++ arch/amd64/amd64/machdep.c	11 Feb 2026 21:19:27 -0000
> @@ -218,9 +218,11 @@ struct vm_map *phys_map = NULL;
>  /* UVM constraint ranges. */
>  struct uvm_constraint_range  isa_constraint = { 0x0, 0x00ffffffUL };
>  struct uvm_constraint_range  dma_constraint = { 0x0, 0xffffffffUL };
> +struct uvm_constraint_range  mbuf_constraint = { 0x0, 0xffffffffUL };
>  struct uvm_constraint_range *uvm_md_constraints[] = {
>      &isa_constraint,
>      &dma_constraint,
> +    &mbuf_constraint,
>      NULL,
>  };

The uvm_md_constraints[] array is used in uvm to divide up the memory
in regions.  The mbuf_constraint isn't a new region.  It is either the
same as dma_constraint or equivalent to no_constraint.

One might argue that mbuf_constraint should actually just be a pointer
to one of those.

> Index: dev/pci/if_em.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_em.c,v
> diff -u -p -r1.379 if_em.c
> --- dev/pci/if_em.c	14 Jul 2025 11:52:43 -0000	1.379
> +++ dev/pci/if_em.c	11 Feb 2026 21:19:27 -0000
> @@ -1990,7 +1990,7 @@ em_setup_interface(struct em_softc *sc)
>  	strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
>  	ifp->if_softc = sc;
>  	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> -	ifp->if_xflags = IFXF_MPSAFE;
> +	ifp->if_xflags = IFXF_MPSAFE | IFXF_MBUF_64BIT;
>  	ifp->if_ioctl = em_ioctl;
>  	ifp->if_qstart = em_start;
>  	ifp->if_watchdog = em_watchdog;
> @@ -2158,7 +2158,8 @@ em_dma_malloc(struct em_softc *sc, bus_s
>  	int r;
>  
>  	r = bus_dmamap_create(sc->sc_dmat, size, 1,
> -	    size, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &dma->dma_map);
> +	    size, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
> +	    &dma->dma_map);
>  	if (r != 0)
>  		return (r);
>  
> @@ -2250,10 +2251,11 @@ em_setup_transmit_structures(struct em_s
>  			pkt = &que->tx.sc_tx_pkts_ring[i];
>  			error = bus_dmamap_create(sc->sc_dmat, EM_TSO_SIZE,
>  			    EM_MAX_SCATTER / (sc->pcix_82544 ? 2 : 1),
> -			    EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
> +			    EM_TSO_SEG_SIZE, 0, BUS_DMA_NOWAIT | BUS_DMA_64BIT,
> +			    &pkt->pkt_map);
>  			if (error != 0) {
> -				printf("%s: Unable to create TX DMA map\n",
> -				    DEVNAME(sc));
> +				printf("%s: Unable to create TX DMA map, "
> +				    "error %d\n", DEVNAME(sc), error);
>  				goto fail;
>  			}
>  		}
> @@ -2772,11 +2774,11 @@ em_allocate_receive_structures(struct em
>  			pkt = &que->rx.sc_rx_pkts_ring[i];
>  
>  			error = bus_dmamap_create(sc->sc_dmat, EM_MCLBYTES, 1,
> -			    EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &pkt->pkt_map);
> +			    EM_MCLBYTES, 0, BUS_DMA_NOWAIT | BUS_DMA_64BIT,
> +			    &pkt->pkt_map);
>  			if (error != 0) {
> -				printf("%s: em_allocate_receive_structures: "
> -				    "bus_dmamap_create failed; error %u\n",
> -				    DEVNAME(sc), error);
> +				printf("%s: Unable to create RX DMA map, "
> +				    "error %d\n", DEVNAME(sc), error);
>  				goto fail;
>  			}

Are we sure older em(4) variants do actually support 64-bit DMA?

> Index: dev/pci/if_igc.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_igc.c,v
> diff -u -p -r1.30 if_igc.c
> --- dev/pci/if_igc.c	17 Dec 2025 01:14:42 -0000	1.30
> +++ dev/pci/if_igc.c	11 Feb 2026 21:19:27 -0000
> @@ -737,8 +737,8 @@ igc_dma_malloc(struct igc_softc *sc, bus
>  
>  	dma->dma_tag = os->os_pa.pa_dmat;
>  
> -	if (bus_dmamap_create(dma->dma_tag, size, 1, size, 0, BUS_DMA_NOWAIT,
> -	    &dma->dma_map))
> +	if (bus_dmamap_create(dma->dma_tag, size, 1, size, 0,
> +	    BUS_DMA_NOWAIT | BUS_DMA_64BIT, &dma->dma_map))
>  		return 1;
>  	if (bus_dmamem_alloc(dma->dma_tag, size, PAGE_SIZE, 0, &dma->dma_seg,
>  	    1, &dma->dma_nseg, BUS_DMA_NOWAIT))
> @@ -796,7 +796,7 @@ igc_setup_interface(struct igc_softc *sc
>  	ifp->if_softc = sc;
>  	strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
>  	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> -	ifp->if_xflags = IFXF_MPSAFE;
> +	ifp->if_xflags = IFXF_MPSAFE | IFXF_MBUF_64BIT;
>  	ifp->if_ioctl = igc_ioctl;
>  	ifp->if_qstart = igc_start;
>  	ifp->if_watchdog = igc_watchdog;
> @@ -1855,10 +1855,11 @@ igc_allocate_transmit_buffers(struct igc
>  	for (i = 0; i < sc->num_tx_desc; i++) {
>  		txbuf = &txr->tx_buffers[i];
>  		error = bus_dmamap_create(txr->txdma.dma_tag, IGC_TSO_SIZE,
> -		    IGC_MAX_SCATTER, PAGE_SIZE, 0, BUS_DMA_NOWAIT, &txbuf->map);
> +		    IGC_MAX_SCATTER, PAGE_SIZE, 0,
> +		    BUS_DMA_NOWAIT | BUS_DMA_64BIT, &txbuf->map);
>  		if (error != 0) {
> -			printf("%s: Unable to create TX DMA map\n",
> -			    DEVNAME(sc));
> +			printf("%s: Unable to create TX DMA map, error %d\n",
> +			    DEVNAME(sc), error);
>  			goto fail;
>  		}
>  	}
> @@ -2161,10 +2162,10 @@ igc_allocate_receive_buffers(struct igc_
>  	for (i = 0; i < sc->num_rx_desc; i++, rxbuf++) {
>  		error = bus_dmamap_create(rxr->rxdma.dma_tag,
>  		    sc->rx_mbuf_sz, 1, sc->rx_mbuf_sz, 0,
> -		    BUS_DMA_NOWAIT, &rxbuf->map);
> +		    BUS_DMA_NOWAIT | BUS_DMA_64BIT, &rxbuf->map);
>  		if (error) {
> -			printf("%s: Unable to create RX DMA map\n",
> -			    DEVNAME(sc));
> +			printf("%s: Unable to create RX DMA map, error %d\n",
> +			    DEVNAME(sc), error);
>  			goto fail;
>  		}
>  	}

I don't expect any issue with adding those BUS_DMA_64BIT to this
driver.  You should also add those to the bus_dmamem_alloc(9) calls
though.  Can you send this out as a separate diff?

> Index: dev/pci/if_ix.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/dev/pci/if_ix.c,v
> diff -u -p -r1.222 if_ix.c
> --- dev/pci/if_ix.c	11 Nov 2025 17:43:18 -0000	1.222
> +++ dev/pci/if_ix.c	11 Feb 2026 21:19:27 -0000
> @@ -1929,7 +1929,7 @@ ixgbe_setup_interface(struct ix_softc *s
>  	strlcpy(ifp->if_xname, sc->dev.dv_xname, IFNAMSIZ);
>  	ifp->if_softc = sc;
>  	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
> -	ifp->if_xflags = IFXF_MPSAFE;
> +	ifp->if_xflags = IFXF_MPSAFE | IFXF_MBUF_64BIT;
>  	ifp->if_ioctl = ixgbe_ioctl;
>  	ifp->if_qstart = ixgbe_start;
>  	ifp->if_timer = 0;
> @@ -2087,7 +2087,7 @@ ixgbe_dma_malloc(struct ix_softc *sc, bu
>  
>  	dma->dma_tag = os->os_pa.pa_dmat;
>  	r = bus_dmamap_create(dma->dma_tag, size, 1,
> -	    size, 0, BUS_DMA_NOWAIT, &dma->dma_map);
> +	    size, 0, BUS_DMA_NOWAIT | BUS_DMA_64BIT, &dma->dma_map);
>  	if (r != 0) {
>  		printf("%s: ixgbe_dma_malloc: bus_dmamap_create failed; "
>  		       "error %u\n", ifp->if_xname, r);
> @@ -2293,11 +2293,11 @@ ixgbe_allocate_transmit_buffers(struct i
>  		txbuf = &txr->tx_buffers[i];
>  		error = bus_dmamap_create(txr->txdma.dma_tag, MAXMCLBYTES,
>  			    sc->num_segs, PAGE_SIZE, 0,
> -			    BUS_DMA_NOWAIT, &txbuf->map);
> +			    BUS_DMA_NOWAIT | BUS_DMA_64BIT, &txbuf->map);
>  
>  		if (error != 0) {
> -			printf("%s: Unable to create TX DMA map\n",
> -			    ifp->if_xname);
> +			printf("%s: Unable to create TX DMA map, error %d\n",
> +			    ifp->if_xname, error);
>  			goto fail;
>  		}
>  	}
> @@ -2776,10 +2776,10 @@ ixgbe_allocate_receive_buffers(struct ix
>  	rxbuf = rxr->rx_buffers;
>  	for (i = 0; i < sc->num_rx_desc; i++, rxbuf++) {
>  		error = bus_dmamap_create(rxr->rxdma.dma_tag, 16 * 1024, 1,
> -		    16 * 1024, 0, BUS_DMA_NOWAIT, &rxbuf->map);
> +		    16 * 1024, 0, BUS_DMA_NOWAIT | BUS_DMA_64BIT, &rxbuf->map);
>  		if (error) {
> -			printf("%s: Unable to create Pack DMA map\n",
> -			    ifp->if_xname);
> +			printf("%s: Unable to create RX DMA map, error %d\n",
> +			    ifp->if_xname, error);
>  			goto fail;
>  		}
>  	}

Same story as for igc(4).