Index | Thread | Search

From:
Mike Larkin <mlarkin@nested.page>
Subject:
Re: virtio: Allow DMA mem > 4GB on amd64
To:
Mark Kettenis <mark.kettenis@xs4all.nl>
Cc:
Stefan Fritsch <sf@sfritsch.de>, tech@openbsd.org
Date:
Thu, 31 Jul 2025 11:50:30 -0700

Download raw body.

Thread
On Thu, Jul 31, 2025 at 01:08:21PM +0200, Mark Kettenis wrote:
> > Date: Thu, 31 Jul 2025 12:38:08 +0200 (CEST)
> > From: Stefan Fritsch <sf@sfritsch.de>
> >
> > Hi,
> >
> > the diff below allows to use mem above 4GB on amd64 for virtio rings and
> > descriptors. This may reduce pressure on dmaable memory a bit.
> >
> > For the virtio ring address, virtio 0.9 has a 2^44 limit, so I use
> > bus_dmamem_alloc_range there.
> >
> > ok?
>
> In principle yes, I think doing this makes sense.  However...
>
> > diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c
> > index f9c5d1ed0b4..1f46332d452 100644
> > --- a/sys/dev/pv/if_vio.c
> > +++ b/sys/dev/pv/if_vio.c
> > @@ -392,11 +392,13 @@ vio_alloc_dmamem(struct vio_softc *sc)
> >  	int nsegs;
> >
> >  	if (bus_dmamap_create(vsc->sc_dmat, sc->sc_dma_size, 1,
> > -	    sc->sc_dma_size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW,
> > +	    sc->sc_dma_size, 0,
> > +	    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
> >  	    &sc->sc_dma_map) != 0)
> >  		goto err;
> >  	if (bus_dmamem_alloc(vsc->sc_dmat, sc->sc_dma_size, 16, 0,
> > -	    &sc->sc_dma_seg, 1, &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO) != 0)
> > +	    &sc->sc_dma_seg, 1, &nsegs,
> > +	    BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_64BIT) != 0)
> >  		goto destroy;
> >  	if (bus_dmamem_map(vsc->sc_dmat, &sc->sc_dma_seg, nsegs,
> >  	    sc->sc_dma_size, &sc->sc_dma_kva, BUS_DMA_NOWAIT) != 0)
> > @@ -542,7 +544,7 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
> >  			r = bus_dmamap_create(vsc->sc_dmat,
> >  			    sc->sc_rx_mbuf_size + sc->sc_hdr_size, 2,
> >  			    sc->sc_rx_mbuf_size, 0,
> > -			    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
> > +			    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
> >  			    &vioq->viq_rxdmamaps[i]);
> >  			if (r != 0)
> >  				goto destroy;
> > @@ -551,7 +553,7 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments)
> >  		for (i = 0; i < txqsize; i++) {
> >  			r = bus_dmamap_create(vsc->sc_dmat, txsize,
> >  			    tx_max_segments, txsize, 0,
> > -			    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
> > +			    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
> >  			    &vioq->viq_txdmamaps[i]);
> >  			if (r != 0)
> >  				goto destroy;
> > diff --git a/sys/dev/pv/vioblk.c b/sys/dev/pv/vioblk.c
> > index c38efbac356..5d5c9f706d5 100644
> > --- a/sys/dev/pv/vioblk.c
> > +++ b/sys/dev/pv/vioblk.c
> > @@ -646,7 +646,7 @@ vioblk_alloc_reqs(struct vioblk_softc *sc, int qsize)
> >
> >  	allocsize = sizeof(struct virtio_blk_req) * nreqs;
> >  	r = bus_dmamem_alloc(sc->sc_virtio->sc_dmat, allocsize, 0, 0,
> > -	    &sc->sc_reqs_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
> > +	    &sc->sc_reqs_segs[0], 1, &rsegs, BUS_DMA_NOWAIT | BUS_DMA_64BIT);
> >  	if (r != 0) {
> >  		printf("DMA memory allocation failed, size %d, error %d\n",
> >  		    allocsize, r);
> > @@ -697,7 +697,8 @@ vioblk_alloc_reqs(struct vioblk_softc *sc, int qsize)
> >
> >  		r = bus_dmamap_create(sc->sc_virtio->sc_dmat,
> >  		    VR_DMA_END, 1, VR_DMA_END, 0,
> > -		    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vr->vr_cmdsts);
> > +		    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
> > +		    &vr->vr_cmdsts);
> >  		if (r != 0) {
> >  			printf("cmd dmamap creation failed, err %d\n", r);
> >  			nreqs = i;
> > @@ -711,7 +712,8 @@ vioblk_alloc_reqs(struct vioblk_softc *sc, int qsize)
> >  			goto err_reqs;
> >  		}
> >  		r = bus_dmamap_create(sc->sc_virtio->sc_dmat, MAXPHYS,
> > -		    SEG_MAX, MAXPHYS, 0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW,
> > +		    SEG_MAX, MAXPHYS, 0,
> > +		    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
> >  		    &vr->vr_payload);
> >  		if (r != 0) {
> >  			printf("payload dmamap creation failed, err %d\n", r);
> > diff --git a/sys/dev/pv/vioscsi.c b/sys/dev/pv/vioscsi.c
> > index 3ebac61abee..9375a33eef0 100644
> > --- a/sys/dev/pv/vioscsi.c
> > +++ b/sys/dev/pv/vioscsi.c
> > @@ -433,7 +433,7 @@ vioscsi_alloc_reqs(struct vioscsi_softc *sc, struct virtio_softc *vsc,
> >
> >  	allocsize = nreqs * sizeof(struct vioscsi_req);
> >  	r = bus_dmamem_alloc(vsc->sc_dmat, allocsize, 0, 0,
> > -	    &sc->sc_reqs_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
> > +	    &sc->sc_reqs_segs[0], 1, &rsegs, BUS_DMA_NOWAIT | BUS_DMA_64BIT);
> >  	if (r != 0) {
> >  		printf("bus_dmamem_alloc, size %zd, error %d\n",
> >  		    allocsize, r);
> > @@ -486,13 +486,16 @@ vioscsi_alloc_reqs(struct vioscsi_softc *sc, struct virtio_softc *vsc,
> >  		r = bus_dmamap_create(vsc->sc_dmat,
> >  		    offsetof(struct vioscsi_req, vr_xs), 1,
> >  		    offsetof(struct vioscsi_req, vr_xs), 0,
> > -		    BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vr->vr_control);
> > +		    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
> > +		    &vr->vr_control);
> >  		if (r != 0) {
> >  			printf("bus_dmamap_create vr_control failed, error  %d\n", r);
> >  			return i;
> >  		}
> >  		r = bus_dmamap_create(vsc->sc_dmat, MAXPHYS, SEG_MAX,
> > -		    MAXPHYS, 0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vr->vr_data);
> > +		    MAXPHYS, 0,
> > +		    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
> > +		    &vr->vr_data);
>
> nit: I think you can avoid an extra line by putting MAXPHYS on the
> first line?
>
> >  		if (r != 0) {
> >  			printf("bus_dmamap_create vr_data failed, error %d\n", r );
> >  			return i;
> > diff --git a/sys/dev/pv/virtio.c b/sys/dev/pv/virtio.c
> > index aa63fd5e9ba..759bf05ccb8 100644
> > --- a/sys/dev/pv/virtio.c
> > +++ b/sys/dev/pv/virtio.c
> > @@ -387,9 +387,15 @@ virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
> >  		allocsize3 = 0;
> >  	allocsize = allocsize1 + allocsize2 + allocsize3;
> >
> > -	/* alloc and map the memory */
> > -	r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
> > -	    &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT);
> > +	/*
> > +	 * alloc and map the memory
> > +	 *
> > +	 * With virtio 0.9, the ring memory must be in the lowest 2^32
> > +	 * pages. For simplicity, we use this limit even for virtio 1.0.
> > +	 */
> > +	r = bus_dmamem_alloc_range(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0,
> > +	    &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT, 0,
> > +	    0xfffffffffff);
>
> So in principe the upper limit would depend on VIRTIO_PAGE_SIZE.  So
> should this be something like
>
>        ((uint64_t)VIRTIO_PAGE_SIZE << 32) - 1)
>
> instead.  And does this work on 32-bit architectures?
>

Discussed with sf@. I had the same question about the addr and that long
constant. ok mlarkin either way once i386 is tested.

> >  	if (r != 0) {
> >  		printf("virtqueue %d for %s allocation failed, error %d\n",
> >  		       index, name, r);
> > @@ -403,7 +409,7 @@ virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, int index,
> >  		goto err;
> >  	}
> >  	r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0,
> > -	    BUS_DMA_NOWAIT, &vq->vq_dmamap);
> > +	    BUS_DMA_NOWAIT | BUS_DMA_64BIT, &vq->vq_dmamap);
> >  	if (r != 0) {
> >  		printf("virtqueue %d for %s dmamap creation failed, "
> >  		    "error %d\n", index, name, r);
> >
> >
>