From: Mike Larkin Subject: Re: virtio: Allow DMA mem > 4GB on amd64 To: Mark Kettenis Cc: Stefan Fritsch , tech@openbsd.org Date: Thu, 31 Jul 2025 11:50:30 -0700 On Thu, Jul 31, 2025 at 01:08:21PM +0200, Mark Kettenis wrote: > > Date: Thu, 31 Jul 2025 12:38:08 +0200 (CEST) > > From: Stefan Fritsch > > > > Hi, > > > > the diff below allows to use mem above 4GB on amd64 for virtio rings and > > descriptors. This may reduce pressure on dmaable memory a bit. > > > > For the virtio ring address, virtio 0.9 has a 2^44 limit, so I use > > bus_dmamem_alloc_range there. > > > > ok? > > In principle yes, I think doing this makes sense. However... > > > diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c > > index f9c5d1ed0b4..1f46332d452 100644 > > --- a/sys/dev/pv/if_vio.c > > +++ b/sys/dev/pv/if_vio.c > > @@ -392,11 +392,13 @@ vio_alloc_dmamem(struct vio_softc *sc) > > int nsegs; > > > > if (bus_dmamap_create(vsc->sc_dmat, sc->sc_dma_size, 1, > > - sc->sc_dma_size, 0, BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, > > + sc->sc_dma_size, 0, > > + BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, > > &sc->sc_dma_map) != 0) > > goto err; > > if (bus_dmamem_alloc(vsc->sc_dmat, sc->sc_dma_size, 16, 0, > > - &sc->sc_dma_seg, 1, &nsegs, BUS_DMA_NOWAIT | BUS_DMA_ZERO) != 0) > > + &sc->sc_dma_seg, 1, &nsegs, > > + BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_64BIT) != 0) > > goto destroy; > > if (bus_dmamem_map(vsc->sc_dmat, &sc->sc_dma_seg, nsegs, > > sc->sc_dma_size, &sc->sc_dma_kva, BUS_DMA_NOWAIT) != 0) > > @@ -542,7 +544,7 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments) > > r = bus_dmamap_create(vsc->sc_dmat, > > sc->sc_rx_mbuf_size + sc->sc_hdr_size, 2, > > sc->sc_rx_mbuf_size, 0, > > - BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, > > + BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, > > &vioq->viq_rxdmamaps[i]); > > if (r != 0) > > goto destroy; > > @@ -551,7 +553,7 @@ vio_alloc_mem(struct vio_softc *sc, int tx_max_segments) > > for (i = 0; i < txqsize; i++) { > > r = bus_dmamap_create(vsc->sc_dmat, txsize, > > tx_max_segments, txsize, 0, > > - BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, > > + BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, > > &vioq->viq_txdmamaps[i]); > > if (r != 0) > > goto destroy; > > diff --git a/sys/dev/pv/vioblk.c b/sys/dev/pv/vioblk.c > > index c38efbac356..5d5c9f706d5 100644 > > --- a/sys/dev/pv/vioblk.c > > +++ b/sys/dev/pv/vioblk.c > > @@ -646,7 +646,7 @@ vioblk_alloc_reqs(struct vioblk_softc *sc, int qsize) > > > > allocsize = sizeof(struct virtio_blk_req) * nreqs; > > r = bus_dmamem_alloc(sc->sc_virtio->sc_dmat, allocsize, 0, 0, > > - &sc->sc_reqs_segs[0], 1, &rsegs, BUS_DMA_NOWAIT); > > + &sc->sc_reqs_segs[0], 1, &rsegs, BUS_DMA_NOWAIT | BUS_DMA_64BIT); > > if (r != 0) { > > printf("DMA memory allocation failed, size %d, error %d\n", > > allocsize, r); > > @@ -697,7 +697,8 @@ vioblk_alloc_reqs(struct vioblk_softc *sc, int qsize) > > > > r = bus_dmamap_create(sc->sc_virtio->sc_dmat, > > VR_DMA_END, 1, VR_DMA_END, 0, > > - BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vr->vr_cmdsts); > > + BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, > > + &vr->vr_cmdsts); > > if (r != 0) { > > printf("cmd dmamap creation failed, err %d\n", r); > > nreqs = i; > > @@ -711,7 +712,8 @@ vioblk_alloc_reqs(struct vioblk_softc *sc, int qsize) > > goto err_reqs; > > } > > r = bus_dmamap_create(sc->sc_virtio->sc_dmat, MAXPHYS, > > - SEG_MAX, MAXPHYS, 0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, > > + SEG_MAX, MAXPHYS, 0, > > + BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, > > &vr->vr_payload); > > if (r != 0) { > > printf("payload dmamap creation failed, err %d\n", r); > > diff --git a/sys/dev/pv/vioscsi.c b/sys/dev/pv/vioscsi.c > > index 3ebac61abee..9375a33eef0 100644 > > --- a/sys/dev/pv/vioscsi.c > > +++ b/sys/dev/pv/vioscsi.c > > @@ -433,7 +433,7 @@ vioscsi_alloc_reqs(struct vioscsi_softc *sc, struct virtio_softc *vsc, > > > > allocsize = nreqs * sizeof(struct vioscsi_req); > > r = bus_dmamem_alloc(vsc->sc_dmat, allocsize, 0, 0, > > - &sc->sc_reqs_segs[0], 1, &rsegs, BUS_DMA_NOWAIT); > > + &sc->sc_reqs_segs[0], 1, &rsegs, BUS_DMA_NOWAIT | BUS_DMA_64BIT); > > if (r != 0) { > > printf("bus_dmamem_alloc, size %zd, error %d\n", > > allocsize, r); > > @@ -486,13 +486,16 @@ vioscsi_alloc_reqs(struct vioscsi_softc *sc, struct virtio_softc *vsc, > > r = bus_dmamap_create(vsc->sc_dmat, > > offsetof(struct vioscsi_req, vr_xs), 1, > > offsetof(struct vioscsi_req, vr_xs), 0, > > - BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vr->vr_control); > > + BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, > > + &vr->vr_control); > > if (r != 0) { > > printf("bus_dmamap_create vr_control failed, error %d\n", r); > > return i; > > } > > r = bus_dmamap_create(vsc->sc_dmat, MAXPHYS, SEG_MAX, > > - MAXPHYS, 0, BUS_DMA_NOWAIT|BUS_DMA_ALLOCNOW, &vr->vr_data); > > + MAXPHYS, 0, > > + BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT, > > + &vr->vr_data); > > nit: I think you can avoid an extra line by putting MAXPHYS on the > first line? > > > if (r != 0) { > > printf("bus_dmamap_create vr_data failed, error %d\n", r ); > > return i; > > diff --git a/sys/dev/pv/virtio.c b/sys/dev/pv/virtio.c > > index aa63fd5e9ba..759bf05ccb8 100644 > > --- a/sys/dev/pv/virtio.c > > +++ b/sys/dev/pv/virtio.c > > @@ -387,9 +387,15 @@ virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, int index, > > allocsize3 = 0; > > allocsize = allocsize1 + allocsize2 + allocsize3; > > > > - /* alloc and map the memory */ > > - r = bus_dmamem_alloc(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0, > > - &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT); > > + /* > > + * alloc and map the memory > > + * > > + * With virtio 0.9, the ring memory must be in the lowest 2^32 > > + * pages. For simplicity, we use this limit even for virtio 1.0. > > + */ > > + r = bus_dmamem_alloc_range(sc->sc_dmat, allocsize, VIRTIO_PAGE_SIZE, 0, > > + &vq->vq_segs[0], 1, &rsegs, BUS_DMA_NOWAIT, 0, > > + 0xfffffffffff); > > So in principe the upper limit would depend on VIRTIO_PAGE_SIZE. So > should this be something like > > ((uint64_t)VIRTIO_PAGE_SIZE << 32) - 1) > > instead. And does this work on 32-bit architectures? > Discussed with sf@. I had the same question about the addr and that long constant. ok mlarkin either way once i386 is tested. > > if (r != 0) { > > printf("virtqueue %d for %s allocation failed, error %d\n", > > index, name, r); > > @@ -403,7 +409,7 @@ virtio_alloc_vq(struct virtio_softc *sc, struct virtqueue *vq, int index, > > goto err; > > } > > r = bus_dmamap_create(sc->sc_dmat, allocsize, 1, allocsize, 0, > > - BUS_DMA_NOWAIT, &vq->vq_dmamap); > > + BUS_DMA_NOWAIT | BUS_DMA_64BIT, &vq->vq_dmamap); > > if (r != 0) { > > printf("virtqueue %d for %s dmamap creation failed, " > > "error %d\n", index, name, r); > > > > >