From: Hans-Jörg Höxer <Hans-Joerg_Hoexer@genua.de>
Subject: Re: [EXT] Re: SEV: busdma bounce buffer
To: Mark Kettenis <mark.kettenis@xs4all.nl>
Cc: Hans-Jörg Höxer <Hans-Joerg_Hoexer@genua.de>, <tech@openbsd.org>, <alexander.bluhm@gmx.net>
Date: Wed, 14 Aug 2024 16:05:50 +0200

Hi Mark,

On Wed, Aug 14, 2024 at 02:44:22PM +0200, Mark Kettenis wrote:
> Argh, the new name makes it painfully obvious that the new
> _ds_bounce_addr isn't actually the physical address of the bounce
> buffer, but the physical address of the original buffer.  Sorry for
> the misguided suggestion.
> 
> That said, _ds_bounce_addr doesn't actually seem to be used for any
> real purpose.  So maybe just remove it and save a few bytes of memory
> in the maps?

ah, true! I've removed _ds_bounce_addr.

> > +	struct pglist mlist;
> > +	vaddr_t va;
> > +
> > +	if (map->_dm_pgva) {
> > +		km_free((void *)map->_dm_pgva, map->_dm_npages << PGSHIFT,
> > +		    &kv_any, &kp_none);
> > +	}
> > +
> > +	if (map->_dm_pages) {
> > +		TAILQ_INIT(&mlist);
> > +		for (pg = map->_dm_pages, va = map->_dm_pgva;
> > +		    map->_dm_npages--; va += PAGE_SIZE, pg++) {
> > +			pmap_remove(pmap_kernel(), va, va + PAGE_SIZE);
> 
> The km_free() does an uvm_unmap(), which already takes care of doing
> the pmap_remove().  Dropping this also avoids doing the wrong thing if
> _dm_pgva is zero, which you guard against above, but not here.

ok, I see.  I've adjusted the code.

Thanks for looking into this!  Updated diff below.
And bluhm@, thanks for testing!

Take care,
HJ.

----------------------------------------------------------------
commit 2c257ee437132e265321c2559077b23ecf50771f
Author: Hans-Joerg Hoexer <hshoexer@genua.de>
Date:   Fri Jul 12 10:56:19 2024 +0200

    busdma: Bounce buffering for SEV
    
    When running as an SEV guest (indicated by cpu_sev_guestmode != 0),
    we allocate additional pages for each segment on DMA map creation.
    These pages are mapped with the PMAP_NOCRYPT attribute, i.e. the
    crypt bit is not set in the PTE.  Thus, these pages are shared with
    the hypervisor.
    
    When the map is loaded with actual pages, the address in each
    descriptor is replaced with the address of the corresponding bounce
    buffer.  bus_dmamap_sync() then copies data from the encrypted pages
    used by guest drivers to the unencrypted bounce buffers shared with
    the hypervisor, and vice versa.
    
    When the kernel is not running as an SEV guest (i.e. on a normal
    host or in a non-SEV guest), no bounce buffers are used.
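
For reference, the driver-facing API does not change.  Here is a minimal,
hypothetical sketch (not part of the diff; the dma tag, buffer and device
hookup are assumed) of the usual bus_dma cycle.  On an SEV guest the
PREWRITE/POSTREAD syncs now copy between the driver's buffer and the
PMAP_NOCRYPT bounce pages set up in _bus_dmamap_create():

int
example_dma_xfer(bus_dma_tag_t dmat, void *buf, bus_size_t len)
{
	bus_dmamap_t map;
	int error;

	/* map creation also sets up the shared bounce pages on SEV */
	error = bus_dmamap_create(dmat, len, 1, len, 0, BUS_DMA_NOWAIT, &map);
	if (error)
		return (error);

	/* on SEV, ds_addr ends up pointing at a bounce page, not at buf */
	error = bus_dmamap_load(dmat, map, buf, len, NULL, BUS_DMA_NOWAIT);
	if (error) {
		bus_dmamap_destroy(dmat, map);
		return (error);
	}

	/* memory -> device: copy buf into the unencrypted bounce buffer */
	bus_dmamap_sync(dmat, map, 0, len, BUS_DMASYNC_PREWRITE);

	/* ... program the device with map->dm_segs[0].ds_addr ... */

	/* device -> memory: copy the bounce buffer back into buf */
	bus_dmamap_sync(dmat, map, 0, len, BUS_DMASYNC_POSTREAD);

	bus_dmamap_unload(dmat, map);
	bus_dmamap_destroy(dmat, map);
	return (0);
}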

diff --git a/sys/arch/amd64/amd64/bus_dma.c b/sys/arch/amd64/amd64/bus_dma.c
index 19a366115f9..873242cf9fb 100644
--- a/sys/arch/amd64/amd64/bus_dma.c
+++ b/sys/arch/amd64/amd64/bus_dma.c
@@ -108,8 +108,13 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
     bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
 {
 	struct bus_dmamap *map;
+	struct pglist mlist;
+	struct vm_page **pg, *pgnext;
+	size_t mapsize, sz, ssize;
+	vaddr_t va, sva;
 	void *mapstore;
-	size_t mapsize;
+	int npages, error;
+	const struct kmem_dyn_mode *kd;
 
 	/*
 	 * Allocate and initialize the DMA map.  The end of the map
@@ -125,6 +130,16 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
 	 */
 	mapsize = sizeof(struct bus_dmamap) +
 	    (sizeof(bus_dma_segment_t) * (nsegments - 1));
+
+	/* allocate and use bounce buffers when running as SEV guest */
+	if (cpu_sev_guestmode) {
+		/* this many pages plus one in case we get split */
+		npages = round_page(size) / PAGE_SIZE + 1;
+		if (npages < nsegments)
+			npages = nsegments;
+		mapsize += sizeof(struct vm_page *) * npages;
+	}
+
 	if ((mapstore = malloc(mapsize, M_DEVBUF,
 	    (flags & BUS_DMA_NOWAIT) ?
 	        (M_NOWAIT|M_ZERO) : (M_WAITOK|M_ZERO))) == NULL)
@@ -135,8 +150,59 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
 	map->_dm_segcnt = nsegments;
 	map->_dm_maxsegsz = maxsegsz;
 	map->_dm_boundary = boundary;
+	if (cpu_sev_guestmode) {
+		map->_dm_pages = (void *)&map->dm_segs[nsegments];
+		map->_dm_npages = npages;
+	}
 	map->_dm_flags = flags & ~(BUS_DMA_WAITOK|BUS_DMA_NOWAIT);
 
+	if (!cpu_sev_guestmode) {
+		*dmamp = map;
+		return (0);
+	}
+
+	sz = npages << PGSHIFT;
+	kd = flags & BUS_DMA_NOWAIT ? &kd_trylock : &kd_waitok;
+	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, kd);
+	if (va == 0) {
+		map->_dm_npages = 0;
+		free(map, M_DEVBUF, mapsize);
+		return (ENOMEM);
+	}
+
+	TAILQ_INIT(&mlist);
+	error = uvm_pglistalloc(sz, 0, -1, PAGE_SIZE, 0, &mlist, nsegments,
+	    (flags & BUS_DMA_NOWAIT) ? UVM_PLA_NOWAIT : UVM_PLA_WAITOK);
+	if (error) {
+		map->_dm_npages = 0;
+		km_free((void *)va, sz, &kv_any, &kp_none);
+		free(map, M_DEVBUF, mapsize);
+		return (ENOMEM);
+	}
+
+	sva = va;
+	ssize = sz;
+	pgnext = TAILQ_FIRST(&mlist);
+	for (pg = map->_dm_pages; npages--; va += PAGE_SIZE, pg++) {
+		*pg = pgnext;
+		error = pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(*pg),
+		    PROT_READ | PROT_WRITE,
+		    PROT_READ | PROT_WRITE | PMAP_WIRED |
+		    PMAP_CANFAIL | PMAP_NOCRYPT);
+		if (error) {
+			pmap_update(pmap_kernel());
+			map->_dm_npages = 0;
+			km_free((void *)sva, ssize, &kv_any, &kp_none);
+			free(map, M_DEVBUF, mapsize);
+			uvm_pglistfree(&mlist);
+			return (ENOMEM);
+		}
+		pgnext = TAILQ_NEXT(*pg, pageq);
+		bzero((void *)va, PAGE_SIZE);
+	}
+	pmap_update(pmap_kernel());
+	map->_dm_pgva = sva;
+
 	*dmamp = map;
 	return (0);
 }
@@ -149,7 +215,22 @@ void
 _bus_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
 {
 	size_t mapsize;
-	
+	struct vm_page **pg;
+	struct pglist mlist;
+
+	if (map->_dm_pgva) {
+		km_free((void *)map->_dm_pgva, map->_dm_npages << PGSHIFT,
+		    &kv_any, &kp_none);
+	}
+
+	if (map->_dm_pages) {
+		TAILQ_INIT(&mlist);
+		for (pg = map->_dm_pages; map->_dm_npages--; pg++) {
+			TAILQ_INSERT_TAIL(&mlist, *pg, pageq);
+		}
+		uvm_pglistfree(&mlist);
+	}
+
 	mapsize = sizeof(struct bus_dmamap) +
 		(sizeof(bus_dma_segment_t) * (map->_dm_segcnt - 1));
 	free(map, M_DEVBUF, mapsize);
@@ -383,6 +464,7 @@ _bus_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
 	 */
 	map->dm_mapsize = 0;
 	map->dm_nsegs = 0;
+	map->_dm_nused = 0;
 }
 
 /*
@@ -393,7 +475,40 @@ void
 _bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t addr,
     bus_size_t size, int op)
 {
-	/* Nothing to do here. */
+	bus_dma_segment_t *sg;
+	int i, off = addr;
+	bus_size_t l;
+
+	if (!cpu_sev_guestmode)
+		return;
+
+	for (i = map->_dm_segcnt, sg = map->dm_segs; size && i--; sg++) {
+		if (off >= sg->ds_len) {
+			off -= sg->ds_len;
+			continue;
+		}
+
+		l = sg->ds_len - off;
+		if (l > size)
+			l = size;
+		size -= l;
+
+		/* PREREAD and POSTWRITE are no-ops. */
+
+		/* READ: device -> memory */
+		if (op & BUS_DMASYNC_POSTREAD) {
+			bcopy((void *)(sg->_ds_bounce_va + off),
+			    (void *)(sg->_ds_va + off), l);
+		}
+
+		/* WRITE: memory -> device */
+		if (op & BUS_DMASYNC_PREWRITE) {
+			bcopy((void *)(sg->_ds_va + off),
+			    (void *)(sg->_ds_bounce_va + off), l);
+		}
+
+		off = 0;
+	}
 }
 
 /*
@@ -566,9 +681,10 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
 {
 	bus_size_t sgsize;
 	bus_addr_t curaddr, lastaddr, baddr, bmask;
-	vaddr_t vaddr = (vaddr_t)buf;
-	int seg;
+	vaddr_t pgva = -1, vaddr = (vaddr_t)buf;
+	int seg, page, off;
 	pmap_t pmap;
+	struct vm_page *pg;
 
 	if (p != NULL)
 		pmap = p->p_vmspace->vm_map.pmap;
@@ -589,6 +705,18 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
 			panic("Non dma-reachable buffer at curaddr %#lx(raw)",
 			    curaddr);
 
+		if (cpu_sev_guestmode) {
+			/* use bounce buffer */
+			if (map->_dm_nused + 1 >= map->_dm_npages)
+				return (ENOMEM);
+
+			off = vaddr & PAGE_MASK;
+			pg = map->_dm_pages[page = map->_dm_nused++];
+			curaddr = VM_PAGE_TO_PHYS(pg) + off;
+
+			pgva = map->_dm_pgva + (page << PGSHIFT) + off;
+		}
+
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
@@ -612,6 +740,8 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
 		if (first) {
 			map->dm_segs[seg].ds_addr = curaddr;
 			map->dm_segs[seg].ds_len = sgsize;
+			map->dm_segs[seg]._ds_va = vaddr;
+			map->dm_segs[seg]._ds_bounce_va = pgva;
 			first = 0;
 		} else {
 			if (curaddr == lastaddr &&
@@ -626,6 +756,8 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
 					break;
 				map->dm_segs[seg].ds_addr = curaddr;
 				map->dm_segs[seg].ds_len = sgsize;
+				map->dm_segs[seg]._ds_va = vaddr;
+				map->dm_segs[seg]._ds_bounce_va = pgva;
 			}
 		}
 
diff --git a/sys/arch/amd64/include/bus.h b/sys/arch/amd64/include/bus.h
index 33d6cd6eaeb..e49639e8021 100644
--- a/sys/arch/amd64/include/bus.h
+++ b/sys/arch/amd64/include/bus.h
@@ -552,6 +552,9 @@ typedef struct bus_dmamap		*bus_dmamap_t;
 struct bus_dma_segment {
 	bus_addr_t	ds_addr;	/* DMA address */
 	bus_size_t	ds_len;		/* length of transfer */
+	vaddr_t		_ds_va;		/* mapped loaded data */
+	vaddr_t		_ds_bounce_va;	/* mapped bounced data */
+
 	/*
 	 * Ugh. need this so can pass alignment down from bus_dmamem_alloc
 	 * to scatter gather maps. only the first one is used so the rest is
@@ -655,6 +658,11 @@ struct bus_dmamap {
 
 	void		*_dm_cookie;	/* cookie for bus-specific functions */
 
+	struct vm_page **_dm_pages;	/* replacement pages */
+	vaddr_t		_dm_pgva;	/* those above -- mapped */
+	int		_dm_npages;	/* number of pages allocated */
+	int		_dm_nused;	/* number of pages replaced */
+
 	/*
 	 * PUBLIC MEMBERS: these are used by machine-independent code.
 	 */