From: Hans-Jörg Höxer <Hans-Joerg_Hoexer@genua.de>
Subject: Re: [EXT] Re: SEV: busdma bounce buffer
To: Mark Kettenis <mark.kettenis@xs4all.nl>, <tech@openbsd.org>
Cc: Hans-Jörg Höxer <Hans-Joerg_Hoexer@genua.de>, <alexander.bluhm@gmx.net>
Date: Tue, 13 Aug 2024 15:47:54 +0200

Hi,

> ...
> > > @@ -551,7 +551,11 @@ typedef struct bus_dmamap		*bus_dmamap_t;
> > >   */
> > >  struct bus_dma_segment {
> > >  	bus_addr_t	ds_addr;	/* DMA address */
> > > +	bus_addr_t	ds_addr2;	/* replacement store */
> > 
> > A more descriptive name would be better.  Also, because this isn't
> > part of the bus_dma API, it should probably start with an underscore.
> > So perhaps _ds_bounce_addr?
> 
> ok, I will change this.
> 
> > >  	bus_size_t	ds_len;		/* length of transfer */
> > > +	vaddr_t		ds_va;		/* mapped loaded data */
> > > +	vaddr_t		ds_bounce;	/* mapped bounced data */
> > > +
> > 
> > Same here.  For consistency maybe _ds_va and _ds_bounce_va?

Updated diff below.

Take care,
HJ.
------------------------------------------------------------------------
commit 5a9b3f80d9a3d9d0e86a20b1d0b449c562e197be
Author: Hans-Joerg Hoexer <hshoexer@genua.de>
Date:   Fri Jul 12 10:56:19 2024 +0200

    busdma: Bounce buffering for SEV
    
    When running as an SEV guest (indicated by cpu_sev_guestmode != 0),
    we allocate additional pages for each segment on DMA map creation.
    These pages are mapped with the PMAP_NOCRYPT attribute, i.e. the
    crypt bit is not set in the PTE.  Thus, these pages are shared with
    the hypervisor.
    
    When the map is loaded with actual pages, the address in each
    descriptor is replaced with the address of the corresponding bounce
    buffer.  Using bus_dmamap_sync(), data is copied from the encrypted
    pages used by guest drivers to the unencrypted bounce buffers shared
    with the hypervisor, and vice versa.
    
    When the kernel is not running as an SEV guest (i.e. on a normal
    host or in a non-SEV guest), no bounce buffers are used.
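
A minimal sketch of the per-segment copy directions that the
bus_dmamap_sync() change below implements (the helper name
sev_bounce_copy is illustrative only and does not appear in the diff):

static inline void
sev_bounce_copy(bus_dma_segment_t *sg, bus_size_t off, bus_size_t l, int op)
{
	/* _ds_va: encrypted driver buffer; _ds_bounce_va: shared bounce page */
	if (op & BUS_DMASYNC_PREWRITE)		/* WRITE: memory -> device */
		bcopy((void *)(sg->_ds_va + off),
		    (void *)(sg->_ds_bounce_va + off), l);
	if (op & BUS_DMASYNC_POSTREAD)		/* READ: device -> memory */
		bcopy((void *)(sg->_ds_bounce_va + off),
		    (void *)(sg->_ds_va + off), l);
	/* PREREAD and POSTWRITE need no copy. */
}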

diff --git a/sys/arch/amd64/amd64/bus_dma.c b/sys/arch/amd64/amd64/bus_dma.c
index 19a366115f9..9d4705b36f3 100644
--- a/sys/arch/amd64/amd64/bus_dma.c
+++ b/sys/arch/amd64/amd64/bus_dma.c
@@ -108,8 +108,13 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
     bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
 {
 	struct bus_dmamap *map;
+	struct pglist mlist;
+	struct vm_page **pg, *pgnext;
+	size_t mapsize, sz, ssize;
+	vaddr_t va, sva;
 	void *mapstore;
-	size_t mapsize;
+	int npages, error;
+	const struct kmem_dyn_mode *kd;
 
 	/*
 	 * Allocate and initialize the DMA map.  The end of the map
@@ -125,6 +130,16 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
 	 */
 	mapsize = sizeof(struct bus_dmamap) +
 	    (sizeof(bus_dma_segment_t) * (nsegments - 1));
+
+	/* allocate and use bounce buffers when running as SEV guest */
+	if (cpu_sev_guestmode) {
+		/* this many pages plus one in case we get split */
+		npages = round_page(size) / PAGE_SIZE + 1;
+		if (npages < nsegments)
+			npages = nsegments;
+		mapsize += sizeof(struct vm_page *) * npages;
+	}
+
 	if ((mapstore = malloc(mapsize, M_DEVBUF,
 	    (flags & BUS_DMA_NOWAIT) ?
 	        (M_NOWAIT|M_ZERO) : (M_WAITOK|M_ZERO))) == NULL)
@@ -135,8 +150,59 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
 	map->_dm_segcnt = nsegments;
 	map->_dm_maxsegsz = maxsegsz;
 	map->_dm_boundary = boundary;
+	if (cpu_sev_guestmode) {
+		map->_dm_pages = (void *)&map->dm_segs[nsegments];
+		map->_dm_npages = npages;
+	}
 	map->_dm_flags = flags & ~(BUS_DMA_WAITOK|BUS_DMA_NOWAIT);
 
+	if (!cpu_sev_guestmode) {
+		*dmamp = map;
+		return (0);
+	}
+
+	sz = npages << PGSHIFT;
+	kd = flags & BUS_DMA_NOWAIT ? &kd_trylock : &kd_waitok;
+	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, kd);
+	if (va == 0) {
+		map->_dm_npages = 0;
+		free(map, M_DEVBUF, mapsize);
+		return (ENOMEM);
+	}
+
+	TAILQ_INIT(&mlist);
+	error = uvm_pglistalloc(sz, 0, -1, PAGE_SIZE, 0, &mlist, nsegments,
+	    (flags & BUS_DMA_NOWAIT) ? UVM_PLA_NOWAIT : UVM_PLA_WAITOK);
+	if (error) {
+		map->_dm_npages = 0;
+		km_free((void *)va, sz, &kv_any, &kp_none);
+		free(map, M_DEVBUF, mapsize);
+		return (ENOMEM);
+	}
+
+	sva = va;
+	ssize = sz;
+	pgnext = TAILQ_FIRST(&mlist);
+	for (pg = map->_dm_pages; npages--; va += PAGE_SIZE, pg++) {
+		*pg = pgnext;
+		error = pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(*pg),
+		    PROT_READ | PROT_WRITE,
+		    PROT_READ | PROT_WRITE | PMAP_WIRED |
+		    PMAP_CANFAIL | PMAP_NOCRYPT);
+		if (error) {
+			pmap_update(pmap_kernel());
+			map->_dm_npages = 0;
+			km_free((void *)sva, ssize, &kv_any, &kp_none);
+			free(map, M_DEVBUF, mapsize);
+			uvm_pglistfree(&mlist);
+			return (ENOMEM);
+		}
+		pgnext = TAILQ_NEXT(*pg, pageq);
+		bzero((void *)va, PAGE_SIZE);
+	}
+	pmap_update(pmap_kernel());
+	map->_dm_pgva = sva;
+
 	*dmamp = map;
 	return (0);
 }
@@ -149,7 +215,25 @@ void
 _bus_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
 {
 	size_t mapsize;
-	
+	struct vm_page **pg;
+	struct pglist mlist;
+	vaddr_t va;
+
+	if (map->_dm_pgva) {
+		km_free((void *)map->_dm_pgva, map->_dm_npages << PGSHIFT,
+		    &kv_any, &kp_none);
+	}
+
+	if (map->_dm_pages) {
+		TAILQ_INIT(&mlist);
+		for (pg = map->_dm_pages, va = map->_dm_pgva;
+		    map->_dm_npages--; va += PAGE_SIZE, pg++) {
+			pmap_remove(pmap_kernel(), va, va + PAGE_SIZE);
+			TAILQ_INSERT_TAIL(&mlist, *pg, pageq);
+		}
+		uvm_pglistfree(&mlist);
+	}
+
 	mapsize = sizeof(struct bus_dmamap) +
 		(sizeof(bus_dma_segment_t) * (map->_dm_segcnt - 1));
 	free(map, M_DEVBUF, mapsize);
@@ -383,6 +467,7 @@ _bus_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
 	 */
 	map->dm_mapsize = 0;
 	map->dm_nsegs = 0;
+	map->_dm_nused = 0;
 }
 
 /*
@@ -393,7 +478,40 @@ void
 _bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t addr,
     bus_size_t size, int op)
 {
-	/* Nothing to do here. */
+	bus_dma_segment_t *sg;
+	int i, off = addr;
+	bus_size_t l;
+
+	if (!cpu_sev_guestmode)
+		return;
+
+	for (i = map->_dm_segcnt, sg = map->dm_segs; size && i--; sg++) {
+		if (off >= sg->ds_len) {
+			off -= sg->ds_len;
+			continue;
+		}
+
+		l = sg->ds_len - off;
+		if (l > size)
+			l = size;
+		size -= l;
+
+		/* PREREAD and POSTWRITE are no-ops. */
+
+		/* READ: device -> memory */
+		if (op & BUS_DMASYNC_POSTREAD) {
+			bcopy((void *)(sg->_ds_bounce_va + off),
+			    (void *)(sg->_ds_va + off), l);
+		}
+
+		/* WRITE: memory -> device */
+		if (op & BUS_DMASYNC_PREWRITE) {
+			bcopy((void *)(sg->_ds_va + off),
+			    (void *)(sg->_ds_bounce_va + off), l);
+		}
+
+		off = 0;
+	}
 }
 
 /*
@@ -565,10 +683,11 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
     int first)
 {
 	bus_size_t sgsize;
-	bus_addr_t curaddr, lastaddr, baddr, bmask;
-	vaddr_t vaddr = (vaddr_t)buf;
-	int seg;
+	bus_addr_t curaddr, lastaddr, baddr, bmask, oaddr = -1;
+	vaddr_t pgva = -1, vaddr = (vaddr_t)buf;
+	int seg, page, off;
 	pmap_t pmap;
+	struct vm_page *pg;
 
 	if (p != NULL)
 		pmap = p->p_vmspace->vm_map.pmap;
@@ -589,6 +708,19 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
 			panic("Non dma-reachable buffer at curaddr %#lx(raw)",
 			    curaddr);
 
+		if (cpu_sev_guestmode) {
+			/* use bounce buffer */
+			if (map->_dm_nused + 1 >= map->_dm_npages)
+				return (ENOMEM);
+
+			off = vaddr & PAGE_MASK;
+			pg = map->_dm_pages[page = map->_dm_nused++];
+			oaddr = curaddr;
+			curaddr = VM_PAGE_TO_PHYS(pg) + off;
+
+			pgva = map->_dm_pgva + (page << PGSHIFT) + off;
+		}
+
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
@@ -611,7 +743,10 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
 		 */
 		if (first) {
 			map->dm_segs[seg].ds_addr = curaddr;
+			map->dm_segs[seg]._ds_bounce_addr = oaddr;
 			map->dm_segs[seg].ds_len = sgsize;
+			map->dm_segs[seg]._ds_va = vaddr;
+			map->dm_segs[seg]._ds_bounce_va = pgva;
 			first = 0;
 		} else {
 			if (curaddr == lastaddr &&
@@ -625,7 +760,10 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
 				if (++seg >= map->_dm_segcnt)
 					break;
 				map->dm_segs[seg].ds_addr = curaddr;
+				map->dm_segs[seg]._ds_bounce_addr = oaddr;
 				map->dm_segs[seg].ds_len = sgsize;
+				map->dm_segs[seg]._ds_va = vaddr;
+				map->dm_segs[seg]._ds_bounce_va = pgva;
 			}
 		}
 
diff --git a/sys/arch/amd64/include/bus.h b/sys/arch/amd64/include/bus.h
index 33d6cd6eaeb..d78735ede6f 100644
--- a/sys/arch/amd64/include/bus.h
+++ b/sys/arch/amd64/include/bus.h
@@ -550,8 +550,12 @@ typedef struct bus_dmamap		*bus_dmamap_t;
  *	are suitable for programming into DMA registers.
  */
 struct bus_dma_segment {
-	bus_addr_t	ds_addr;	/* DMA address */
-	bus_size_t	ds_len;		/* length of transfer */
+	bus_addr_t	ds_addr;		/* DMA address */
+	bus_addr_t	_ds_bounce_addr;	/* bounce buffer */
+	bus_size_t	ds_len;			/* length of transfer */
+	vaddr_t		_ds_va;			/* mapped loaded data */
+	vaddr_t		_ds_bounce_va;		/* mapped bounced data */
+
 	/*
 	 * Ugh. need this so can pass alignment down from bus_dmamem_alloc
 	 * to scatter gather maps. only the first one is used so the rest is
@@ -655,6 +659,11 @@ struct bus_dmamap {
 
 	void		*_dm_cookie;	/* cookie for bus-specific functions */
 
+	struct vm_page **_dm_pages;	/* replacement pages */
+	vaddr_t		_dm_pgva;	/* those above -- mapped */
+	int		_dm_npages;	/* number of pages allocated */
+	int		_dm_nused;	/* number of pages replaced */
+
 	/*
 	 * PUBLIC MEMBERS: these are used by machine-independent code.
 	 */
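
For context, the usual driver-side sequence stays unchanged; a minimal
sketch for a device write, with sc->sc_dmat, map, buf and len as
placeholders.  With the diff above, the PREWRITE sync copies the data
into the shared bounce pages and the device DMAs from those:

	/* Hypothetical driver fragment; error handling omitted. */
	bus_dmamap_load(sc->sc_dmat, map, buf, len, NULL, BUS_DMA_NOWAIT);
	bus_dmamap_sync(sc->sc_dmat, map, 0, len, BUS_DMASYNC_PREWRITE);
	/* ... program map->dm_segs[] into the device and start the DMA ... */
	bus_dmamap_sync(sc->sc_dmat, map, 0, len, BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(sc->sc_dmat, map);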