From: Mark Kettenis <mark.kettenis@xs4all.nl>
Subject: Re: [EXT] Re: SEV: busdma bounce buffer
To: Hans-Jörg Höxer <Hans-Joerg_Hoexer@genua.de>
Cc: Hans-Joerg_Hoexer@genua.de, tech@openbsd.org, alexander.bluhm@gmx.net
Date: Wed, 14 Aug 2024 16:13:35 +0200

> Date: Wed, 14 Aug 2024 16:05:50 +0200
> From: Hans-Jörg Höxer <Hans-Joerg_Hoexer@genua.de>
> 
> Hi Mark,
> 
> On Wed, Aug 14, 2024 at 02:44:22PM +0200, Mark Kettenis wrote:
> > Argh, the new name makes it painfully obvious that the new
> > _ds_bounce_addr isn't actually the physical address of the bounce
> > buffer, but the physical address of the original buffer.  Sorry for
> > the misguided suggestion.
> > 
> > That said, _ds_bounce_addr doesn't actually seem to be used for any
> > real purpose.  So maybe just remove it and save a few bytes of memory
> > in the maps?
> 
> ah, true! I've removed _ds_bounce_addr.
> 
> > > +	struct pglist mlist;
> > > +	vaddr_t va;
> > > +
> > > +	if (map->_dm_pgva) {
> > > +		km_free((void *)map->_dm_pgva, map->_dm_npages << PGSHIFT,
> > > +		    &kv_any, &kp_none);
> > > +	}
> > > +
> > > +	if (map->_dm_pages) {
> > > +		TAILQ_INIT(&mlist);
> > > +		for (pg = map->_dm_pages, va = map->_dm_pgva;
> > > +		    map->_dm_npages--; va += PAGE_SIZE, pg++) {
> > > +			pmap_remove(pmap_kernel(), va, va + PAGE_SIZE);
> > 
> > The km_free() does an uvm_unmap(), which already takes care of doing
> > the pmap_remove().  Dropping this also avoids doing the wrong thing if
> > _dm_pgva is zero, which you guard against above, but not here.
> 
> ok, I see.  I've adjusted the code.
> 
> Thanks for looking into this!  Updated diff below.
> And bluhm@, thanks for testing!

ok kettenis@

> ----------------------------------------------------------------
> commit 2c257ee437132e265321c2559077b23ecf50771f
> Author: Hans-Joerg Hoexer <hshoexer@genua.de>
> Date:   Fri Jul 12 10:56:19 2024 +0200
> 
>     busdma: Bounce buffering for SEV
>     
>     When running as SEV guest (indicated by cpu_sev_guestmode != 0),
>     we allocate additional pages for each segment on dma map creation.
>     These pages are mapped with the PMAP_NOCRYPT attribute, i.e. the
>     crypt bit is not set in the PTE.  Thus, these pages are shared with
>     the hypervisor.
>     
>     When the map is loaded with actual pages, the address in the
>     descriptor is replaced with the address of the corresponding
>     bounce buffer.  Using bus_dmamap_sync(), data is copied from the
>     encrypted pages used by guest drivers to the unencrypted bounce
>     buffers shared with the hypervisor, and vice versa.
>     
>     When the kernel is running in non-SEV guest mode (i.e. normal host,
>     or non-SEV guest), no bounce buffers are used.
> 
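> For readers less familiar with bus_dma(9): the bouncing is fully
> transparent to drivers.  As a minimal sketch, assuming the usual kernel
> includes (the "xxx" driver, its softc and its sc_dmat tag are
> hypothetical; the calls are the stock bus_dma(9) API):
> 
> 	/*
> 	 * Illustrative only: a transmit path in a hypothetical "xxx"
> 	 * driver; assumes buf fits one segment (e.g. within a page).
> 	 * On a SEV guest, bus_dmamap_load() substitutes the bounce
> 	 * page's DMA address into dm_segs[0].ds_addr, and the PREWRITE
> 	 * sync copies buf into the unencrypted bounce page; on a
> 	 * normal host or non-SEV guest both syncs return immediately.
> 	 */
> 	int
> 	xxx_tx(struct xxx_softc *sc, void *buf, bus_size_t len)
> 	{
> 		bus_dmamap_t map;
> 		int error;
> 
> 		error = bus_dmamap_create(sc->sc_dmat, len, 1, len, 0,
> 		    BUS_DMA_NOWAIT, &map);
> 		if (error)
> 			return (error);
> 		error = bus_dmamap_load(sc->sc_dmat, map, buf, len, NULL,
> 		    BUS_DMA_NOWAIT);
> 		if (error) {
> 			bus_dmamap_destroy(sc->sc_dmat, map);
> 			return (error);
> 		}
> 		bus_dmamap_sync(sc->sc_dmat, map, 0, len,
> 		    BUS_DMASYNC_PREWRITE);
> 		/* ...program the device with map->dm_segs[0].ds_addr... */
> 		bus_dmamap_sync(sc->sc_dmat, map, 0, len,
> 		    BUS_DMASYNC_POSTWRITE);
> 		bus_dmamap_unload(sc->sc_dmat, map);
> 		bus_dmamap_destroy(sc->sc_dmat, map);
> 		return (0);
> 	}
> 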
> diff --git a/sys/arch/amd64/amd64/bus_dma.c b/sys/arch/amd64/amd64/bus_dma.c
> index 19a366115f9..873242cf9fb 100644
> --- a/sys/arch/amd64/amd64/bus_dma.c
> +++ b/sys/arch/amd64/amd64/bus_dma.c
> @@ -108,8 +108,13 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
>      bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
>  {
>  	struct bus_dmamap *map;
> +	struct pglist mlist;
> +	struct vm_page **pg, *pgnext;
> +	size_t mapsize, sz, ssize;
> +	vaddr_t va, sva;
>  	void *mapstore;
> -	size_t mapsize;
> +	int npages, error;
> +	const struct kmem_dyn_mode *kd;
>  
>  	/*
>  	 * Allocate and initialize the DMA map.  The end of the map
> @@ -125,6 +130,16 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
>  	 */
>  	mapsize = sizeof(struct bus_dmamap) +
>  	    (sizeof(bus_dma_segment_t) * (nsegments - 1));
> +
> +	/* allocate and use bounce buffers when running as SEV guest */
> +	if (cpu_sev_guestmode) {
> +		/* this many pages plus one in case we get split */
> +		npages = round_page(size) / PAGE_SIZE + 1;
> +		if (npages < nsegments)
> +			npages = nsegments;
> +		mapsize += sizeof(struct vm_page *) * npages;
> +	}
> +
>  	if ((mapstore = malloc(mapsize, M_DEVBUF,
>  	    (flags & BUS_DMA_NOWAIT) ?
>  	        (M_NOWAIT|M_ZERO) : (M_WAITOK|M_ZERO))) == NULL)
> @@ -135,8 +150,59 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
>  	map->_dm_segcnt = nsegments;
>  	map->_dm_maxsegsz = maxsegsz;
>  	map->_dm_boundary = boundary;
> +	if (cpu_sev_guestmode) {
> +		map->_dm_pages = (void *)&map->dm_segs[nsegments];
> +		map->_dm_npages = npages;
> +	}
>  	map->_dm_flags = flags & ~(BUS_DMA_WAITOK|BUS_DMA_NOWAIT);
>  
> +	if (!cpu_sev_guestmode) {
> +		*dmamp = map;
> +		return (0);
> +	}
> +
> +	sz = npages << PGSHIFT;
> +	kd = flags & BUS_DMA_NOWAIT ? &kd_trylock : &kd_waitok;
> +	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, kd);
> +	if (va == 0) {
> +		map->_dm_npages = 0;
> +		free(map, M_DEVBUF, mapsize);
> +		return (ENOMEM);
> +	}
> +
> +	TAILQ_INIT(&mlist);
> +	error = uvm_pglistalloc(sz, 0, -1, PAGE_SIZE, 0, &mlist, nsegments,
> +	    (flags & BUS_DMA_NOWAIT) ? UVM_PLA_NOWAIT : UVM_PLA_WAITOK);
> +	if (error) {
> +		map->_dm_npages = 0;
> +		km_free((void *)va, sz, &kv_any, &kp_none);
> +		free(map, M_DEVBUF, mapsize);
> +		return (ENOMEM);
> +	}
> +
> +	sva = va;
> +	ssize = sz;
> +	pgnext = TAILQ_FIRST(&mlist);
> +	for (pg = map->_dm_pages; npages--; va += PAGE_SIZE, pg++) {
> +		*pg = pgnext;
> +		error = pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(*pg),
> +		    PROT_READ | PROT_WRITE,
> +		    PROT_READ | PROT_WRITE | PMAP_WIRED |
> +		    PMAP_CANFAIL | PMAP_NOCRYPT);
> +		if (error) {
> +			pmap_update(pmap_kernel());
> +			map->_dm_npages = 0;
> +			km_free((void *)sva, ssize, &kv_any, &kp_none);
> +			free(map, M_DEVBUF, mapsize);
> +			uvm_pglistfree(&mlist);
> +			return (ENOMEM);
> +		}
> +		pgnext = TAILQ_NEXT(*pg, pageq);
> +		bzero((void *)va, PAGE_SIZE);
> +	}
> +	pmap_update(pmap_kernel());
> +	map->_dm_pgva = sva;
> +
>  	*dmamp = map;
>  	return (0);
>  }
> @@ -149,7 +215,22 @@ void
>  _bus_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
>  {
>  	size_t mapsize;
> -	
> +	struct vm_page **pg;
> +	struct pglist mlist;
> +
> +	if (map->_dm_pgva) {
> +		km_free((void *)map->_dm_pgva, map->_dm_npages << PGSHIFT,
> +		    &kv_any, &kp_none);
> +	}
> +
> +	if (map->_dm_pages) {
> +		TAILQ_INIT(&mlist);
> +		for (pg = map->_dm_pages; map->_dm_npages--; pg++) {
> +			TAILQ_INSERT_TAIL(&mlist, *pg, pageq);
> +		}
> +		uvm_pglistfree(&mlist);
> +	}
> +
>  	mapsize = sizeof(struct bus_dmamap) +
>  		(sizeof(bus_dma_segment_t) * (map->_dm_segcnt - 1));
>  	free(map, M_DEVBUF, mapsize);
> @@ -383,6 +464,7 @@ _bus_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
>  	 */
>  	map->dm_mapsize = 0;
>  	map->dm_nsegs = 0;
> +	map->_dm_nused = 0;
>  }
>  
>  /*
> @@ -393,7 +475,40 @@ void
>  _bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t addr,
>      bus_size_t size, int op)
>  {
> -	/* Nothing to do here. */
> +	bus_dma_segment_t *sg;
> +	int i, off = addr;
> +	bus_size_t l;
> +
> +	if (!cpu_sev_guestmode)
> +		return;
> +
> +	for (i = map->_dm_segcnt, sg = map->dm_segs; size && i--; sg++) {
> +		if (off >= sg->ds_len) {
> +			off -= sg->ds_len;
> +			continue;
> +		}
> +
> +		l = sg->ds_len - off;
> +		if (l > size)
> +			l = size;
> +		size -= l;
> +
> +		/* PREREAD and POSTWRITE are no-ops. */
> +
> +		/* READ: device -> memory */
> +		if (op & BUS_DMASYNC_POSTREAD) {
> +			bcopy((void *)(sg->_ds_bounce_va + off),
> +			    (void *)(sg->_ds_va + off), l);
> +		}
> +
> +		/* WRITE: memory -> device */
> +		if (op & BUS_DMASYNC_PREWRITE) {
> +			bcopy((void *)(sg->_ds_va + off),
> +			    (void *)(sg->_ds_bounce_va + off), l);
> +		}
> +
> +		off = 0;
> +	}
>  }
>  
>  /*
> @@ -566,9 +681,10 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
>  {
>  	bus_size_t sgsize;
>  	bus_addr_t curaddr, lastaddr, baddr, bmask;
> -	vaddr_t vaddr = (vaddr_t)buf;
> -	int seg;
> +	vaddr_t pgva = -1, vaddr = (vaddr_t)buf;
> +	int seg, page, off;
>  	pmap_t pmap;
> +	struct vm_page *pg;
>  
>  	if (p != NULL)
>  		pmap = p->p_vmspace->vm_map.pmap;
> @@ -589,6 +705,18 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
>  			panic("Non dma-reachable buffer at curaddr %#lx(raw)",
>  			    curaddr);
>  
> +		if (cpu_sev_guestmode) {
> +			/* use bounce buffer */
> +			if (map->_dm_nused + 1 >= map->_dm_npages)
> +				return (ENOMEM);
> +
> +			off = vaddr & PAGE_MASK;
> +			pg = map->_dm_pages[page = map->_dm_nused++];
> +			curaddr = VM_PAGE_TO_PHYS(pg) + off;
> +
> +			pgva = map->_dm_pgva + (page << PGSHIFT) + off;
> +		}
> +
>  		/*
>  		 * Compute the segment size, and adjust counts.
>  		 */
> @@ -612,6 +740,8 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
>  		if (first) {
>  			map->dm_segs[seg].ds_addr = curaddr;
>  			map->dm_segs[seg].ds_len = sgsize;
> +			map->dm_segs[seg]._ds_va = vaddr;
> +			map->dm_segs[seg]._ds_bounce_va = pgva;
>  			first = 0;
>  		} else {
>  			if (curaddr == lastaddr &&
> @@ -626,6 +756,8 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
>  					break;
>  				map->dm_segs[seg].ds_addr = curaddr;
>  				map->dm_segs[seg].ds_len = sgsize;
> +				map->dm_segs[seg]._ds_va = vaddr;
> +				map->dm_segs[seg]._ds_bounce_va = pgva;
>  			}
>  		}
>  
> diff --git a/sys/arch/amd64/include/bus.h b/sys/arch/amd64/include/bus.h
> index 33d6cd6eaeb..e49639e8021 100644
> --- a/sys/arch/amd64/include/bus.h
> +++ b/sys/arch/amd64/include/bus.h
> @@ -552,6 +552,9 @@ typedef struct bus_dmamap		*bus_dmamap_t;
>  struct bus_dma_segment {
>  	bus_addr_t	ds_addr;	/* DMA address */
>  	bus_size_t	ds_len;		/* length of transfer */
> +	vaddr_t		_ds_va;		/* mapped loaded data */
> +	vaddr_t		_ds_bounce_va;	/* mapped bounced data */
> +
>  	/*
>  	 * Ugh. need this so can pass alignment down from bus_dmamem_alloc
>  	 * to scatter gather maps. only the first one is used so the rest is
> @@ -655,6 +658,11 @@ struct bus_dmamap {
>  
>  	void		*_dm_cookie;	/* cookie for bus-specific functions */
>  
> +	struct vm_page **_dm_pages;	/* replacement pages */
> +	vaddr_t		_dm_pgva;	/* those above -- mapped */
> +	int		_dm_npages;	/* number of pages allocated */
> +	int		_dm_nused;	/* number of pages replaced */
> +
>  	/*
>  	 * PUBLIC MEMBERS: these are used by machine-independent code.
>  	 */
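> 
> To make the bounce page sizing in _bus_dmamap_create() concrete (the
> numbers here are just an example): with PAGE_SIZE 4096, a 9000-byte
> map needs round_page(9000) / PAGE_SIZE + 1 = 3 + 1 = 4 pages; the
> extra page covers a buffer that starts mid-page and therefore spills
> onto one more page.  With nsegments = 8 the count is raised to 8,
> since _bus_dmamap_load_buffer() consumes one bounce page per chunk.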
> 
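> Loading then works per chunk (again with hypothetical numbers): for a
> buffer at vaddr 0x1234, off = vaddr & PAGE_MASK = 0x234; the chunk is
> assigned bounce page _dm_pages[_dm_nused++], ds_addr becomes
> VM_PAGE_TO_PHYS(pg) + 0x234 and _ds_bounce_va becomes _dm_pgva +
> (page << PGSHIFT) + 0x234, so the data keeps its page offset inside
> the bounce page.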
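> 
> And the partial-sync walk in _bus_dmamap_sync(), traced by hand: on a
> map with two 4096-byte segments, bus_dmamap_sync(t, map, 6000, 1000,
> BUS_DMASYNC_PREWRITE) skips segment 0 (off = 6000 >= ds_len = 4096,
> leaving off = 1904), then at segment 1 clamps l = 4096 - 1904 = 2192
> down to size = 1000 and bcopy()s those 1000 bytes from _ds_va + 1904
> to _ds_bounce_va + 1904.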