From: Mark Kettenis
Subject: Re: [EXT] Re: SEV: busdma bounce buffer
To: Hans-Jörg Höxer
Cc: tech@openbsd.org, Hans-Joerg_Hoexer@genua.de, alexander.bluhm@gmx.net
Date: Wed, 14 Aug 2024 14:44:22 +0200

> Date: Tue, 13 Aug 2024 15:47:54 +0200
> From: Hans-Jörg Höxer
> 
> Hi,
> 
> > ...
> > > > @@ -551,7 +551,11 @@ typedef struct bus_dmamap *bus_dmamap_t;
> > > >   */
> > > >  struct bus_dma_segment {
> > > >  	bus_addr_t	ds_addr;	/* DMA address */
> > > > +	bus_addr_t	ds_addr2;	/* replacement store */
> > > A more descriptive name would be better.  Also, because this isn't
> > > part of the bus_dma API, it should probably start with an underscore.
> > > So perhaps _ds_bounce_addr?
> > ok, I will change this.

Argh, the new name makes it painfully obvious that the new
_ds_bounce_addr isn't actually the physical address of the bounce
buffer, but the physical address of the original buffer.  Sorry for
the misguided suggestion.

That said, _ds_bounce_addr doesn't actually seem to be used for any
real purpose.  So maybe just remove it and save a few bytes of memory
in the maps?
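I.e. the segment would then only carry the fields the sync code
actually uses -- roughly this (just a sketch, not a tested diff; the
remaining private members stay as they are):

	struct bus_dma_segment {
		bus_addr_t	ds_addr;	/* DMA address */
		bus_size_t	ds_len;		/* length of transfer */
		vaddr_t		_ds_va;		/* mapped loaded data */
		vaddr_t		_ds_bounce_va;	/* mapped bounced data */
		/* ... private alignment members unchanged ... */
	};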
> > > >  	bus_size_t	ds_len;		/* length of transfer */
> > > > +	vaddr_t		ds_va;		/* mapped loaded data */
> > > > +	vaddr_t		ds_bounce;	/* mapped bounced data */
> > > > +
> > > Same here.  For consistency maybe _ds_va and _ds_bounce_va?

But these are correct.

> Updated diff below

I think there is a small mistake in the bus_dmamap_destroy()
implementation.  But otherwise this looks ok.

> Take care,
> HJ.
> ------------------------------------------------------------------------
> commit 5a9b3f80d9a3d9d0e86a20b1d0b449c562e197be
> Author: Hans-Joerg Hoexer
> Date:   Fri Jul 12 10:56:19 2024 +0200
> 
>     busdma: Bounce buffering for SEV
> 
>     When running as SEV guest (indicated by cpu_sev_guestmode != 0),
>     we allocate additional pages for each segment on dma map creation.
>     These pages are mapped with the PMAP_NOCRYPT attribute, i.e. the
>     crypt bit is not set in the PTE.  Thus, these pages are shared with
>     the hypervisor.
> 
>     When the map is loaded with actual pages, the address in the
>     descriptor is replaced with the corresponding bounce buffer.  Using
>     bus_dmamap_sync(), data is copied from the encrypted pages used by
>     guest-drivers to the unencrypted bounce buffers shared with the
>     hypervisor.  And vice versa.
> 
>     When the kernel is running in non-SEV guest mode (i.e. normal host,
>     or non-SEV guest), no bounce buffers are used.
> 
> diff --git a/sys/arch/amd64/amd64/bus_dma.c b/sys/arch/amd64/amd64/bus_dma.c
> index 19a366115f9..9d4705b36f3 100644
> --- a/sys/arch/amd64/amd64/bus_dma.c
> +++ b/sys/arch/amd64/amd64/bus_dma.c
> @@ -108,8 +108,13 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
>      bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
>  {
>  	struct bus_dmamap *map;
> +	struct pglist mlist;
> +	struct vm_page **pg, *pgnext;
> +	size_t mapsize, sz, ssize;
> +	vaddr_t va, sva;
>  	void *mapstore;
> -	size_t mapsize;
> +	int npages, error;
> +	const struct kmem_dyn_mode *kd;
>  
>  	/*
>  	 * Allocate and initialize the DMA map.  The end of the map
> @@ -125,6 +130,16 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
>  	 */
>  	mapsize = sizeof(struct bus_dmamap) +
>  	    (sizeof(bus_dma_segment_t) * (nsegments - 1));
> +
> +	/* allocate and use bounce buffers when running as SEV guest */
> +	if (cpu_sev_guestmode) {
> +		/* this many pages plus one in case we get split */
> +		npages = round_page(size) / PAGE_SIZE + 1;
> +		if (npages < nsegments)
> +			npages = nsegments;
> +		mapsize += sizeof(struct vm_page *) * npages;
> +	}
> +
>  	if ((mapstore = malloc(mapsize, M_DEVBUF,
>  	    (flags & BUS_DMA_NOWAIT) ?
>  	    (M_NOWAIT|M_ZERO) : (M_WAITOK|M_ZERO))) == NULL)
> @@ -135,8 +150,59 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
>  	map->_dm_segcnt = nsegments;
>  	map->_dm_maxsegsz = maxsegsz;
>  	map->_dm_boundary = boundary;
> +	if (cpu_sev_guestmode) {
> +		map->_dm_pages = (void *)&map->dm_segs[nsegments];
> +		map->_dm_npages = npages;
> +	}
>  	map->_dm_flags = flags & ~(BUS_DMA_WAITOK|BUS_DMA_NOWAIT);
>  
> +	if (!cpu_sev_guestmode) {
> +		*dmamp = map;
> +		return (0);
> +	}
> +
> +	sz = npages << PGSHIFT;
> +	kd = flags & BUS_DMA_NOWAIT ? &kd_trylock : &kd_waitok;
> +	va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, kd);
> +	if (va == 0) {
> +		map->_dm_npages = 0;
> +		free(map, M_DEVBUF, mapsize);
> +		return (ENOMEM);
> +	}
> +
> +	TAILQ_INIT(&mlist);
> +	error = uvm_pglistalloc(sz, 0, -1, PAGE_SIZE, 0, &mlist, nsegments,
> +	    (flags & BUS_DMA_NOWAIT) ? UVM_PLA_NOWAIT : UVM_PLA_WAITOK);
> +	if (error) {
> +		map->_dm_npages = 0;
> +		km_free((void *)va, sz, &kv_any, &kp_none);
> +		free(map, M_DEVBUF, mapsize);
> +		return (ENOMEM);
> +	}
> +
> +	sva = va;
> +	ssize = sz;
> +	pgnext = TAILQ_FIRST(&mlist);
> +	for (pg = map->_dm_pages; npages--; va += PAGE_SIZE, pg++) {
> +		*pg = pgnext;
> +		error = pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(*pg),
> +		    PROT_READ | PROT_WRITE,
> +		    PROT_READ | PROT_WRITE | PMAP_WIRED |
> +		    PMAP_CANFAIL | PMAP_NOCRYPT);

Ideally this would use pmap_kenter_pa() to avoid the overhead of
"managed" kernel memory.  But that's probably a bad idea since it
makes the VA space management using km_alloc()/km_free() more
complicated.  And the bus_dmamem_map() implementation uses
pmap_enter() as well.

> +		if (error) {
> +			pmap_update(pmap_kernel());
> +			map->_dm_npages = 0;
> +			km_free((void *)sva, ssize, &kv_any, &kp_none);
> +			free(map, M_DEVBUF, mapsize);
> +			uvm_pglistfree(&mlist);
> +			return (ENOMEM);
> +		}
> +		pgnext = TAILQ_NEXT(*pg, pageq);
> +		bzero((void *)va, PAGE_SIZE);
> +	}
> +	pmap_update(pmap_kernel());
> +	map->_dm_pgva = sva;
> +
>  	*dmamp = map;
>  	return (0);
>  }
> @@ -149,7 +215,25 @@ void
>  _bus_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
>  {
>  	size_t mapsize;
> -
> +	struct vm_page **pg;
> +	struct pglist mlist;
> +	vaddr_t va;
> +
> +	if (map->_dm_pgva) {
> +		km_free((void *)map->_dm_pgva, map->_dm_npages << PGSHIFT,
> +		    &kv_any, &kp_none);
> +	}
> +
> +	if (map->_dm_pages) {
> +		TAILQ_INIT(&mlist);
> +		for (pg = map->_dm_pages, va = map->_dm_pgva;
> +		    map->_dm_npages--; va += PAGE_SIZE, pg++) {
> +			pmap_remove(pmap_kernel(), va, va + PAGE_SIZE);

The km_free() does an uvm_unmap(), which already takes care of doing
the pmap_remove().  Dropping this also avoids doing the wrong thing
if _dm_pgva is zero, which you guard against above, but not here.
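Something along these lines (just a sketch, untested) would then be
all that is left of the cleanup path, and the local va becomes unused
as well:

	/* km_free() unmaps the VA range; only the pages need freeing. */
	if (map->_dm_pgva) {
		km_free((void *)map->_dm_pgva, map->_dm_npages << PGSHIFT,
		    &kv_any, &kp_none);
	}

	if (map->_dm_pages) {
		TAILQ_INIT(&mlist);
		for (pg = map->_dm_pages; map->_dm_npages--; pg++)
			TAILQ_INSERT_TAIL(&mlist, *pg, pageq);
		uvm_pglistfree(&mlist);
	}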
> +			TAILQ_INSERT_TAIL(&mlist, *pg, pageq);
> +		}
> +		uvm_pglistfree(&mlist);
> +	}
> +
>  	mapsize = sizeof(struct bus_dmamap) +
>  	    (sizeof(bus_dma_segment_t) * (map->_dm_segcnt - 1));
>  	free(map, M_DEVBUF, mapsize);
> @@ -383,6 +467,7 @@ _bus_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
>  	 */
>  	map->dm_mapsize = 0;
>  	map->dm_nsegs = 0;
> +	map->_dm_nused = 0;
>  }
>  
>  /*
> @@ -393,7 +478,40 @@ void
>  _bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t addr,
>      bus_size_t size, int op)
>  {
> -	/* Nothing to do here. */
> +	bus_dma_segment_t *sg;
> +	int i, off = addr;
> +	bus_size_t l;
> +
> +	if (!cpu_sev_guestmode)
> +		return;
> +
> +	for (i = map->_dm_segcnt, sg = map->dm_segs; size && i--; sg++) {
> +		if (off >= sg->ds_len) {
> +			off -= sg->ds_len;
> +			continue;
> +		}
> +
> +		l = sg->ds_len - off;
> +		if (l > size)
> +			l = size;
> +		size -= l;
> +
> +		/* PREREAD and POSTWRITE are no-ops. */
> +
> +		/* READ: device -> memory */
> +		if (op & BUS_DMASYNC_POSTREAD) {
> +			bcopy((void *)(sg->_ds_bounce_va + off),
> +			    (void *)(sg->_ds_va + off), l);
> +		}
> +
> +		/* WRITE: memory -> device */
> +		if (op & BUS_DMASYNC_PREWRITE) {
> +			bcopy((void *)(sg->_ds_va + off),
> +			    (void *)(sg->_ds_bounce_va + off), l);
> +		}
> +
> +		off = 0;
> +	}
>  }
>  
>  /*
> @@ -565,10 +683,11 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
>      int first)
>  {
>  	bus_size_t sgsize;
> -	bus_addr_t curaddr, lastaddr, baddr, bmask;
> -	vaddr_t vaddr = (vaddr_t)buf;
> -	int seg;
> +	bus_addr_t curaddr, lastaddr, baddr, bmask, oaddr = -1;
> +	vaddr_t pgva = -1, vaddr = (vaddr_t)buf;
> +	int seg, page, off;
>  	pmap_t pmap;
> +	struct vm_page *pg;
>  
>  	if (p != NULL)
>  		pmap = p->p_vmspace->vm_map.pmap;
> @@ -589,6 +708,19 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
>  			panic("Non dma-reachable buffer at curaddr %#lx(raw)",
>  			    curaddr);
>  
> +		if (cpu_sev_guestmode) {
> +			/* use bounce buffer */
> +			if (map->_dm_nused + 1 >= map->_dm_npages)
> +				return (ENOMEM);
> +
> +			off = vaddr & PAGE_MASK;
> +			pg = map->_dm_pages[page = map->_dm_nused++];
> +			oaddr = curaddr;
> +			curaddr = VM_PAGE_TO_PHYS(pg) + off;
> +
> +			pgva = map->_dm_pgva + (page << PGSHIFT) + off;
> +		}
> +
>  		/*
>  		 * Compute the segment size, and adjust counts.
>  		 */
> @@ -611,7 +743,10 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
>  		 */
>  		if (first) {
>  			map->dm_segs[seg].ds_addr = curaddr;
> +			map->dm_segs[seg]._ds_bounce_addr = oaddr;
>  			map->dm_segs[seg].ds_len = sgsize;
> +			map->dm_segs[seg]._ds_va = vaddr;
> +			map->dm_segs[seg]._ds_bounce_va = pgva;
>  			first = 0;
>  		} else {
>  			if (curaddr == lastaddr &&
> @@ -625,7 +760,10 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
>  			if (++seg >= map->_dm_segcnt)
>  				break;
>  			map->dm_segs[seg].ds_addr = curaddr;
> +			map->dm_segs[seg]._ds_bounce_addr = oaddr;
>  			map->dm_segs[seg].ds_len = sgsize;
> +			map->dm_segs[seg]._ds_va = vaddr;
> +			map->dm_segs[seg]._ds_bounce_va = pgva;
>  		}
>  	}
>  
> diff --git a/sys/arch/amd64/include/bus.h b/sys/arch/amd64/include/bus.h
> index 33d6cd6eaeb..d78735ede6f 100644
> --- a/sys/arch/amd64/include/bus.h
> +++ b/sys/arch/amd64/include/bus.h
> @@ -550,8 +550,12 @@ typedef struct bus_dmamap *bus_dmamap_t;
>   * are suitable for programming into DMA registers.
>   */
>  struct bus_dma_segment {
> -	bus_addr_t	ds_addr;	/* DMA address */
> -	bus_size_t	ds_len;		/* length of transfer */
> +	bus_addr_t	ds_addr;	/* DMA address */
> +	bus_addr_t	_ds_bounce_addr; /* bounce buffer */
> +	bus_size_t	ds_len;		/* length of transfer */
> +	vaddr_t		_ds_va;		/* mapped loaded data */
> +	vaddr_t		_ds_bounce_va;	/* mapped bounced data */
> +
>  	/*
>  	 * Ugh. need this so can pass alignment down from bus_dmamem_alloc
>  	 * to scatter gather maps. only the first one is used so the rest is
> @@ -655,6 +659,11 @@ struct bus_dmamap {
>  
>  	void		*_dm_cookie;	/* cookie for bus-specific functions */
>  
> +	struct vm_page	**_dm_pages;	/* replacement pages */
> +	vaddr_t		_dm_pgva;	/* those above -- mapped */
> +	int		_dm_npages;	/* number of pages allocated */
> +	int		_dm_nused;	/* number of pages replaced */
> +
>  	/*
>  	 * PUBLIC MEMBERS: these are used by machine-independent code.
>  	 */