From: Hans-Jörg Höxer
Subject: Re: [EXT] Re: SEV: busdma bounce buffer
To: Mark Kettenis ,
Cc: Hans-Jörg Höxer ,
Date: Tue, 13 Aug 2024 15:47:54 +0200

Hi,

> ...
> > > @@ -551,7 +551,11 @@ typedef struct bus_dmamap *bus_dmamap_t;
> > >   */
> > >  struct bus_dma_segment {
> > >         bus_addr_t      ds_addr;        /* DMA address */
> > > +       bus_addr_t      ds_addr2;       /* replacement store */
> >
> > A more descriptive name would be better.  Also, because this isn't
> > part of the bus_dma API, it should probably start with an underscore.
> > So perhaps _ds_bounce_addr?
>
> ok, I will change this.
>
> > >         bus_size_t      ds_len;         /* length of transfer */
> > > +       vaddr_t         ds_va;          /* mapped loaded data */
> > > +       vaddr_t         ds_bounce;      /* mapped bounced data */
> > > +
> >
> > Same here.  For consistency maybe _ds_va and _ds_bounce_va?

Updated diff below.

Take care,
HJ.

------------------------------------------------------------------------
commit 5a9b3f80d9a3d9d0e86a20b1d0b449c562e197be
Author: Hans-Joerg Hoexer
Date:   Fri Jul 12 10:56:19 2024 +0200

    busdma: Bounce buffering for SEV

    When running as an SEV guest (indicated by cpu_sev_guestmode != 0),
    we allocate additional pages for each segment on DMA map creation.
    These pages are mapped with the PMAP_NOCRYPT attribute, i.e. the
    crypt bit is not set in the PTE.  Thus, these pages are shared with
    the hypervisor.

    When the map is loaded with actual pages, the address in the
    descriptor is replaced with the address of the corresponding bounce
    buffer.

    Using bus_dmamap_sync(), data is copied from the encrypted pages
    used by guest drivers to the unencrypted bounce buffers shared with
    the hypervisor, and vice versa.

    When the kernel is not running in SEV guest mode (i.e. on a normal
    host or as a non-SEV guest), no bounce buffers are used.

diff --git a/sys/arch/amd64/amd64/bus_dma.c b/sys/arch/amd64/amd64/bus_dma.c
index 19a366115f9..9d4705b36f3 100644
--- a/sys/arch/amd64/amd64/bus_dma.c
+++ b/sys/arch/amd64/amd64/bus_dma.c
@@ -108,8 +108,13 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
     bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
 {
         struct bus_dmamap *map;
+        struct pglist mlist;
+        struct vm_page **pg, *pgnext;
+        size_t mapsize, sz, ssize;
+        vaddr_t va, sva;
         void *mapstore;
-        size_t mapsize;
+        int npages, error;
+        const struct kmem_dyn_mode *kd;
 
         /*
          * Allocate and initialize the DMA map.  The end of the map
@@ -125,6 +130,16 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
          */
         mapsize = sizeof(struct bus_dmamap) +
             (sizeof(bus_dma_segment_t) * (nsegments - 1));
+
+        /* allocate and use bounce buffers when running as SEV guest */
+        if (cpu_sev_guestmode) {
+                /* this many pages plus one in case we get split */
+                npages = round_page(size) / PAGE_SIZE + 1;
+                if (npages < nsegments)
+                        npages = nsegments;
+                mapsize += sizeof(struct vm_page *) * npages;
+        }
+
         if ((mapstore = malloc(mapsize, M_DEVBUF,
             (flags & BUS_DMA_NOWAIT) ? (M_NOWAIT|M_ZERO) : (M_WAITOK|M_ZERO))) == NULL)
@@ -135,8 +150,59 @@ _bus_dmamap_create(bus_dma_tag_t t, bus_size_t size, int nsegments,
         map->_dm_segcnt = nsegments;
         map->_dm_maxsegsz = maxsegsz;
         map->_dm_boundary = boundary;
+        if (cpu_sev_guestmode) {
+                map->_dm_pages = (void *)&map->dm_segs[nsegments];
+                map->_dm_npages = npages;
+        }
         map->_dm_flags = flags & ~(BUS_DMA_WAITOK|BUS_DMA_NOWAIT);
 
+        if (!cpu_sev_guestmode) {
+                *dmamp = map;
+                return (0);
+        }
+
+        sz = npages << PGSHIFT;
+        kd = flags & BUS_DMA_NOWAIT ? &kd_trylock : &kd_waitok;
+        va = (vaddr_t)km_alloc(sz, &kv_any, &kp_none, kd);
+        if (va == 0) {
+                map->_dm_npages = 0;
+                free(map, M_DEVBUF, mapsize);
+                return (ENOMEM);
+        }
+
+        TAILQ_INIT(&mlist);
+        error = uvm_pglistalloc(sz, 0, -1, PAGE_SIZE, 0, &mlist, nsegments,
+            (flags & BUS_DMA_NOWAIT) ? UVM_PLA_NOWAIT : UVM_PLA_WAITOK);
+        if (error) {
+                map->_dm_npages = 0;
+                km_free((void *)va, sz, &kv_any, &kp_none);
+                free(map, M_DEVBUF, mapsize);
+                return (ENOMEM);
+        }
+
+        sva = va;
+        ssize = sz;
+        pgnext = TAILQ_FIRST(&mlist);
+        for (pg = map->_dm_pages; npages--; va += PAGE_SIZE, pg++) {
+                *pg = pgnext;
+                error = pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(*pg),
+                    PROT_READ | PROT_WRITE,
+                    PROT_READ | PROT_WRITE | PMAP_WIRED |
+                    PMAP_CANFAIL | PMAP_NOCRYPT);
+                if (error) {
+                        pmap_update(pmap_kernel());
+                        map->_dm_npages = 0;
+                        km_free((void *)sva, ssize, &kv_any, &kp_none);
+                        free(map, M_DEVBUF, mapsize);
+                        uvm_pglistfree(&mlist);
+                        return (ENOMEM);
+                }
+                pgnext = TAILQ_NEXT(*pg, pageq);
+                bzero((void *)va, PAGE_SIZE);
+        }
+        pmap_update(pmap_kernel());
+
+        map->_dm_pgva = sva;
+
         *dmamp = map;
         return (0);
 }
@@ -149,7 +215,25 @@ void
 _bus_dmamap_destroy(bus_dma_tag_t t, bus_dmamap_t map)
 {
         size_t mapsize;
-
+        struct vm_page **pg;
+        struct pglist mlist;
+        vaddr_t va;
+
+        if (map->_dm_pgva) {
+                km_free((void *)map->_dm_pgva, map->_dm_npages << PGSHIFT,
+                    &kv_any, &kp_none);
+        }
+
+        if (map->_dm_pages) {
+                TAILQ_INIT(&mlist);
+                for (pg = map->_dm_pages, va = map->_dm_pgva;
+                    map->_dm_npages--; va += PAGE_SIZE, pg++) {
+                        pmap_remove(pmap_kernel(), va, va + PAGE_SIZE);
+                        TAILQ_INSERT_TAIL(&mlist, *pg, pageq);
+                }
+                uvm_pglistfree(&mlist);
+        }
+
         mapsize = sizeof(struct bus_dmamap) +
             (sizeof(bus_dma_segment_t) * (map->_dm_segcnt - 1));
         free(map, M_DEVBUF, mapsize);
@@ -383,6 +467,7 @@ _bus_dmamap_unload(bus_dma_tag_t t, bus_dmamap_t map)
          */
         map->dm_mapsize = 0;
         map->dm_nsegs = 0;
+        map->_dm_nused = 0;
 }
 
 /*
@@ -393,7 +478,40 @@ void
 _bus_dmamap_sync(bus_dma_tag_t t, bus_dmamap_t map, bus_addr_t addr,
     bus_size_t size, int op)
 {
-        /* Nothing to do here. */
+        bus_dma_segment_t *sg;
+        int i, off = addr;
+        bus_size_t l;
+
+        if (!cpu_sev_guestmode)
+                return;
+
+        for (i = map->_dm_segcnt, sg = map->dm_segs; size && i--; sg++) {
+                if (off >= sg->ds_len) {
+                        off -= sg->ds_len;
+                        continue;
+                }
+
+                l = sg->ds_len - off;
+                if (l > size)
+                        l = size;
+                size -= l;
+
+                /* PREREAD and POSTWRITE are no-ops. */
+
+                /* READ: device -> memory */
+                if (op & BUS_DMASYNC_POSTREAD) {
+                        bcopy((void *)(sg->_ds_bounce_va + off),
+                            (void *)(sg->_ds_va + off), l);
+                }
+
+                /* WRITE: memory -> device */
+                if (op & BUS_DMASYNC_PREWRITE) {
+                        bcopy((void *)(sg->_ds_va + off),
+                            (void *)(sg->_ds_bounce_va + off), l);
+                }
+
+                off = 0;
+        }
 }
 
 /*
@@ -565,10 +683,11 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
     int first)
 {
         bus_size_t sgsize;
-        bus_addr_t curaddr, lastaddr, baddr, bmask;
-        vaddr_t vaddr = (vaddr_t)buf;
-        int seg;
+        bus_addr_t curaddr, lastaddr, baddr, bmask, oaddr = -1;
+        vaddr_t pgva = -1, vaddr = (vaddr_t)buf;
+        int seg, page, off;
         pmap_t pmap;
+        struct vm_page *pg;
 
         if (p != NULL)
                 pmap = p->p_vmspace->vm_map.pmap;
@@ -589,6 +708,19 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
                         panic("Non dma-reachable buffer at curaddr %#lx(raw)",
                             curaddr);
 
+                if (cpu_sev_guestmode) {
+                        /* use bounce buffer */
+                        if (map->_dm_nused + 1 >= map->_dm_npages)
+                                return (ENOMEM);
+
+                        off = vaddr & PAGE_MASK;
+                        pg = map->_dm_pages[page = map->_dm_nused++];
+                        oaddr = curaddr;
+                        curaddr = VM_PAGE_TO_PHYS(pg) + off;
+
+                        pgva = map->_dm_pgva + (page << PGSHIFT) + off;
+                }
+
                 /*
                  * Compute the segment size, and adjust counts.
                  */
@@ -611,7 +743,10 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
                  */
                 if (first) {
                         map->dm_segs[seg].ds_addr = curaddr;
+                        map->dm_segs[seg]._ds_bounce_addr = oaddr;
                         map->dm_segs[seg].ds_len = sgsize;
+                        map->dm_segs[seg]._ds_va = vaddr;
+                        map->dm_segs[seg]._ds_bounce_va = pgva;
                         first = 0;
                 } else {
                         if (curaddr == lastaddr &&
@@ -625,7 +760,10 @@ _bus_dmamap_load_buffer(bus_dma_tag_t t, bus_dmamap_t map, void *buf,
                                 if (++seg >= map->_dm_segcnt)
                                         break;
                                 map->dm_segs[seg].ds_addr = curaddr;
+                                map->dm_segs[seg]._ds_bounce_addr = oaddr;
                                 map->dm_segs[seg].ds_len = sgsize;
+                                map->dm_segs[seg]._ds_va = vaddr;
+                                map->dm_segs[seg]._ds_bounce_va = pgva;
                         }
                 }
 
diff --git a/sys/arch/amd64/include/bus.h b/sys/arch/amd64/include/bus.h
index 33d6cd6eaeb..d78735ede6f 100644
--- a/sys/arch/amd64/include/bus.h
+++ b/sys/arch/amd64/include/bus.h
@@ -550,8 +550,12 @@ typedef struct bus_dmamap *bus_dmamap_t;
  * are suitable for programming into DMA registers.
  */
 struct bus_dma_segment {
-        bus_addr_t      ds_addr;        /* DMA address */
-        bus_size_t      ds_len;         /* length of transfer */
+        bus_addr_t      ds_addr;        /* DMA address */
+        bus_addr_t      _ds_bounce_addr; /* bounce buffer */
+        bus_size_t      ds_len;         /* length of transfer */
+        vaddr_t         _ds_va;         /* mapped loaded data */
+        vaddr_t         _ds_bounce_va;  /* mapped bounced data */
+
         /*
          * Ugh. need this so can pass alignment down from bus_dmamem_alloc
          * to scatter gather maps. only the first one is used so the rest is
@@ -655,6 +659,11 @@ struct bus_dmamap {
         void            *_dm_cookie;    /* cookie for bus-specific functions */
 
+        struct vm_page  **_dm_pages;    /* replacement pages */
+        vaddr_t         _dm_pgva;       /* those above -- mapped */
+        int             _dm_npages;     /* number of pages allocated */
+        int             _dm_nused;      /* number of pages replaced */
+
         /*
          * PUBLIC MEMBERS: these are used by machine-independent code.
          */
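
For illustration only (this is not part of the diff): drivers should not
need any changes for this, since the copy between the private pages and
the PMAP_NOCRYPT bounce pages happens inside bus_dmamap_sync().  A
simplified, hypothetical consumer of the existing busdma API could look
like the sketch below; "example_softc", "example_dma_write", "sc_dmat",
the single-segment map and the minimal error handling are placeholders,
not code from the diff.

int
example_dma_write(struct example_softc *sc, void *buf, bus_size_t len)
{
        bus_dmamap_t map;
        int error;

        /* One segment of at most MCLBYTES, no boundary restriction. */
        error = bus_dmamap_create(sc->sc_dmat, MCLBYTES, 1, MCLBYTES, 0,
            BUS_DMA_NOWAIT, &map);
        if (error)
                return (error);

        error = bus_dmamap_load(sc->sc_dmat, map, buf, len, NULL,
            BUS_DMA_NOWAIT);
        if (error) {
                bus_dmamap_destroy(sc->sc_dmat, map);
                return (error);
        }

        /*
         * In SEV guest mode this copies "buf" into the shared
         * (unencrypted) bounce pages; otherwise it is a no-op.
         */
        bus_dmamap_sync(sc->sc_dmat, map, 0, len, BUS_DMASYNC_PREWRITE);

        /* Program map->dm_segs[0].ds_addr into the device and start DMA. */

        /* After the transfer has completed. */
        bus_dmamap_sync(sc->sc_dmat, map, 0, len, BUS_DMASYNC_POSTWRITE);
        bus_dmamap_unload(sc->sc_dmat, map);
        bus_dmamap_destroy(sc->sc_dmat, map);
        return (0);
}

The only behavioural difference introduced by the diff is what these
calls do internally when cpu_sev_guestmode is set.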