From: Mike Larkin Subject: Re: AMD SEV 5/5: Add support to vmd(8) To: Hans-Jörg Höxer Cc: tech@openbsd.org Date: Thu, 29 Aug 2024 22:25:36 -0700 On Wed, Aug 28, 2024 at 03:27:09PM +0200, Hans-Jörg Höxer wrote: > Hi, > > this change adds the necessary pieces to vmd(8). To enable SEV, one > can add the new parameter "sev" to a vm section in vm.conf. > > Take care, > HJ. > -------------------------------------------------------------------------- > commit d1022be5060e1149d363795e9210ee01cdc7e307 > Author: Hans-Joerg Hoexer > Date: Thu Jul 11 16:20:59 2024 +0200 > > vmd(8): initial AMD SEV support > > To launch a guest with AMD SEV enabled, vmd needs to do a few things: > > - retrieve ASID used by guest on VM creation > - provide ASID to ccp(4) > - let ccp(4) encrypt memory used intially by guest > - run guest > - release resources held by ccp(4) on guest shutdown > > To enable SEV for a guest use the parameter "vm" in the guest's vm > section in vm.conf. If you're going to copy this commit message, I think you meant "sev" here and not "vm". See other comments below. ok mlarkin once these changes are made. 
-ml > > diff --git a/usr.sbin/vmd/Makefile b/usr.sbin/vmd/Makefile > index 22c1e887823..a0d5ae00e62 100644 > --- a/usr.sbin/vmd/Makefile > +++ b/usr.sbin/vmd/Makefile > @@ -11,6 +11,7 @@ SRCS+= vionet.c > .if ${MACHINE} == "amd64" > SRCS+= i8253.c i8259.c fw_cfg.c loadfile_elf.c mc146818.c ns8250.c > SRCS+= x86_vm.c x86_mmio.c > +SRCS+= psp.c sev.c > .endif # amd64 > .if ${MACHINE} == "arm64" > SRCS+= arm64_vm.c > diff --git a/usr.sbin/vmd/arm64_vm.c b/usr.sbin/vmd/arm64_vm.c > index 282dbcb4985..6d176ac4cfc 100644 > --- a/usr.sbin/vmd/arm64_vm.c > +++ b/usr.sbin/vmd/arm64_vm.c > @@ -160,3 +160,51 @@ vcpu_exit_pci(struct vm_run_params *vrp) > /* NOTREACHED */ > return (0xff); > } > + > +void > +set_return_data(struct vm_exit *vei, uint32_t data) > +{ > + fatalx("%s: unimplemented", __func__); > + /* NOTREACHED */ > + return; > +} > + > +void > +get_input_data(struct vm_exit *vei, uint32_t *data) > +{ > + fatalx("%s: unimplemented", __func__); > + /* NOTREACHED */ > + return; > +} > + > +int > +sev_init(struct vmd_vm *vm) > +{ > + fatalx("%s: unimplemented", __func__); > + /* NOTREACHED */ > + return (-1); > +} > + > +int > +sev_shutdown(struct vmd_vm *vm) > +{ > + fatalx("%s: unimplemented", __func__); > + /* NOTREACHED */ > + return (-1); > +} > + > +int > +sev_activate(struct vmd_vm *vm, int vcpu_id) > +{ > + fatalx("%s: unimplemented", __func__); > + /* NOTREACHED */ > + return (-1); > +} > + > +int > +sev_encrypt_memory(struct vmd_vm *vm) > +{ > + fatalx("%s: unimplemented", __func__); > + /* NOTREACHED */ > + return (-1); > +} > diff --git a/usr.sbin/vmd/loadfile_elf.c b/usr.sbin/vmd/loadfile_elf.c > index 166aa04c5e1..8d4a70a1148 100644 > --- a/usr.sbin/vmd/loadfile_elf.c > +++ b/usr.sbin/vmd/loadfile_elf.c > @@ -130,6 +130,8 @@ static void mbcopy(void *, paddr_t, int); > extern char *__progname; > extern int vm_id; > > +uint64_t pg_crypt = 0; > + > /* > * setsegment > * > @@ -193,7 +195,7 @@ push_gdt(void) > setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, 
SEL_KPL, 1, 1); > setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1); > > - write_mem(GDT_PAGE, gdtpage, PAGE_SIZE); > + write_mem_enc(GDT_PAGE, gdtpage, PAGE_SIZE); The majority of the cases will be non-encrypted; IMO we should leave the name as write_mem and have that function determine whether or not to encrypt based on the setting. There are a bunch of these. > } > > /* > @@ -229,20 +231,21 @@ push_pt_64(void) > > /* PDPDE0 - first 1GB */ > memset(ptes, 0, sizeof(ptes)); > - ptes[0] = PG_V | PML3_PAGE; > - write_mem(PML4_PAGE, ptes, PAGE_SIZE); > + ptes[0] = pg_crypt | PG_V | PML3_PAGE; > + write_mem_enc(PML4_PAGE, ptes, PAGE_SIZE); > > /* PDE0 - first 1GB */ > memset(ptes, 0, sizeof(ptes)); > - ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE; > - write_mem(PML3_PAGE, ptes, PAGE_SIZE); > + ptes[0] = pg_crypt | PG_V | PG_RW | PG_u | PML2_PAGE; > + write_mem_enc(PML3_PAGE, ptes, PAGE_SIZE); > > /* First 1GB (in 2MB pages) */ > memset(ptes, 0, sizeof(ptes)); > for (i = 0 ; i < 512; i++) { > - ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i); > + ptes[i] = pg_crypt | PG_V | PG_RW | PG_u | PG_PS | > + ((2048 * 1024) * i); > } > - write_mem(PML2_PAGE, ptes, PAGE_SIZE); > + write_mem_enc(PML2_PAGE, ptes, PAGE_SIZE); > } > > /* > @@ -300,8 +303,18 @@ loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs, > vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE; > vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL; > } > - else > + else { > + if (vcp->vcp_sev) { > + if (vcp->vcp_poscbit == 0) { > + log_warnx("SEV enabled but no C-bit reported"); > + return 1; > + } > + pg_crypt = (1ULL << vcp->vcp_poscbit); > + log_debug("%s: poscbit %d pg_crypt 0x%016llx", > + __func__, vcp->vcp_poscbit, pg_crypt); > + } > push_pt_64(); > + } > > if (bootdevice == VMBOOTDEV_NET) { > bootmac = &bm; > @@ -412,7 +425,7 @@ push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac) > > ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */ > > - write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE); > + 
write_mem_enc(BOOTARGS_PAGE, ba, PAGE_SIZE); > > return (i * sizeof(uint32_t)); > } > @@ -462,7 +475,7 @@ push_stack(uint32_t bootargsz, uint32_t end) > stack[--loc] = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */ > stack[--loc] = 0; > > - write_mem(STACK_PAGE, &stack, PAGE_SIZE); > + write_mem_enc(STACK_PAGE, &stack, PAGE_SIZE); > > return (1024 - (loc - 1)) * sizeof(uint32_t); > } > @@ -490,6 +503,8 @@ mread(gzFile fp, paddr_t addr, size_t sz) > size_t i, osz; > char buf[PAGE_SIZE]; > > + sev_add_memsegment(addr, sz); > + I would prefer this be named something else to account for other potential implementations later. Also, at first glance, I was mistakenly thinking that this really is adding a new memory range to the VM. It's not, it's just informing ccp(4) that there is a new area of guest memory that needs to be encrypted. That's fine, and needed, but maybe a better name can be invented. > /* > * break up the 'sz' bytes into PAGE_SIZE chunks for use with > * write_mem > @@ -565,6 +580,8 @@ marc4random_buf(paddr_t addr, int sz) > int i, ct; > char buf[PAGE_SIZE]; > > + sev_add_memsegment(addr, sz); > + > /* > * break up the 'sz' bytes into PAGE_SIZE chunks for use with > * write_mem > @@ -612,7 +629,7 @@ marc4random_buf(paddr_t addr, int sz) > static void > mbzero(paddr_t addr, int sz) > { > - if (write_mem(addr, NULL, sz)) > + if (write_mem_enc(addr, NULL, sz)) > return; > } > > @@ -632,7 +649,7 @@ mbzero(paddr_t addr, int sz) > static void > mbcopy(void *src, paddr_t dst, int sz) > { > - write_mem(dst, src, sz); > + write_mem_enc(dst, src, sz); > } > > /* > diff --git a/usr.sbin/vmd/parse.y b/usr.sbin/vmd/parse.y > index aacfd635100..24a60c1b924 100644 > --- a/usr.sbin/vmd/parse.y > +++ b/usr.sbin/vmd/parse.y > @@ -126,7 +126,7 @@ typedef struct { > %token FORMAT GROUP > %token INET6 INSTANCE INTERFACE LLADDR LOCAL LOCKED MEMORY NET NIFS OWNER > %token PATH PREFIX RDOMAIN SIZE SOCKET SWITCH UP VM VMID STAGGERED START > -%token PARALLEL DELAY > +%token 
PARALLEL DELAY SEV > %token NUMBER > %token STRING > %type lladdr > @@ -140,6 +140,7 @@ typedef struct { > %type optstring > %type string > %type vm_instance > +%type sev; > > %% > > @@ -414,6 +415,9 @@ vm_opts_l : vm_opts_l vm_opts nl > vm_opts : disable { > vmc_disable = $1; > } > + | sev { > + vcp->vcp_sev = 1; > + } > | DISK string image_format { > if (parse_disk($2, $3) != 0) { > yyerror("failed to parse disks: %s", $2); > @@ -757,6 +761,9 @@ disable : ENABLE { $$ = 0; } > | DISABLE { $$ = 1; } > ; > > +sev : SEV { $$ = 1; } > + ; > + > bootdevice : CDROM { $$ = VMBOOTDEV_CDROM; } > | DISK { $$ = VMBOOTDEV_DISK; } > | NET { $$ = VMBOOTDEV_NET; } > @@ -841,6 +848,7 @@ lookup(char *s) > { "path", PATH }, > { "prefix", PREFIX }, > { "rdomain", RDOMAIN }, > + { "sev", SEV }, > { "size", SIZE }, > { "socket", SOCKET }, > { "staggered", STAGGERED }, > diff --git a/usr.sbin/vmd/psp.c b/usr.sbin/vmd/psp.c > new file mode 100644 > index 00000000000..0205d71d34a > --- /dev/null > +++ b/usr.sbin/vmd/psp.c > @@ -0,0 +1,272 @@ > +/* $OpenBSD: $ */ > + > +/* > + * Copyright (c) 2023, 2024 Hans-Joerg Hoexer > + * > + * Permission to use, copy, modify, and distribute this software for any > + * purpose with or without fee is hereby granted, provided that the above > + * copyright notice and this permission notice appear in all copies. > + * > + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES > + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF > + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR > + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES > + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN > + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF > + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
> + */ > + > +#include > +#include > +#include > +#include > + > +#include > +#include > + > +#include > + > +#include "vmd.h" > + > +extern struct vmd *env; > + > +/* Guest policy */ > +#define GPOL_NODBG (1ULL << 0) /* no debuggin */ > +#define GPOL_NOKS (1ULL << 1) /* no key sharing */ > +#define GPOL_ES (1ULL << 2) /* SEV-ES required */ > +#define GPOL_NOSEND (1ULL << 3) /* no guest migration */ > +#define GPOL_DOMAIN (1ULL << 4) /* no migration to other domain */ > +#define GPOL_SEV (1ULL << 5) /* no migration to non-SEV platform */ > + > + > +/* > + * Retrieve platform state. > + */ > +int > +psp_get_pstate(uint16_t *state) > +{ > + struct psp_platform_status pst; > + > + if (ioctl(env->vmd_ccp_fd, PSP_IOC_GET_PSTATUS, &pst) < 0) { > + log_warn("%s: ioctl", __func__); > + return (-1); > + } > + > + if (state) > + *state = pst.state; > + > + return (0); > +} > + > + > +/* > + * Flush data fabrics of all cores. > + * > + * This ensures all data of a SEV enabled guest is committed to > + * memory. This needs to be done before an ASID is assigend to > + * guest using psp_activate(). > + */ > +int > +psp_df_flush(void) > +{ > + if (ioctl(env->vmd_ccp_fd, PSP_IOC_DF_FLUSH) < 0) { > + log_warn("%s: ioctl", __func__); > + return (-1); > + } > + > + return (0); > +} > + > + > +/* > + * Retrieve guest state. > + */ > +int > +psp_get_gstate(uint32_t handle, uint32_t *policy, uint32_t *asid, > + uint8_t *state) > +{ > + struct psp_guest_status gst; > + > + memset(&gst, 0, sizeof(gst)); > + gst.handle = handle; > + > + if (ioctl(env->vmd_ccp_fd, PSP_IOC_GET_GSTATUS, &gst) < 0) { > + log_warn("%s: ioctl", __func__); > + return (-1); > + } > + > + if (policy) > + *policy = gst.policy; > + if (asid) > + *asid = gst.asid; > + if (state) > + *state = gst.state; > + > + return (0); > +} > + > + > +/* > + * Start the launch sequence of a guest. 
> + */ > +int > +psp_launch_start(uint32_t *handle) > +{ > + struct psp_launch_start ls; > + > + memset(&ls, 0, sizeof(ls)); > + > + /* Set guest policy. */ > + ls.policy = (GPOL_NODBG | GPOL_NOKS | GPOL_NOSEND | GPOL_DOMAIN | > + GPOL_SEV); > + > + if (ioctl(env->vmd_ccp_fd, PSP_IOC_LAUNCH_START, &ls) < 0) { > + log_warn("%s: ioctl", __func__); > + return (-1); > + } > + > + if (handle) > + *handle = ls.handle; > + > + return (0); > +} > + > + > +/* > + * Encrypt and measure a memory range. > + */ > +int > +psp_launch_update(uint32_t handle, vaddr_t v, size_t len) > +{ > + struct psp_launch_update_data lud; > + > + memset(&lud, 0, sizeof(lud)); > + lud.handle = handle; > + lud.paddr = v; /* will be converted to paddr */ > + lud.length = len; > + > + if (ioctl(env->vmd_ccp_fd, PSP_IOC_LAUNCH_UPDATE_DATA, &lud) < 0) { > + log_warn("%s: ioctl", __func__); > + return (-1); > + } > + > + return (0); > +} > + > + > +/* > + * Finalize and return memory measurement. > + * > + * We ask the PSP to provide a measurement (HMAC) over the encrypted > + * memory. As we do not yet negotiate a shared integrity key with > + * the PSP, the measurement is not really meaningful. Thus we just > + * log it for now. > + */ > +int > +psp_launch_measure(uint32_t handle) > +{ > + struct psp_launch_measure lm; > + char *p, buf[256]; > + size_t len; > + unsigned int i; > + > + memset(&lm, 0, sizeof(lm)); > + lm.handle = handle; > + lm.measure_len = sizeof(lm.psp_measure); > + memset(lm.measure, 0, sizeof(lm.measure)); > + memset(lm.measure_nonce, 0, sizeof(lm.measure_nonce)); > + > + if (ioctl(env->vmd_ccp_fd, PSP_IOC_LAUNCH_MEASURE, &lm) < 0) { > + log_warn("%s: ioctl", __func__); > + return (-1); > + } > + > + /* > + * We can not verify the measurement, yet. Therefore just > + * log it. 
> + */ > + len = sizeof(buf); > + memset(buf, 0, len); > + p = buf; > + for (i = 0; i < sizeof(lm.measure) && len >= 2; > + i++, p += 2, len -= 2) { > + snprintf(p, len, "%02x", lm.measure[i]); > + } > + log_info("%s: measurement\t0x%s", __func__, buf); > + > + len = sizeof(buf); > + memset(buf, 0, len); > + p = buf; > + for (i = 0; i < sizeof(lm.measure_nonce) && len >= 2; > + i++, p += 2, len -= 2) { > + snprintf(p, len, "%02x", lm.measure_nonce[i]); > + } > + log_info("%s: nonce\t0x%s", __func__, buf); > + > + return (0); > +} > + > + > +/* > + * Finalize launch sequence. > + */ > +int > +psp_launch_finish(uint32_t handle) > +{ > + struct psp_launch_finish lf; > + > + lf.handle = handle; > + > + if (ioctl(env->vmd_ccp_fd, PSP_IOC_LAUNCH_FINISH, &lf) < 0) { > + log_warn("%s: ioctl", __func__); > + return (-1); > + } > + > + return (0); > +} > + > + > +/* > + * Activate a guest. > + * > + * This associates the guest's ASID with the handle used to identify > + * crypto contexts managed by the PSP. > + */ > +int > +psp_activate(uint32_t handle, uint32_t asid) > +{ > + struct psp_activate act; > + > + act.handle = handle; > + act.asid = asid; > + > + if (ioctl(env->vmd_ccp_fd, PSP_IOC_ACTIVATE, &act) < 0) { > + log_warn("%s: ioctl", __func__); > + return (-1); > + } > + > + return (0); > +} > + > + > +/* > + * Deactivate and decommission a guest. > + * > + * This deassociates the guest's ASID from the crypto contexts in > + * the PSP. Then the PSP releases the crypto contexts (i.e. deletes > + * keys). 
> + */ > +int > +psp_guest_shutdown(uint32_t handle) > +{ > + struct psp_guest_shutdown gshutdown; > + > + gshutdown.handle = handle; > + > + if (ioctl(env->vmd_ccp_fd, PSP_IOC_GUEST_SHUTDOWN, &gshutdown) < 0) { > + log_warn("%s: ioctl", __func__); > + return (-1); > + } > + > + return (0); > +} > diff --git a/usr.sbin/vmd/sev.c b/usr.sbin/vmd/sev.c > new file mode 100644 > index 00000000000..7dc649fc448 > --- /dev/null > +++ b/usr.sbin/vmd/sev.c > @@ -0,0 +1,246 @@ > +/* $OpenBSD: $ */ > + > +/* > + * Copyright (c) 2023, 2024 Hans-Joerg Hoexer > + * > + * Permission to use, copy, modify, and distribute this software for any > + * purpose with or without fee is hereby granted, provided that the above > + * copyright notice and this permission notice appear in all copies. > + * > + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES > + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF > + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR > + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES > + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN > + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF > + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. > + */ > + > +#include > +#include > +#include > +#include > + > +#include > +#include > +#include > + > +#include > + > +#include "vmd.h" > + > +extern struct vmd_vm *current_vm; > + > +/* > + * Prepare guest to use SEV. > + * > + * This asks the PSP to create a new crypto contexts including a s/contexts/context/ > + * memory encryption key and assign a handle to the context. > + * > + * When the PSP driver ccp(4) attaches, it initializes the platform. > + * If this fails for whatever reason we can not run a guest using SEV. 
> + */ > +int > +sev_init(struct vmd_vm *vm) > +{ > + struct vmop_create_params *vmc = &vm->vm_params; > + struct vm_create_params *vcp = &vmc->vmc_params; > + uint32_t handle; > + uint16_t pstate; > + uint8_t gstate; > + > + if (!vcp->vcp_sev) > + return (0); > + > + if (psp_get_pstate(&pstate)) { > + log_warnx("%s: failed to get platform state", __func__); > + return (-1); > + } > + if (pstate == PSP_PSTATE_UNINIT) { > + log_warnx("%s: platform uninitialized", __func__); > + return (-1); > + } > + > + if (psp_launch_start(&handle) < 0) { > + log_warnx("%s: launch failed", __func__); > + return (-1); > + }; > + vm->vm_sev_handle = handle; > + > + if (psp_get_gstate(vm->vm_sev_handle, NULL, NULL, &gstate)) { > + log_warnx("%s: failed to get guest state", __func__); > + return (-1); > + } > + if (gstate != PSP_GSTATE_LUPDATE) { > + log_warnx("%s: invalid guest state: 0x%hx", __func__, gstate); > + return (-1); > + } > + > + return (0); > +} > + > +/* > + * Record memory segments to be encrypted for SEV. > + */ > +int > +sev_add_memsegment(vaddr_t addr, size_t size) > +{ > + struct vmop_create_params *vmc; > + struct vm_create_params *vcp; > + struct vm_mem_range *vmr; > + size_t off; > + int i; > + > + vmc = &current_vm->vm_params; > + vcp = &vmc->vmc_params; > + > + if (!vcp->vcp_sev) > + return (0); > + > + if (size == 0) > + return (0); > + > + /* Adjust address and size to be aligend to AES_XTS_BLOCKSIZE. 
*/ > + if (addr & (AES_XTS_BLOCKSIZE - 1)) { > + size += (addr & (AES_XTS_BLOCKSIZE - 1)); > + addr &= ~(AES_XTS_BLOCKSIZE - 1); > + } > + > + vmr = find_gpa_range(&current_vm->vm_params.vmc_params, addr, size); > + if (vmr == NULL) { > + log_warnx("%s: failed - invalid memory range addr = 0x%lx, " > + "len = 0x%zx", __func__, addr, size); > + return (-1); > + } > + if (current_vm->vm_sev_nmemsegments == > + nitems(current_vm->vm_sev_memsegments)) { > + log_warnx("%s: failed - out of SEV memory segments", __func__); > + return (-1); > + } > + i = current_vm->vm_sev_nmemsegments++; > + > + off = addr - vmr->vmr_gpa; > + > + current_vm->vm_sev_memsegments[i].vmr_va = vmr->vmr_va + off; > + current_vm->vm_sev_memsegments[i].vmr_size = size; > + current_vm->vm_sev_memsegments[i].vmr_gpa = vmr->vmr_gpa + off; > + > + log_debug("%s: i %d addr 0x%lx size 0x%lx vmr_va 0x%lx vmr_gpa 0x%lx " > + "vmr_size 0x%lx", __func__, i, addr, size, > + current_vm->vm_sev_memsegments[i].vmr_va, > + current_vm->vm_sev_memsegments[i].vmr_gpa, > + current_vm->vm_sev_memsegments[i].vmr_size); > + > + return (0); > +} > + > +/* > + * Encrypt and measure previously recorded memroy segments. > + * > + * This encrypts the memory initially used by the guest. This > + * includes the ELF kernel image, initial stack and page tables. I think we should just say "kernel or BIOS image" because this implies the feature only works with vmctl -b. > + * > + * We also ask the PSP to provide a measurement. However, right > + * now we can not really verify it. 
> + */ > +int > +sev_encrypt_memory(struct vmd_vm *vm) > +{ > + struct vmop_create_params *vmc = &vm->vm_params; > + struct vm_create_params *vcp = &vmc->vmc_params; > + struct vm_mem_range *vmr; > + size_t i; > + uint8_t gstate; > + > + if (!vcp->vcp_sev) > + return (0); > + > + for (i = 0; i < vm->vm_sev_nmemsegments; i++) { > + vmr = &vm->vm_sev_memsegments[i]; > + > + /* tell PSP to encrypt this range */ > + if (psp_launch_update(vm->vm_sev_handle, vmr->vmr_va, > + roundup(vmr->vmr_size, AES_XTS_BLOCKSIZE))) { > + log_warnx("%s: failed to launch update page " > + "%zu:0x%lx", __func__, i, vmr->vmr_va); > + return (-1); > + } > + > + log_debug("%s: encrypted %zu:0x%lx size 0x%lx", __func__, i, > + vmr->vmr_va, vmr->vmr_size); > + } > + if (psp_launch_measure(vm->vm_sev_handle)) { > + log_warnx("%s: failed to launch measure", __func__); > + return (-1); > + } > + if (psp_launch_finish(vm->vm_sev_handle)) { > + log_warnx("%s: failed to launch finish", __func__); > + return (-1); > + } > + > + if (psp_get_gstate(vm->vm_sev_handle, NULL, NULL, &gstate)) { > + log_warnx("%s: failed to get guest state", __func__); > + return (-1); > + } > + if (gstate != PSP_GSTATE_RUNNING) { > + log_warnx("%s: invalid guest state: 0x%hx", __func__, gstate); > + return (-1); > + } > + > + return (0); > +} > + > + > +/* > + * Activate a guest's SEV crypto state. 
> + */ > +int > +sev_activate(struct vmd_vm *vm, int vcpu_id) > +{ > + struct vmop_create_params *vmc = &vm->vm_params; > + struct vm_create_params *vcp = &vmc->vmc_params; > + uint8_t gstate; > + > + if (!vcp->vcp_sev) > + return (0); > + > + if (psp_df_flush() || > + psp_activate(vm->vm_sev_handle, vm->vm_sev_asid[vcpu_id])) { > + log_warnx("%s: failed to activate guest: 0x%x:0x%x", __func__, > + vm->vm_sev_handle, vm->vm_sev_asid[vcpu_id]); > + return (-1); > + } > + > + if (psp_get_gstate(vm->vm_sev_handle, NULL, NULL, &gstate)) { > + log_warnx("%s: failed to get guest state", __func__); > + return (-1); > + } > + if (gstate != PSP_GSTATE_LUPDATE) { > + log_warnx("%s: invalid guest state: 0x%hx", __func__, gstate); > + return (-1); > + } > + > + return (0); > +} > + > + > +/* > + * Deactivate and decommission a guest's SEV crypto state. > + */ > +int > +sev_shutdown(struct vmd_vm *vm) > +{ > + struct vmop_create_params *vmc = &vm->vm_params; > + struct vm_create_params *vcp = &vmc->vmc_params; > + > + if (!vcp->vcp_sev) > + return (0); > + > + if (psp_guest_shutdown(vm->vm_sev_handle)) { > + log_warnx("failed to deactivate guest"); > + return (-1); > + } > + vm->vm_sev_handle = 0; > + > + return (0); > +} > diff --git a/usr.sbin/vmd/vm.c b/usr.sbin/vmd/vm.c > index e8c73b0e053..c46f46a17cf 100644 > --- a/usr.sbin/vmd/vm.c > +++ b/usr.sbin/vmd/vm.c > @@ -48,6 +48,7 @@ > #include > > #include "atomicio.h" > +#include "loadfile.h" > #include "mmio.h" > #include "pci.h" > #include "virtio.h" > @@ -163,6 +164,11 @@ vm_main(int fd, int fd_vmm) > } > } > > + if (vcp->vcp_sev && env->vmd_ccp_fd < 0) { > + log_warnx("%s not available", CCP_NODE); > + _exit(EINVAL); > + } > + > ret = start_vm(&vm, fd); > _exit(ret); > } > @@ -230,6 +236,13 @@ start_vm(struct vmd_vm *vm, int fd) > return (ret); > } > > + /* Setup SEV. 
*/ > + ret = sev_init(vm); > + if (ret) { > + log_warnx("could not initialize SEV"); > + return (ret); > + } > + > /* > * Some of vmd currently relies on global state (current_vm, con_fd). > */ > @@ -318,6 +331,10 @@ start_vm(struct vmd_vm *vm, int fd) > */ > ret = run_vm(&vm->vm_params, &vrs); > > + /* Shutdown SEV. */ > + if (sev_shutdown(vm)) > + log_warnx("%s: could not shutdown SEV", __func__); > + > /* Ensure that any in-flight data is written back */ > virtio_shutdown(vm); > > @@ -456,6 +473,9 @@ vm_shutdown(unsigned int cmd) > } > imsg_flush(&current_vm->vm_iev.ibuf); > > + if (sev_shutdown(current_vm)) > + log_warnx("%s: could not shutdown SEV", __func__); > + > _exit(0); > } > > @@ -820,6 +840,7 @@ static int > vmm_create_vm(struct vmd_vm *vm) > { > struct vm_create_params *vcp = &vm->vm_params.vmc_params; > + size_t i; > > /* Sanity check arguments */ > if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM) > @@ -838,6 +859,9 @@ vmm_create_vm(struct vmd_vm *vm) > if (ioctl(env->vmd_fd, VMM_IOC_CREATE, vcp) == -1) > return (errno); > > + for (i = 0; i < vcp->vcp_ncpus; i++) > + vm->vm_sev_asid[i] = vcp->vcp_asid[i]; > + > return (0); > } > > @@ -920,6 +944,18 @@ run_vm(struct vmop_create_params *vmc, struct vcpu_reg_state *vrs) > return (EIO); > } > > + if (sev_activate(current_vm, i)) { > + log_warnx("%s: SEV activatation failed for VCPU " > + "%zu failed - exiting.", __progname, i); > + return (EIO); > + } > + > + if (sev_encrypt_memory(current_vm)) { > + log_warnx("%s: memory encryption failed for VCPU " > + "%zu failed - exiting.", __progname, i); > + return (EIO); > + } > + > /* once more because reset_cpu changes regs */ > if (current_vm->vm_state & VM_STATE_RECEIVED) { > vregsp.vrwp_vm_id = vcp->vcp_id; > diff --git a/usr.sbin/vmd/vm.conf.5 b/usr.sbin/vmd/vm.conf.5 > index ed6cd41df64..e07ba35103b 100644 > --- a/usr.sbin/vmd/vm.conf.5 > +++ b/usr.sbin/vmd/vm.conf.5 > @@ -323,6 +323,8 @@ If only > .Pf : Ar group > is given, > only the group is set. 
> +.It Ic sev > +Enables SEV for guest. > .El > .Sh VM INSTANCES > It is possible to use configured or running VMs as a template for Do we want a .Xr for ccp(4) here? Shrug. Your call. > diff --git a/usr.sbin/vmd/vmd.c b/usr.sbin/vmd/vmd.c > index 232bc82d8d2..d37abf2364f 100644 > --- a/usr.sbin/vmd/vmd.c > +++ b/usr.sbin/vmd/vmd.c > @@ -661,7 +661,7 @@ main(int argc, char **argv) > int ch; > enum privsep_procid proc_id = PROC_PARENT; > int proc_instance = 0, vm_launch = 0; > - int vmm_fd = -1, vm_fd = -1; > + int vmm_fd = -1, vm_fd = -1, ccp_fd = -1; > const char *errp, *title = NULL; > int argc0 = argc; > char dev_type = '\0'; > @@ -673,7 +673,7 @@ main(int argc, char **argv) > env->vmd_fd = -1; > env->vmd_fd6 = -1; > > - while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:nt:vp:")) != -1) { > + while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:j:nt:vp:")) != -1) { > switch (ch) { > case 'D': > if (cmdline_symset(optarg) < 0) > @@ -735,6 +735,12 @@ main(int argc, char **argv) > if (errp) > fatalx("invalid vmm fd"); > break; > + case 'j': > + /* -1 means no PSP available */ > + ccp_fd = strtonum(optarg, -1, 128, &errp); > + if (errp) > + fatalx("invalid psp fd"); > + break; > default: > usage(); > } > @@ -763,6 +769,7 @@ main(int argc, char **argv) > > ps = &env->vmd_ps; > ps->ps_env = env; > + env->vmd_ccp_fd = ccp_fd; > > if (config_init(env) == -1) > fatal("failed to initialize configuration"); > @@ -837,6 +844,12 @@ main(int argc, char **argv) > if (!env->vmd_noaction) > proc_connect(ps); > > + if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) { > + env->vmd_ccp_fd = open(CCP_NODE, O_RDWR); > + if (env->vmd_ccp_fd == -1) > + log_debug("%s: failed to open %s", __func__, CCP_NODE); > + } > + > if (vmd_configure() == -1) > fatalx("configuration failed"); > > @@ -917,6 +930,12 @@ vmd_configure(void) > proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1, > IMSG_VMDOP_RECEIVE_VMM_FD, -1, env->vmd_fd, NULL, 0); > > + /* Send PSP device fd to vmm proc. 
*/ > + if (env->vmd_ccp_fd != -1) { > + proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1, > + IMSG_VMDOP_RECEIVE_CCP_FD, -1, env->vmd_ccp_fd, NULL, 0); > + } > + > /* Send shared global configuration to all children */ > if (config_setconfig(env) == -1) > return (-1); > diff --git a/usr.sbin/vmd/vmd.h b/usr.sbin/vmd/vmd.h > index 2f2056541c8..e6660585054 100644 > --- a/usr.sbin/vmd/vmd.h > +++ b/usr.sbin/vmd/vmd.h > @@ -50,6 +50,7 @@ > #define VMD_CONF "/etc/vm.conf" > #define SOCKET_NAME "/var/run/vmd.sock" > #define VMM_NODE "/dev/vmm" > +#define CCP_NODE "/dev/ccp" > #define VM_DEFAULT_BIOS "/etc/firmware/vmm-bios" > #define VM_DEFAULT_KERNEL "/bsd" > #define VM_DEFAULT_DEVICE "hd0a" > @@ -131,6 +132,7 @@ enum imsg_type { > IMSG_VMDOP_GET_INFO_VM_END_DATA, > IMSG_VMDOP_LOAD, > IMSG_VMDOP_RECEIVE_VMM_FD, > + IMSG_VMDOP_RECEIVE_CCP_FD, > IMSG_VMDOP_RELOAD, > IMSG_VMDOP_PRIV_IFDESCR, > IMSG_VMDOP_PRIV_IFADD, > @@ -305,6 +307,12 @@ struct vmd_vm { > struct vmop_create_params vm_params; > pid_t vm_pid; > uint32_t vm_vmid; > + uint32_t vm_sev_handle; > + uint32_t vm_sev_asid[VMM_MAX_VCPUS_PER_VM]; > + > +#define VM_SEV_NSEGMENTS 128 > + size_t vm_sev_nmemsegments; > + struct vm_mem_range vm_sev_memsegments[VM_SEV_NSEGMENTS]; > > int vm_kernel; > char *vm_kernel_path; /* Used by vm.conf. 
*/ > @@ -398,6 +406,7 @@ struct vmd { > int vmd_fd; > int vmd_fd6; > int vmd_ptmfd; > + int vmd_ccp_fd; > }; > > struct vm_dev_pipe { > @@ -508,6 +517,8 @@ void unpause_vm_md(struct vmd_vm *); > int dump_devs(int); > int dump_send_header(int); > void *hvaddr_mem(paddr_t, size_t); > +struct vm_mem_range * > + find_gpa_range(struct vm_create_params *, paddr_t, size_t); > int write_mem(paddr_t, const void *, size_t); > int read_mem(paddr_t, void *, size_t); > int intr_ack(struct vmd_vm *); > @@ -538,6 +549,8 @@ void vm_pipe_init2(struct vm_dev_pipe *, void (*)(int, short, void *), > void *); > void vm_pipe_send(struct vm_dev_pipe *, enum pipe_msg_type); > enum pipe_msg_type vm_pipe_recv(struct vm_dev_pipe *); > +int write_mem(paddr_t, const void *buf, size_t); > +int write_mem_enc(paddr_t, const void *buf, size_t); > int remap_guest_mem(struct vmd_vm *, int); > __dead void vm_shutdown(unsigned int); > > @@ -573,4 +586,22 @@ __dead void vionet_main(int, int); > /* vioblk.c */ > __dead void vioblk_main(int, int); > > +/* psp.c */ > +int psp_get_pstate(uint16_t *); > +int psp_df_flush(void); > +int psp_get_gstate(uint32_t, uint32_t *, uint32_t *, uint8_t *); > +int psp_launch_start(uint32_t *); > +int psp_launch_update(uint32_t, vaddr_t, size_t); > +int psp_launch_measure(uint32_t); > +int psp_launch_finish(uint32_t); > +int psp_activate(uint32_t, uint32_t); > +int psp_guest_shutdown(uint32_t); > + > +/* sev.c */ > +int sev_init(struct vmd_vm *); > +int sev_add_memsegment(vaddr_t, size_t); > +int sev_encrypt_memory(struct vmd_vm *); > +int sev_activate(struct vmd_vm *, int); > +int sev_shutdown(struct vmd_vm *); > + > #endif /* VMD_H */ > diff --git a/usr.sbin/vmd/vmm.c b/usr.sbin/vmd/vmm.c > index 6a98e43f751..e4dd6f7b6bb 100644 > --- a/usr.sbin/vmd/vmm.c > +++ b/usr.sbin/vmd/vmm.c > @@ -325,6 +325,11 @@ vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg) > /* Get and terminate all running VMs */ > get_info_vm(ps, NULL, 1); > break; > + case 
IMSG_VMDOP_RECEIVE_CCP_FD: > + if (env->vmd_ccp_fd > -1) > + fatalx("already received psp fd"); > + env->vmd_ccp_fd = imsg->fd; > + break; > default: > return (-1); > } > @@ -645,7 +650,7 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) > { > struct vm_create_params *vcp; > struct vmd_vm *vm; > - char *nargv[8], num[32], vmm_fd[32]; > + char *nargv[10], num[32], vmm_fd[32], ccp_fd[32]; > int fd, ret = EINVAL; > int fds[2]; > pid_t vm_pid; > @@ -760,6 +765,9 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) > close(fd); > } > > + if (env->vmd_ccp_fd > 0) > + fcntl(env->vmd_ccp_fd, F_SETFD, 0); /* psp device fd */ > + > /* > * Prepare our new argv for execvp(2) with the fd of our open > * pipe to the parent/vmm process as an argument. > @@ -769,6 +777,8 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) > snprintf(num, sizeof(num), "%d", fds[1]); > memset(vmm_fd, 0, sizeof(vmm_fd)); > snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd); > + memset(ccp_fd, 0, sizeof(ccp_fd)); > + snprintf(ccp_fd, sizeof(ccp_fd), "%d", env->vmd_ccp_fd); > > nargv[0] = env->argv0; > nargv[1] = "-V"; > @@ -776,14 +786,16 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid) > nargv[3] = "-n"; > nargv[4] = "-i"; > nargv[5] = vmm_fd; > - nargv[6] = NULL; > + nargv[6] = "-j"; > + nargv[7] = ccp_fd; > + nargv[8] = NULL; > > if (env->vmd_verbose == 1) { > - nargv[6] = VMD_VERBOSE_1; > - nargv[7] = NULL; > + nargv[8] = VMD_VERBOSE_1; > + nargv[9] = NULL; > } else if (env->vmd_verbose > 1) { > - nargv[6] = VMD_VERBOSE_2; > - nargv[7] = NULL; > + nargv[8] = VMD_VERBOSE_2; > + nargv[9] = NULL; > } > > /* Control resumes in vmd main(). 
*/ > diff --git a/usr.sbin/vmd/x86_vm.c b/usr.sbin/vmd/x86_vm.c > index d0caf98dd12..7dbe53533d9 100644 > --- a/usr.sbin/vmd/x86_vm.c > +++ b/usr.sbin/vmd/x86_vm.c > @@ -52,8 +52,6 @@ extern char *__progname; > void create_memory_map(struct vm_create_params *); > int translate_gva(struct vm_exit*, uint64_t, uint64_t *, int); > > -static struct vm_mem_range *find_gpa_range(struct vm_create_params *, paddr_t, > - size_t); > static int loadfile_bios(gzFile, off_t, struct vcpu_reg_state *); > static int vcpu_exit_eptviolation(struct vm_run_params *); > static void vcpu_exit_inout(struct vm_run_params *); > @@ -792,7 +790,7 @@ vcpu_exit_pci(struct vm_run_params *vrp) > * NULL: on failure if there is no memory range as described by the parameters > * Pointer to vm_mem_range that contains the start of the range otherwise. > */ > -static struct vm_mem_range * > +struct vm_mem_range * > find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len) > { > size_t i, n; > @@ -838,7 +836,7 @@ find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len) > return (vmr); > } > /* > - * write_mem > + * _write_mem > * > * Copies data from 'buf' into the guest VM's memory at paddr 'dst'. > * > @@ -846,14 +844,15 @@ find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len) > * dst: the destination paddr_t in the guest VM > * buf: data to copy (or NULL to zero the data) > * len: number of bytes to copy > + * enc: prepare for SEV encryption > * > * Return values: > * 0: success > * EINVAL: if the guest physical memory range [dst, dst + len) does not > * exist in the guest. 
> */ > -int > -write_mem(paddr_t dst, const void *buf, size_t len) > +static int > +_write_mem(paddr_t dst, const void *buf, size_t len, int enc) > { > const char *from = buf; > char *to; > @@ -868,6 +867,9 @@ write_mem(paddr_t dst, const void *buf, size_t len) > return (EINVAL); > } > > + if (enc && sev_add_memsegment((vaddr_t)dst, len) < 0) > + return (EINVAL); > + With the bulk of the changes above (all write_mem becoming write_mem_enc), this unfairly penalizes any write_mem on non-SEV hardware as we need to now route through sev_add_memsegment every time. Sure, it short-circuits immediately when it checks for SEV, but the default path should be the fastest for the non-SEV case, IMO. > off = dst - vmr->vmr_gpa; > while (len != 0) { > n = vmr->vmr_size - off; > @@ -889,6 +891,18 @@ write_mem(paddr_t dst, const void *buf, size_t len) > return (0); > } > > +int > +write_mem(paddr_t dst, const void *buf, size_t len) > +{ > + return _write_mem(dst, buf, len, 0); > +} > + > +int > +write_mem_enc(paddr_t dst, const void *buf, size_t len) > +{ > + return _write_mem(dst, buf, len, 1); > +} > + > /* > * read_mem > *