Download raw body.
AMD SEV 5/5: Add support to vmd(8)
On Wed, Aug 28, 2024 at 03:27:09PM +0200, Hans-Jörg Höxer wrote:
> Hi,
>
> this change adds the necessary pieces to vmd(8). To enable SEV, one
> can add the new parameter "sev" to a vm section in vm.conf.
>
> Take care,
> HJ.
> --------------------------------------------------------------------------
> commit d1022be5060e1149d363795e9210ee01cdc7e307
> Author: Hans-Joerg Hoexer <hshoexer@genua.de>
> Date: Thu Jul 11 16:20:59 2024 +0200
>
> vmd(8): initial AMD SEV support
>
> To launch a guest with AMD SEV enabled, vmd needs to do a few things:
>
> - retrieve ASID used by guest on VM creation
> - provide ASID to ccp(4)
> - let ccp(4) encrypt memory used initially by guest
> - run guest
> - release resources held by ccp(4) on guest shutdown
>
> To enable SEV for a guest use the parameter "vm" in the guest's vm
> section in vm.conf.
If you're going to copy this commit message, I think you meant "sev" here
and not "vm".
See other comments below. ok mlarkin once these changes are made.
-ml
>
> diff --git a/usr.sbin/vmd/Makefile b/usr.sbin/vmd/Makefile
> index 22c1e887823..a0d5ae00e62 100644
> --- a/usr.sbin/vmd/Makefile
> +++ b/usr.sbin/vmd/Makefile
> @@ -11,6 +11,7 @@ SRCS+= vionet.c
> .if ${MACHINE} == "amd64"
> SRCS+= i8253.c i8259.c fw_cfg.c loadfile_elf.c mc146818.c ns8250.c
> SRCS+= x86_vm.c x86_mmio.c
> +SRCS+= psp.c sev.c
> .endif # amd64
> .if ${MACHINE} == "arm64"
> SRCS+= arm64_vm.c
> diff --git a/usr.sbin/vmd/arm64_vm.c b/usr.sbin/vmd/arm64_vm.c
> index 282dbcb4985..6d176ac4cfc 100644
> --- a/usr.sbin/vmd/arm64_vm.c
> +++ b/usr.sbin/vmd/arm64_vm.c
> @@ -160,3 +160,51 @@ vcpu_exit_pci(struct vm_run_params *vrp)
> /* NOTREACHED */
> return (0xff);
> }
> +
> +void
> +set_return_data(struct vm_exit *vei, uint32_t data)
> +{
> + fatalx("%s: unimplemented", __func__);
> + /* NOTREACHED */
> + return;
> +}
> +
> +void
> +get_input_data(struct vm_exit *vei, uint32_t *data)
> +{
> + fatalx("%s: unimplemented", __func__);
> + /* NOTREACHED */
> + return;
> +}
> +
> +int
> +sev_init(struct vmd_vm *vm)
> +{
> + fatalx("%s: unimplemented", __func__);
> + /* NOTREACHED */
> + return (-1);
> +}
> +
> +int
> +sev_shutdown(struct vmd_vm *vm)
> +{
> + fatalx("%s: unimplemented", __func__);
> + /* NOTREACHED */
> + return (-1);
> +}
> +
> +int
> +sev_activate(struct vmd_vm *vm, int vcpu_id)
> +{
> + fatalx("%s: unimplemented", __func__);
> + /* NOTREACHED */
> + return (-1);
> +}
> +
> +int
> +sev_encrypt_memory(struct vmd_vm *vm)
> +{
> + fatalx("%s: unimplemented", __func__);
> + /* NOTREACHED */
> + return (-1);
> +}
> diff --git a/usr.sbin/vmd/loadfile_elf.c b/usr.sbin/vmd/loadfile_elf.c
> index 166aa04c5e1..8d4a70a1148 100644
> --- a/usr.sbin/vmd/loadfile_elf.c
> +++ b/usr.sbin/vmd/loadfile_elf.c
> @@ -130,6 +130,8 @@ static void mbcopy(void *, paddr_t, int);
> extern char *__progname;
> extern int vm_id;
>
> +uint64_t pg_crypt = 0;
> +
> /*
> * setsegment
> *
> @@ -193,7 +195,7 @@ push_gdt(void)
> setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
> setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
>
> - write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
> + write_mem_enc(GDT_PAGE, gdtpage, PAGE_SIZE);
The majority of the cases will be non-encrypted; IMO we should leave the name
as write_mem and have that function determine whether or not to encrypt based
on the setting. There are a bunch of these.
> }
>
> /*
> @@ -229,20 +231,21 @@ push_pt_64(void)
>
> /* PDPDE0 - first 1GB */
> memset(ptes, 0, sizeof(ptes));
> - ptes[0] = PG_V | PML3_PAGE;
> - write_mem(PML4_PAGE, ptes, PAGE_SIZE);
> + ptes[0] = pg_crypt | PG_V | PML3_PAGE;
> + write_mem_enc(PML4_PAGE, ptes, PAGE_SIZE);
>
> /* PDE0 - first 1GB */
> memset(ptes, 0, sizeof(ptes));
> - ptes[0] = PG_V | PG_RW | PG_u | PML2_PAGE;
> - write_mem(PML3_PAGE, ptes, PAGE_SIZE);
> + ptes[0] = pg_crypt | PG_V | PG_RW | PG_u | PML2_PAGE;
> + write_mem_enc(PML3_PAGE, ptes, PAGE_SIZE);
>
> /* First 1GB (in 2MB pages) */
> memset(ptes, 0, sizeof(ptes));
> for (i = 0 ; i < 512; i++) {
> - ptes[i] = PG_V | PG_RW | PG_u | PG_PS | ((2048 * 1024) * i);
> + ptes[i] = pg_crypt | PG_V | PG_RW | PG_u | PG_PS |
> + ((2048 * 1024) * i);
> }
> - write_mem(PML2_PAGE, ptes, PAGE_SIZE);
> + write_mem_enc(PML2_PAGE, ptes, PAGE_SIZE);
> }
>
> /*
> @@ -300,8 +303,18 @@ loadfile_elf(gzFile fp, struct vmd_vm *vm, struct vcpu_reg_state *vrs,
> vrs->vrs_crs[VCPU_REGS_CR4] = CR4_PSE;
> vrs->vrs_msrs[VCPU_REGS_EFER] = 0ULL;
> }
> - else
> + else {
> + if (vcp->vcp_sev) {
> + if (vcp->vcp_poscbit == 0) {
> + log_warnx("SEV enabled but no C-bit reported");
> + return 1;
> + }
> + pg_crypt = (1ULL << vcp->vcp_poscbit);
> + log_debug("%s: poscbit %d pg_crypt 0x%016llx",
> + __func__, vcp->vcp_poscbit, pg_crypt);
> + }
> push_pt_64();
> + }
>
> if (bootdevice == VMBOOTDEV_NET) {
> bootmac = &bm;
> @@ -412,7 +425,7 @@ push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac)
>
> ba[i++] = 0xFFFFFFFF; /* BOOTARG_END */
>
> - write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);
> + write_mem_enc(BOOTARGS_PAGE, ba, PAGE_SIZE);
>
> return (i * sizeof(uint32_t));
> }
> @@ -462,7 +475,7 @@ push_stack(uint32_t bootargsz, uint32_t end)
> stack[--loc] = MAKEBOOTDEV(0x4, 0, 0, 0, 0); /* bootdev: sd0a */
> stack[--loc] = 0;
>
> - write_mem(STACK_PAGE, &stack, PAGE_SIZE);
> + write_mem_enc(STACK_PAGE, &stack, PAGE_SIZE);
>
> return (1024 - (loc - 1)) * sizeof(uint32_t);
> }
> @@ -490,6 +503,8 @@ mread(gzFile fp, paddr_t addr, size_t sz)
> size_t i, osz;
> char buf[PAGE_SIZE];
>
> + sev_add_memsegment(addr, sz);
> +
I would prefer this be named something else to account for other potential
implementations later. Also, at first glance, I was mistakenly thinking that
this really is adding a new memory range to the VM. It's not, it's just
informing ccp(4) that there is a new area of guest memory that needs to be
encrypted. That's fine, and needed, but maybe a better name can be invented.
> /*
> * break up the 'sz' bytes into PAGE_SIZE chunks for use with
> * write_mem
> @@ -565,6 +580,8 @@ marc4random_buf(paddr_t addr, int sz)
> int i, ct;
> char buf[PAGE_SIZE];
>
> + sev_add_memsegment(addr, sz);
> +
> /*
> * break up the 'sz' bytes into PAGE_SIZE chunks for use with
> * write_mem
> @@ -612,7 +629,7 @@ marc4random_buf(paddr_t addr, int sz)
> static void
> mbzero(paddr_t addr, int sz)
> {
> - if (write_mem(addr, NULL, sz))
> + if (write_mem_enc(addr, NULL, sz))
> return;
> }
>
> @@ -632,7 +649,7 @@ mbzero(paddr_t addr, int sz)
> static void
> mbcopy(void *src, paddr_t dst, int sz)
> {
> - write_mem(dst, src, sz);
> + write_mem_enc(dst, src, sz);
> }
>
> /*
> diff --git a/usr.sbin/vmd/parse.y b/usr.sbin/vmd/parse.y
> index aacfd635100..24a60c1b924 100644
> --- a/usr.sbin/vmd/parse.y
> +++ b/usr.sbin/vmd/parse.y
> @@ -126,7 +126,7 @@ typedef struct {
> %token FORMAT GROUP
> %token INET6 INSTANCE INTERFACE LLADDR LOCAL LOCKED MEMORY NET NIFS OWNER
> %token PATH PREFIX RDOMAIN SIZE SOCKET SWITCH UP VM VMID STAGGERED START
> -%token PARALLEL DELAY
> +%token PARALLEL DELAY SEV
> %token <v.number> NUMBER
> %token <v.string> STRING
> %type <v.lladdr> lladdr
> @@ -140,6 +140,7 @@ typedef struct {
> %type <v.string> optstring
> %type <v.string> string
> %type <v.string> vm_instance
> +%type <v.number> sev;
>
> %%
>
> @@ -414,6 +415,9 @@ vm_opts_l : vm_opts_l vm_opts nl
> vm_opts : disable {
> vmc_disable = $1;
> }
> + | sev {
> + vcp->vcp_sev = 1;
> + }
> | DISK string image_format {
> if (parse_disk($2, $3) != 0) {
> yyerror("failed to parse disks: %s", $2);
> @@ -757,6 +761,9 @@ disable : ENABLE { $$ = 0; }
> | DISABLE { $$ = 1; }
> ;
>
> +sev : SEV { $$ = 1; }
> + ;
> +
> bootdevice : CDROM { $$ = VMBOOTDEV_CDROM; }
> | DISK { $$ = VMBOOTDEV_DISK; }
> | NET { $$ = VMBOOTDEV_NET; }
> @@ -841,6 +848,7 @@ lookup(char *s)
> { "path", PATH },
> { "prefix", PREFIX },
> { "rdomain", RDOMAIN },
> + { "sev", SEV },
> { "size", SIZE },
> { "socket", SOCKET },
> { "staggered", STAGGERED },
> diff --git a/usr.sbin/vmd/psp.c b/usr.sbin/vmd/psp.c
> new file mode 100644
> index 00000000000..0205d71d34a
> --- /dev/null
> +++ b/usr.sbin/vmd/psp.c
> @@ -0,0 +1,272 @@
> +/* $OpenBSD: $ */
> +
> +/*
> + * Copyright (c) 2023, 2024 Hans-Joerg Hoexer <hshoexer@genua.de>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/types.h>
> +#include <sys/device.h>
> +#include <sys/ioctl.h>
> +#include <sys/rwlock.h>
> +
> +#include <machine/bus.h>
> +#include <dev/ic/ccpvar.h>
> +
> +#include <string.h>
> +
> +#include "vmd.h"
> +
> +extern struct vmd *env;
> +
> +/* Guest policy */
> +#define GPOL_NODBG (1ULL << 0) /* no debugging */
> +#define GPOL_NOKS (1ULL << 1) /* no key sharing */
> +#define GPOL_ES (1ULL << 2) /* SEV-ES required */
> +#define GPOL_NOSEND (1ULL << 3) /* no guest migration */
> +#define GPOL_DOMAIN (1ULL << 4) /* no migration to other domain */
> +#define GPOL_SEV (1ULL << 5) /* no migration to non-SEV platform */
> +
> +
> +/*
> + * Retrieve platform state.
> + */
> +int
> +psp_get_pstate(uint16_t *state)
> +{
> + struct psp_platform_status pst;
> +
> + if (ioctl(env->vmd_ccp_fd, PSP_IOC_GET_PSTATUS, &pst) < 0) {
> + log_warn("%s: ioctl", __func__);
> + return (-1);
> + }
> +
> + if (state)
> + *state = pst.state;
> +
> + return (0);
> +}
> +
> +
> +/*
> + * Flush data fabrics of all cores.
> + *
> + * This ensures all data of a SEV enabled guest is committed to
> + * memory. This needs to be done before an ASID is assigned to
> + * guest using psp_activate().
> + */
> +int
> +psp_df_flush(void)
> +{
> + if (ioctl(env->vmd_ccp_fd, PSP_IOC_DF_FLUSH) < 0) {
> + log_warn("%s: ioctl", __func__);
> + return (-1);
> + }
> +
> + return (0);
> +}
> +
> +
> +/*
> + * Retrieve guest state.
> + */
> +int
> +psp_get_gstate(uint32_t handle, uint32_t *policy, uint32_t *asid,
> + uint8_t *state)
> +{
> + struct psp_guest_status gst;
> +
> + memset(&gst, 0, sizeof(gst));
> + gst.handle = handle;
> +
> + if (ioctl(env->vmd_ccp_fd, PSP_IOC_GET_GSTATUS, &gst) < 0) {
> + log_warn("%s: ioctl", __func__);
> + return (-1);
> + }
> +
> + if (policy)
> + *policy = gst.policy;
> + if (asid)
> + *asid = gst.asid;
> + if (state)
> + *state = gst.state;
> +
> + return (0);
> +}
> +
> +
> +/*
> + * Start the launch sequence of a guest.
> + */
> +int
> +psp_launch_start(uint32_t *handle)
> +{
> + struct psp_launch_start ls;
> +
> + memset(&ls, 0, sizeof(ls));
> +
> + /* Set guest policy. */
> + ls.policy = (GPOL_NODBG | GPOL_NOKS | GPOL_NOSEND | GPOL_DOMAIN |
> + GPOL_SEV);
> +
> + if (ioctl(env->vmd_ccp_fd, PSP_IOC_LAUNCH_START, &ls) < 0) {
> + log_warn("%s: ioctl", __func__);
> + return (-1);
> + }
> +
> + if (handle)
> + *handle = ls.handle;
> +
> + return (0);
> +}
> +
> +
> +/*
> + * Encrypt and measure a memory range.
> + */
> +int
> +psp_launch_update(uint32_t handle, vaddr_t v, size_t len)
> +{
> + struct psp_launch_update_data lud;
> +
> + memset(&lud, 0, sizeof(lud));
> + lud.handle = handle;
> + lud.paddr = v; /* will be converted to paddr */
> + lud.length = len;
> +
> + if (ioctl(env->vmd_ccp_fd, PSP_IOC_LAUNCH_UPDATE_DATA, &lud) < 0) {
> + log_warn("%s: ioctl", __func__);
> + return (-1);
> + }
> +
> + return (0);
> +}
> +
> +
> +/*
> + * Finalize and return memory measurement.
> + *
> + * We ask the PSP to provide a measurement (HMAC) over the encrypted
> + * memory. As we do not yet negotiate a shared integrity key with
> + * the PSP, the measurement is not really meaningful. Thus we just
> + * log it for now.
> + */
> +int
> +psp_launch_measure(uint32_t handle)
> +{
> + struct psp_launch_measure lm;
> + char *p, buf[256];
> + size_t len;
> + unsigned int i;
> +
> + memset(&lm, 0, sizeof(lm));
> + lm.handle = handle;
> + lm.measure_len = sizeof(lm.psp_measure);
> + memset(lm.measure, 0, sizeof(lm.measure));
> + memset(lm.measure_nonce, 0, sizeof(lm.measure_nonce));
> +
> + if (ioctl(env->vmd_ccp_fd, PSP_IOC_LAUNCH_MEASURE, &lm) < 0) {
> + log_warn("%s: ioctl", __func__);
> + return (-1);
> + }
> +
> + /*
> + * We can not verify the measurement, yet. Therefore just
> + * log it.
> + */
> + len = sizeof(buf);
> + memset(buf, 0, len);
> + p = buf;
> + for (i = 0; i < sizeof(lm.measure) && len >= 2;
> + i++, p += 2, len -= 2) {
> + snprintf(p, len, "%02x", lm.measure[i]);
> + }
> + log_info("%s: measurement\t0x%s", __func__, buf);
> +
> + len = sizeof(buf);
> + memset(buf, 0, len);
> + p = buf;
> + for (i = 0; i < sizeof(lm.measure_nonce) && len >= 2;
> + i++, p += 2, len -= 2) {
> + snprintf(p, len, "%02x", lm.measure_nonce[i]);
> + }
> + log_info("%s: nonce\t0x%s", __func__, buf);
> +
> + return (0);
> +}
> +
> +
> +/*
> + * Finalize launch sequence.
> + */
> +int
> +psp_launch_finish(uint32_t handle)
> +{
> + struct psp_launch_finish lf;
> +
> + lf.handle = handle;
> +
> + if (ioctl(env->vmd_ccp_fd, PSP_IOC_LAUNCH_FINISH, &lf) < 0) {
> + log_warn("%s: ioctl", __func__);
> + return (-1);
> + }
> +
> + return (0);
> +}
> +
> +
> +/*
> + * Activate a guest.
> + *
> + * This associates the guest's ASID with the handle used to identify
> + * crypto contexts managed by the PSP.
> + */
> +int
> +psp_activate(uint32_t handle, uint32_t asid)
> +{
> + struct psp_activate act;
> +
> + act.handle = handle;
> + act.asid = asid;
> +
> + if (ioctl(env->vmd_ccp_fd, PSP_IOC_ACTIVATE, &act) < 0) {
> + log_warn("%s: ioctl", __func__);
> + return (-1);
> + }
> +
> + return (0);
> +}
> +
> +
> +/*
> + * Deactivate and decommission a guest.
> + *
> + * This deassociates the guest's ASID from the crypto contexts in
> + * the PSP. Then the PSP releases the crypto contexts (i.e. deletes
> + * keys).
> + */
> +int
> +psp_guest_shutdown(uint32_t handle)
> +{
> + struct psp_guest_shutdown gshutdown;
> +
> + gshutdown.handle = handle;
> +
> + if (ioctl(env->vmd_ccp_fd, PSP_IOC_GUEST_SHUTDOWN, &gshutdown) < 0) {
> + log_warn("%s: ioctl", __func__);
> + return (-1);
> + }
> +
> + return (0);
> +}
> diff --git a/usr.sbin/vmd/sev.c b/usr.sbin/vmd/sev.c
> new file mode 100644
> index 00000000000..7dc649fc448
> --- /dev/null
> +++ b/usr.sbin/vmd/sev.c
> @@ -0,0 +1,246 @@
> +/* $OpenBSD: $ */
> +
> +/*
> + * Copyright (c) 2023, 2024 Hans-Joerg Hoexer <hshoexer@genua.de>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <sys/types.h>
> +#include <sys/device.h>
> +#include <sys/param.h>
> +#include <sys/rwlock.h>
> +
> +#include <machine/bus.h>
> +#include <crypto/xform.h>
> +#include <dev/ic/ccpvar.h>
> +
> +#include <string.h>
> +
> +#include "vmd.h"
> +
> +extern struct vmd_vm *current_vm;
> +
> +/*
> + * Prepare guest to use SEV.
> + *
> + * This asks the PSP to create a new crypto contexts including a
s/contexts/context/
> + * memory encryption key and assign a handle to the context.
> + *
> + * When the PSP driver ccp(4) attaches, it initializes the platform.
> + * If this fails for whatever reason we can not run a guest using SEV.
> + */
> +int
> +sev_init(struct vmd_vm *vm)
> +{
> + struct vmop_create_params *vmc = &vm->vm_params;
> + struct vm_create_params *vcp = &vmc->vmc_params;
> + uint32_t handle;
> + uint16_t pstate;
> + uint8_t gstate;
> +
> + if (!vcp->vcp_sev)
> + return (0);
> +
> + if (psp_get_pstate(&pstate)) {
> + log_warnx("%s: failed to get platform state", __func__);
> + return (-1);
> + }
> + if (pstate == PSP_PSTATE_UNINIT) {
> + log_warnx("%s: platform uninitialized", __func__);
> + return (-1);
> + }
> +
> + if (psp_launch_start(&handle) < 0) {
> + log_warnx("%s: launch failed", __func__);
> + return (-1);
> + };
> + vm->vm_sev_handle = handle;
> +
> + if (psp_get_gstate(vm->vm_sev_handle, NULL, NULL, &gstate)) {
> + log_warnx("%s: failed to get guest state", __func__);
> + return (-1);
> + }
> + if (gstate != PSP_GSTATE_LUPDATE) {
> + log_warnx("%s: invalid guest state: 0x%hx", __func__, gstate);
> + return (-1);
> + }
> +
> + return (0);
> +}
> +
> +/*
> + * Record memory segments to be encrypted for SEV.
> + */
> +int
> +sev_add_memsegment(vaddr_t addr, size_t size)
> +{
> + struct vmop_create_params *vmc;
> + struct vm_create_params *vcp;
> + struct vm_mem_range *vmr;
> + size_t off;
> + int i;
> +
> + vmc = &current_vm->vm_params;
> + vcp = &vmc->vmc_params;
> +
> + if (!vcp->vcp_sev)
> + return (0);
> +
> + if (size == 0)
> + return (0);
> +
> + /* Adjust address and size to be aligned to AES_XTS_BLOCKSIZE. */
> + if (addr & (AES_XTS_BLOCKSIZE - 1)) {
> + size += (addr & (AES_XTS_BLOCKSIZE - 1));
> + addr &= ~(AES_XTS_BLOCKSIZE - 1);
> + }
> +
> + vmr = find_gpa_range(&current_vm->vm_params.vmc_params, addr, size);
> + if (vmr == NULL) {
> + log_warnx("%s: failed - invalid memory range addr = 0x%lx, "
> + "len = 0x%zx", __func__, addr, size);
> + return (-1);
> + }
> + if (current_vm->vm_sev_nmemsegments ==
> + nitems(current_vm->vm_sev_memsegments)) {
> + log_warnx("%s: failed - out of SEV memory segments", __func__);
> + return (-1);
> + }
> + i = current_vm->vm_sev_nmemsegments++;
> +
> + off = addr - vmr->vmr_gpa;
> +
> + current_vm->vm_sev_memsegments[i].vmr_va = vmr->vmr_va + off;
> + current_vm->vm_sev_memsegments[i].vmr_size = size;
> + current_vm->vm_sev_memsegments[i].vmr_gpa = vmr->vmr_gpa + off;
> +
> + log_debug("%s: i %d addr 0x%lx size 0x%lx vmr_va 0x%lx vmr_gpa 0x%lx "
> + "vmr_size 0x%lx", __func__, i, addr, size,
> + current_vm->vm_sev_memsegments[i].vmr_va,
> + current_vm->vm_sev_memsegments[i].vmr_gpa,
> + current_vm->vm_sev_memsegments[i].vmr_size);
> +
> + return (0);
> +}
> +
> +/*
> + * Encrypt and measure previously recorded memory segments.
> + *
> + * This encrypts the memory initially used by the guest. This
> + * includes the ELF kernel image, initial stack and page tables.
I think we should just say "kernel or BIOS image" here, because the current
wording implies the feature only works with vmctl -b.
> + *
> + * We also ask the PSP to provide a measurement. However, right
> + * now we can not really verify it.
> + */
> +int
> +sev_encrypt_memory(struct vmd_vm *vm)
> +{
> + struct vmop_create_params *vmc = &vm->vm_params;
> + struct vm_create_params *vcp = &vmc->vmc_params;
> + struct vm_mem_range *vmr;
> + size_t i;
> + uint8_t gstate;
> +
> + if (!vcp->vcp_sev)
> + return (0);
> +
> + for (i = 0; i < vm->vm_sev_nmemsegments; i++) {
> + vmr = &vm->vm_sev_memsegments[i];
> +
> + /* tell PSP to encrypt this range */
> + if (psp_launch_update(vm->vm_sev_handle, vmr->vmr_va,
> + roundup(vmr->vmr_size, AES_XTS_BLOCKSIZE))) {
> + log_warnx("%s: failed to launch update page "
> + "%zu:0x%lx", __func__, i, vmr->vmr_va);
> + return (-1);
> + }
> +
> + log_debug("%s: encrypted %zu:0x%lx size 0x%lx", __func__, i,
> + vmr->vmr_va, vmr->vmr_size);
> + }
> + if (psp_launch_measure(vm->vm_sev_handle)) {
> + log_warnx("%s: failed to launch measure", __func__);
> + return (-1);
> + }
> + if (psp_launch_finish(vm->vm_sev_handle)) {
> + log_warnx("%s: failed to launch finish", __func__);
> + return (-1);
> + }
> +
> + if (psp_get_gstate(vm->vm_sev_handle, NULL, NULL, &gstate)) {
> + log_warnx("%s: failed to get guest state", __func__);
> + return (-1);
> + }
> + if (gstate != PSP_GSTATE_RUNNING) {
> + log_warnx("%s: invalid guest state: 0x%hx", __func__, gstate);
> + return (-1);
> + }
> +
> + return (0);
> +}
> +
> +
> +/*
> + * Activate a guest's SEV crypto state.
> + */
> +int
> +sev_activate(struct vmd_vm *vm, int vcpu_id)
> +{
> + struct vmop_create_params *vmc = &vm->vm_params;
> + struct vm_create_params *vcp = &vmc->vmc_params;
> + uint8_t gstate;
> +
> + if (!vcp->vcp_sev)
> + return (0);
> +
> + if (psp_df_flush() ||
> + psp_activate(vm->vm_sev_handle, vm->vm_sev_asid[vcpu_id])) {
> + log_warnx("%s: failed to activate guest: 0x%x:0x%x", __func__,
> + vm->vm_sev_handle, vm->vm_sev_asid[vcpu_id]);
> + return (-1);
> + }
> +
> + if (psp_get_gstate(vm->vm_sev_handle, NULL, NULL, &gstate)) {
> + log_warnx("%s: failed to get guest state", __func__);
> + return (-1);
> + }
> + if (gstate != PSP_GSTATE_LUPDATE) {
> + log_warnx("%s: invalid guest state: 0x%hx", __func__, gstate);
> + return (-1);
> + }
> +
> + return (0);
> +}
> +
> +
> +/*
> + * Deactivate and decommission a guest's SEV crypto state.
> + */
> +int
> +sev_shutdown(struct vmd_vm *vm)
> +{
> + struct vmop_create_params *vmc = &vm->vm_params;
> + struct vm_create_params *vcp = &vmc->vmc_params;
> +
> + if (!vcp->vcp_sev)
> + return (0);
> +
> + if (psp_guest_shutdown(vm->vm_sev_handle)) {
> + log_warnx("failed to deactivate guest");
> + return (-1);
> + }
> + vm->vm_sev_handle = 0;
> +
> + return (0);
> +}
> diff --git a/usr.sbin/vmd/vm.c b/usr.sbin/vmd/vm.c
> index e8c73b0e053..c46f46a17cf 100644
> --- a/usr.sbin/vmd/vm.c
> +++ b/usr.sbin/vmd/vm.c
> @@ -48,6 +48,7 @@
> #include <util.h>
>
> #include "atomicio.h"
> +#include "loadfile.h"
> #include "mmio.h"
> #include "pci.h"
> #include "virtio.h"
> @@ -163,6 +164,11 @@ vm_main(int fd, int fd_vmm)
> }
> }
>
> + if (vcp->vcp_sev && env->vmd_ccp_fd < 0) {
> + log_warnx("%s not available", CCP_NODE);
> + _exit(EINVAL);
> + }
> +
> ret = start_vm(&vm, fd);
> _exit(ret);
> }
> @@ -230,6 +236,13 @@ start_vm(struct vmd_vm *vm, int fd)
> return (ret);
> }
>
> + /* Setup SEV. */
> + ret = sev_init(vm);
> + if (ret) {
> + log_warnx("could not initialize SEV");
> + return (ret);
> + }
> +
> /*
> * Some of vmd currently relies on global state (current_vm, con_fd).
> */
> @@ -318,6 +331,10 @@ start_vm(struct vmd_vm *vm, int fd)
> */
> ret = run_vm(&vm->vm_params, &vrs);
>
> + /* Shutdown SEV. */
> + if (sev_shutdown(vm))
> + log_warnx("%s: could not shutdown SEV", __func__);
> +
> /* Ensure that any in-flight data is written back */
> virtio_shutdown(vm);
>
> @@ -456,6 +473,9 @@ vm_shutdown(unsigned int cmd)
> }
> imsg_flush(&current_vm->vm_iev.ibuf);
>
> + if (sev_shutdown(current_vm))
> + log_warnx("%s: could not shutdown SEV", __func__);
> +
> _exit(0);
> }
>
> @@ -820,6 +840,7 @@ static int
> vmm_create_vm(struct vmd_vm *vm)
> {
> struct vm_create_params *vcp = &vm->vm_params.vmc_params;
> + size_t i;
>
> /* Sanity check arguments */
> if (vcp->vcp_ncpus > VMM_MAX_VCPUS_PER_VM)
> @@ -838,6 +859,9 @@ vmm_create_vm(struct vmd_vm *vm)
> if (ioctl(env->vmd_fd, VMM_IOC_CREATE, vcp) == -1)
> return (errno);
>
> + for (i = 0; i < vcp->vcp_ncpus; i++)
> + vm->vm_sev_asid[i] = vcp->vcp_asid[i];
> +
> return (0);
> }
>
> @@ -920,6 +944,18 @@ run_vm(struct vmop_create_params *vmc, struct vcpu_reg_state *vrs)
> return (EIO);
> }
>
> + if (sev_activate(current_vm, i)) {
> + log_warnx("%s: SEV activatation failed for VCPU "
> + "%zu failed - exiting.", __progname, i);
> + return (EIO);
> + }
> +
> + if (sev_encrypt_memory(current_vm)) {
> + log_warnx("%s: memory encryption failed for VCPU "
> + "%zu failed - exiting.", __progname, i);
> + return (EIO);
> + }
> +
> /* once more because reset_cpu changes regs */
> if (current_vm->vm_state & VM_STATE_RECEIVED) {
> vregsp.vrwp_vm_id = vcp->vcp_id;
> diff --git a/usr.sbin/vmd/vm.conf.5 b/usr.sbin/vmd/vm.conf.5
> index ed6cd41df64..e07ba35103b 100644
> --- a/usr.sbin/vmd/vm.conf.5
> +++ b/usr.sbin/vmd/vm.conf.5
> @@ -323,6 +323,8 @@ If only
> .Pf : Ar group
> is given,
> only the group is set.
> +.It Ic sev
> +Enables SEV for guest.
> .El
> .Sh VM INSTANCES
> It is possible to use configured or running VMs as a template for
Do we want a .Xr for ccp(4) here? Shrug. Your call.
> diff --git a/usr.sbin/vmd/vmd.c b/usr.sbin/vmd/vmd.c
> index 232bc82d8d2..d37abf2364f 100644
> --- a/usr.sbin/vmd/vmd.c
> +++ b/usr.sbin/vmd/vmd.c
> @@ -661,7 +661,7 @@ main(int argc, char **argv)
> int ch;
> enum privsep_procid proc_id = PROC_PARENT;
> int proc_instance = 0, vm_launch = 0;
> - int vmm_fd = -1, vm_fd = -1;
> + int vmm_fd = -1, vm_fd = -1, ccp_fd = -1;
> const char *errp, *title = NULL;
> int argc0 = argc;
> char dev_type = '\0';
> @@ -673,7 +673,7 @@ main(int argc, char **argv)
> env->vmd_fd = -1;
> env->vmd_fd6 = -1;
>
> - while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:nt:vp:")) != -1) {
> + while ((ch = getopt(argc, argv, "D:P:I:V:X:df:i:j:nt:vp:")) != -1) {
> switch (ch) {
> case 'D':
> if (cmdline_symset(optarg) < 0)
> @@ -735,6 +735,12 @@ main(int argc, char **argv)
> if (errp)
> fatalx("invalid vmm fd");
> break;
> + case 'j':
> + /* -1 means no PSP available */
> + ccp_fd = strtonum(optarg, -1, 128, &errp);
> + if (errp)
> + fatalx("invalid psp fd");
> + break;
> default:
> usage();
> }
> @@ -763,6 +769,7 @@ main(int argc, char **argv)
>
> ps = &env->vmd_ps;
> ps->ps_env = env;
> + env->vmd_ccp_fd = ccp_fd;
>
> if (config_init(env) == -1)
> fatal("failed to initialize configuration");
> @@ -837,6 +844,12 @@ main(int argc, char **argv)
> if (!env->vmd_noaction)
> proc_connect(ps);
>
> + if (env->vmd_noaction == 0 && proc_id == PROC_PARENT) {
> + env->vmd_ccp_fd = open(CCP_NODE, O_RDWR);
> + if (env->vmd_ccp_fd == -1)
> + log_debug("%s: failed to open %s", __func__, CCP_NODE);
> + }
> +
> if (vmd_configure() == -1)
> fatalx("configuration failed");
>
> @@ -917,6 +930,12 @@ vmd_configure(void)
> proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1,
> IMSG_VMDOP_RECEIVE_VMM_FD, -1, env->vmd_fd, NULL, 0);
>
> + /* Send PSP device fd to vmm proc. */
> + if (env->vmd_ccp_fd != -1) {
> + proc_compose_imsg(&env->vmd_ps, PROC_VMM, -1,
> + IMSG_VMDOP_RECEIVE_CCP_FD, -1, env->vmd_ccp_fd, NULL, 0);
> + }
> +
> /* Send shared global configuration to all children */
> if (config_setconfig(env) == -1)
> return (-1);
> diff --git a/usr.sbin/vmd/vmd.h b/usr.sbin/vmd/vmd.h
> index 2f2056541c8..e6660585054 100644
> --- a/usr.sbin/vmd/vmd.h
> +++ b/usr.sbin/vmd/vmd.h
> @@ -50,6 +50,7 @@
> #define VMD_CONF "/etc/vm.conf"
> #define SOCKET_NAME "/var/run/vmd.sock"
> #define VMM_NODE "/dev/vmm"
> +#define CCP_NODE "/dev/ccp"
> #define VM_DEFAULT_BIOS "/etc/firmware/vmm-bios"
> #define VM_DEFAULT_KERNEL "/bsd"
> #define VM_DEFAULT_DEVICE "hd0a"
> @@ -131,6 +132,7 @@ enum imsg_type {
> IMSG_VMDOP_GET_INFO_VM_END_DATA,
> IMSG_VMDOP_LOAD,
> IMSG_VMDOP_RECEIVE_VMM_FD,
> + IMSG_VMDOP_RECEIVE_CCP_FD,
> IMSG_VMDOP_RELOAD,
> IMSG_VMDOP_PRIV_IFDESCR,
> IMSG_VMDOP_PRIV_IFADD,
> @@ -305,6 +307,12 @@ struct vmd_vm {
> struct vmop_create_params vm_params;
> pid_t vm_pid;
> uint32_t vm_vmid;
> + uint32_t vm_sev_handle;
> + uint32_t vm_sev_asid[VMM_MAX_VCPUS_PER_VM];
> +
> +#define VM_SEV_NSEGMENTS 128
> + size_t vm_sev_nmemsegments;
> + struct vm_mem_range vm_sev_memsegments[VM_SEV_NSEGMENTS];
>
> int vm_kernel;
> char *vm_kernel_path; /* Used by vm.conf. */
> @@ -398,6 +406,7 @@ struct vmd {
> int vmd_fd;
> int vmd_fd6;
> int vmd_ptmfd;
> + int vmd_ccp_fd;
> };
>
> struct vm_dev_pipe {
> @@ -508,6 +517,8 @@ void unpause_vm_md(struct vmd_vm *);
> int dump_devs(int);
> int dump_send_header(int);
> void *hvaddr_mem(paddr_t, size_t);
> +struct vm_mem_range *
> + find_gpa_range(struct vm_create_params *, paddr_t, size_t);
> int write_mem(paddr_t, const void *, size_t);
> int read_mem(paddr_t, void *, size_t);
> int intr_ack(struct vmd_vm *);
> @@ -538,6 +549,8 @@ void vm_pipe_init2(struct vm_dev_pipe *, void (*)(int, short, void *),
> void *);
> void vm_pipe_send(struct vm_dev_pipe *, enum pipe_msg_type);
> enum pipe_msg_type vm_pipe_recv(struct vm_dev_pipe *);
> +int write_mem(paddr_t, const void *buf, size_t);
> +int write_mem_enc(paddr_t, const void *buf, size_t);
> int remap_guest_mem(struct vmd_vm *, int);
> __dead void vm_shutdown(unsigned int);
>
> @@ -573,4 +586,22 @@ __dead void vionet_main(int, int);
> /* vioblk.c */
> __dead void vioblk_main(int, int);
>
> +/* psp.c */
> +int psp_get_pstate(uint16_t *);
> +int psp_df_flush(void);
> +int psp_get_gstate(uint32_t, uint32_t *, uint32_t *, uint8_t *);
> +int psp_launch_start(uint32_t *);
> +int psp_launch_update(uint32_t, vaddr_t, size_t);
> +int psp_launch_measure(uint32_t);
> +int psp_launch_finish(uint32_t);
> +int psp_activate(uint32_t, uint32_t);
> +int psp_guest_shutdown(uint32_t);
> +
> +/* sev.c */
> +int sev_init(struct vmd_vm *);
> +int sev_add_memsegment(vaddr_t, size_t);
> +int sev_encrypt_memory(struct vmd_vm *);
> +int sev_activate(struct vmd_vm *, int);
> +int sev_shutdown(struct vmd_vm *);
> +
> #endif /* VMD_H */
> diff --git a/usr.sbin/vmd/vmm.c b/usr.sbin/vmd/vmm.c
> index 6a98e43f751..e4dd6f7b6bb 100644
> --- a/usr.sbin/vmd/vmm.c
> +++ b/usr.sbin/vmd/vmm.c
> @@ -325,6 +325,11 @@ vmm_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg)
> /* Get and terminate all running VMs */
> get_info_vm(ps, NULL, 1);
> break;
> + case IMSG_VMDOP_RECEIVE_CCP_FD:
> + if (env->vmd_ccp_fd > -1)
> + fatalx("already received psp fd");
> + env->vmd_ccp_fd = imsg->fd;
> + break;
> default:
> return (-1);
> }
> @@ -645,7 +650,7 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
> {
> struct vm_create_params *vcp;
> struct vmd_vm *vm;
> - char *nargv[8], num[32], vmm_fd[32];
> + char *nargv[10], num[32], vmm_fd[32], ccp_fd[32];
> int fd, ret = EINVAL;
> int fds[2];
> pid_t vm_pid;
> @@ -760,6 +765,9 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
> close(fd);
> }
>
> + if (env->vmd_ccp_fd > 0)
> + fcntl(env->vmd_ccp_fd, F_SETFD, 0); /* psp device fd */
> +
> /*
> * Prepare our new argv for execvp(2) with the fd of our open
> * pipe to the parent/vmm process as an argument.
> @@ -769,6 +777,8 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
> snprintf(num, sizeof(num), "%d", fds[1]);
> memset(vmm_fd, 0, sizeof(vmm_fd));
> snprintf(vmm_fd, sizeof(vmm_fd), "%d", env->vmd_fd);
> + memset(ccp_fd, 0, sizeof(ccp_fd));
> + snprintf(ccp_fd, sizeof(ccp_fd), "%d", env->vmd_ccp_fd);
>
> nargv[0] = env->argv0;
> nargv[1] = "-V";
> @@ -776,14 +786,16 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *pid)
> nargv[3] = "-n";
> nargv[4] = "-i";
> nargv[5] = vmm_fd;
> - nargv[6] = NULL;
> + nargv[6] = "-j";
> + nargv[7] = ccp_fd;
> + nargv[8] = NULL;
>
> if (env->vmd_verbose == 1) {
> - nargv[6] = VMD_VERBOSE_1;
> - nargv[7] = NULL;
> + nargv[8] = VMD_VERBOSE_1;
> + nargv[9] = NULL;
> } else if (env->vmd_verbose > 1) {
> - nargv[6] = VMD_VERBOSE_2;
> - nargv[7] = NULL;
> + nargv[8] = VMD_VERBOSE_2;
> + nargv[9] = NULL;
> }
>
> /* Control resumes in vmd main(). */
> diff --git a/usr.sbin/vmd/x86_vm.c b/usr.sbin/vmd/x86_vm.c
> index d0caf98dd12..7dbe53533d9 100644
> --- a/usr.sbin/vmd/x86_vm.c
> +++ b/usr.sbin/vmd/x86_vm.c
> @@ -52,8 +52,6 @@ extern char *__progname;
> void create_memory_map(struct vm_create_params *);
> int translate_gva(struct vm_exit*, uint64_t, uint64_t *, int);
>
> -static struct vm_mem_range *find_gpa_range(struct vm_create_params *, paddr_t,
> - size_t);
> static int loadfile_bios(gzFile, off_t, struct vcpu_reg_state *);
> static int vcpu_exit_eptviolation(struct vm_run_params *);
> static void vcpu_exit_inout(struct vm_run_params *);
> @@ -792,7 +790,7 @@ vcpu_exit_pci(struct vm_run_params *vrp)
> * NULL: on failure if there is no memory range as described by the parameters
> * Pointer to vm_mem_range that contains the start of the range otherwise.
> */
> -static struct vm_mem_range *
> +struct vm_mem_range *
> find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len)
> {
> size_t i, n;
> @@ -838,7 +836,7 @@ find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len)
> return (vmr);
> }
> /*
> - * write_mem
> + * _write_mem
> *
> * Copies data from 'buf' into the guest VM's memory at paddr 'dst'.
> *
> @@ -846,14 +844,15 @@ find_gpa_range(struct vm_create_params *vcp, paddr_t gpa, size_t len)
> * dst: the destination paddr_t in the guest VM
> * buf: data to copy (or NULL to zero the data)
> * len: number of bytes to copy
> + * enc: prepare for SEV encryption
> *
> * Return values:
> * 0: success
> * EINVAL: if the guest physical memory range [dst, dst + len) does not
> * exist in the guest.
> */
> -int
> -write_mem(paddr_t dst, const void *buf, size_t len)
> +static int
> +_write_mem(paddr_t dst, const void *buf, size_t len, int enc)
> {
> const char *from = buf;
> char *to;
> @@ -868,6 +867,9 @@ write_mem(paddr_t dst, const void *buf, size_t len)
> return (EINVAL);
> }
>
> + if (enc && sev_add_memsegment((vaddr_t)dst, len) < 0)
> + return (EINVAL);
> +
With the bulk of the changes above (all write_mem becoming write_mem_enc),
this unfairly penalizes any write_mem on non-SEV hardware as we need to
now route through sev_add_memsegment every time. Sure, it short-circuits
immediately when it checks for SEV, but the default path should be the
fastest for the non-SEV case, IMO.
> off = dst - vmr->vmr_gpa;
> while (len != 0) {
> n = vmr->vmr_size - off;
> @@ -889,6 +891,18 @@ write_mem(paddr_t dst, const void *buf, size_t len)
> return (0);
> }
>
> +int
> +write_mem(paddr_t dst, const void *buf, size_t len)
> +{
> + return _write_mem(dst, buf, len, 0);
> +}
> +
> +int
> +write_mem_enc(paddr_t dst, const void *buf, size_t len)
> +{
> + return _write_mem(dst, buf, len, 1);
> +}
> +
> /*
> * read_mem
> *
AMD SEV 5/5: Add support to vmd(8)