Support for AMD SEV-ES
Hi everyone,
here's an updated diff for SEV-ES support. It mainly reworks the #VC
trap handling and the entry path for #VC:
Normal traps quickly re-enable interrupts. However, #VC might occur during
interrupt handling before a specific hardware interrupt gets masked.
Re-enabling interrupts in the #VC trap handler might thus cause nested
interrupts of the same level. Therefore, provide a special trap entry
which does not enable interrupts again.
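For context: before the GHCB page is mapped, the early locore #VC
handlers below use the GHCB MSR protocol to fetch CPUID results one
register at a time. A rough sketch in C (not part of the diff; the
helper names are made up for illustration):

	#include <stdint.h>

	#define MSRPROTO_CPUID_REQ	0x4	/* GHCBInfo: CPUID request */
	#define MSRPROTO_CPUID_RESP	0x5	/* GHCBInfo: CPUID response */

	/* Build the value written to MSR_SEV_GHCB before VMGEXIT. */
	static uint64_t
	msrproto_cpuid_req(uint32_t leaf, unsigned int reg)
	{
		/* reg: 0 = %eax, 1 = %ebx, 2 = %ecx, 3 = %edx */
		return ((uint64_t)leaf << 32) | ((uint64_t)reg << 30) |
		    MSRPROTO_CPUID_REQ;
	}

	/* Extract the requested register from the hypervisor's reply. */
	static uint32_t
	msrproto_cpuid_resp(uint64_t ghcb_msr)
	{
		if ((ghcb_msr & 0xfff) != MSRPROTO_CPUID_RESP)
			return 0;	/* unexpected reply */
		return (uint32_t)(ghcb_msr >> 32);
	}

The same encoding shows up on the host side in svm_handle_gexit(),
which decodes the request from the GHCB MSR and builds the response.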
There are still some XXX comments, though.
Take care!
HJ.
--------------------------------------------------------------------------
diff --git a/sys/arch/amd64/amd64/ghcb.c b/sys/arch/amd64/amd64/ghcb.c
new file mode 100644
index 00000000000..c9448017153
--- /dev/null
+++ b/sys/arch/amd64/amd64/ghcb.c
@@ -0,0 +1,221 @@
+/* $OpenBSD:$ */
+
+/*
+ * Copyright (c) 2024, 2025 Hans-Joerg Hoexer <hshoexer@genua.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <machine/frame.h>
+#include <machine/ghcb.h>
+
+vaddr_t ghcb_vaddr;
+paddr_t ghcb_paddr;
+
+/*
+ * ghcb_clear
+ *
+ * Clear GHCB by setting to all 0.
+ * Used by host and guest.
+ */
+void
+ghcb_clear(struct ghcb_sa *ghcb)
+{
+ memset(ghcb, 0, sizeof(*ghcb));
+}
+
+/*
+ * ghcb_valbm_set
+ *
+ * Set the bit corresponding to quad word qword in the GHCB valid bitmap.
+ * Used by host and guest.
+ */
+int
+ghcb_valbm_set(uint8_t *bm, int qword)
+{
+ if (qword > GHCB_MAX)
+ return (1);
+
+ bm[GHCB_IDX(qword)] |= (1 << GHCB_BIT(qword));
+
+ return (0);
+}
+
+/*
+ * ghcb_valbm_isset
+ *
+ * Indicate whether a specific quad word is set or not.
+ * Used by host and guest.
+ */
+int
+ghcb_valbm_isset(uint8_t *bm, int qword)
+{
+ if (qword > GHCB_MAX)
+ return (0);
+
+ return (bm[GHCB_IDX(qword)] & (1 << GHCB_BIT(qword)));
+}
+
+/*
+ * ghcb_valid
+ *
+ * To provide valid information, the exitcode, exitinfo1 and exitinfo2
+ * must be set in the GHCB. Verify by checking valid_bitmap.
+ * Used by host only.
+ */
+int
+ghcb_valid(struct ghcb_sa *ghcb)
+{
+ uint8_t *bm = ghcb->valid_bitmap;
+
+ return (ghcb_valbm_isset(bm, GHCB_SW_EXITCODE) &&
+ ghcb_valbm_isset(bm, GHCB_SW_EXITINFO1) &&
+ ghcb_valbm_isset(bm, GHCB_SW_EXITINFO2));
+}
+
+
+/*
+ * ghcb_verify_bm
+ *
+ * To be verified positive, the given valid bitmap must exactly
+ * match the expected bitmap.
+ * Used by host only.
+ */
+int
+ghcb_verify_bm(uint8_t *valid_bm, uint8_t *expected_bm)
+{
+ return (memcmp(valid_bm, expected_bm, GHCB_VB_SZ));
+}
+
+/*
+ * ghcb_verify_bm_guest
+ *
+ * To be verified positive, every bit set in the expected bitmap
+ * must also be set in the provided valid bitmap. This ensures the
+ * host provides at least the information requested by the guest.
+ * Used by guest only.
+ * This is required for running on a Linux/KVM host.
+ */
+int
+ghcb_verify_bm_guest(uint8_t *valid_bm, uint8_t *expected_bm)
+{
+ return ((ghcb_valbm_isset(expected_bm, GHCB_RAX) &&
+ !ghcb_valbm_isset(valid_bm, GHCB_RAX)) ||
+ (ghcb_valbm_isset(expected_bm, GHCB_RBX) &&
+ !ghcb_valbm_isset(valid_bm, GHCB_RBX)) ||
+ (ghcb_valbm_isset(expected_bm, GHCB_RCX) &&
+ !ghcb_valbm_isset(valid_bm, GHCB_RCX)) ||
+ (ghcb_valbm_isset(expected_bm, GHCB_RDX) &&
+ !ghcb_valbm_isset(valid_bm, GHCB_RDX)) ||
+ (ghcb_valbm_isset(expected_bm, GHCB_SW_EXITINFO1) &&
+ !ghcb_valbm_isset(valid_bm, GHCB_SW_EXITINFO1)) ||
+ (ghcb_valbm_isset(expected_bm, GHCB_SW_EXITINFO2) &&
+ !ghcb_valbm_isset(valid_bm, GHCB_SW_EXITINFO2)));
+}
+
+/*
+ * ghcb_sync_val
+ *
+ * Mark the quad word selected by type as to be synced and record
+ * its operand size in the sync state gs.
+ * Used by guest only.
+ */
+void
+ghcb_sync_val(int type, int size, struct ghcb_sync *gs)
+{
+ if (size > GHCB_SZ64)
+ panic("invalid size: %d", size);
+
+ switch (type) {
+ case GHCB_RAX:
+ gs->sz_a = size;
+ break;
+ case GHCB_RBX:
+ gs->sz_b = size;
+ break;
+ case GHCB_RCX:
+ gs->sz_c = size;
+ break;
+ case GHCB_RDX:
+ gs->sz_d = size;
+ break;
+ case GHCB_SW_EXITCODE:
+ case GHCB_SW_EXITINFO1:
+ case GHCB_SW_EXITINFO2:
+ break;
+
+ default:
+ panic("invalid type: %d", type);
+ /* NOTREACHED */
+ }
+
+ ghcb_valbm_set(gs->valid_bitmap, type);
+}
+
+/*
+ * ghcb_sync_out
+ *
+ * Copy the registers and exit information selected in gsout from
+ * the trap frame into the GHCB, masked to the recorded operand
+ * sizes, before calling the hypervisor.
+ * Used by guest only.
+ */
+void
+ghcb_sync_out(struct trapframe *frame, uint64_t exitcode, uint64_t exitinfo1,
+ uint64_t exitinfo2, struct ghcb_sa *ghcb, struct ghcb_sync *gsout)
+{
+ uint64_t masks[] = {
+ 0x00000000000000ffULL, 0x000000000000ffffULL,
+ 0x00000000ffffffffULL, 0xffffffffffffffffULL };
+
+ ghcb_clear(ghcb);
+
+ memcpy(ghcb->valid_bitmap, gsout->valid_bitmap,
+ sizeof(ghcb->valid_bitmap));
+
+ if (ghcb_valbm_isset(gsout->valid_bitmap, GHCB_RAX))
+ ghcb->v_rax = frame->tf_rax & masks[gsout->sz_a];
+ if (ghcb_valbm_isset(gsout->valid_bitmap, GHCB_RBX))
+ ghcb->v_rbx = frame->tf_rbx & masks[gsout->sz_b];
+ if (ghcb_valbm_isset(gsout->valid_bitmap, GHCB_RCX))
+ ghcb->v_rcx = frame->tf_rcx & masks[gsout->sz_c];
+ if (ghcb_valbm_isset(gsout->valid_bitmap, GHCB_RDX))
+ ghcb->v_rdx = frame->tf_rdx & masks[gsout->sz_d];
+
+ if (ghcb_valbm_isset(gsout->valid_bitmap, GHCB_SW_EXITCODE))
+ ghcb->v_sw_exitcode = exitcode;
+ if (ghcb_valbm_isset(gsout->valid_bitmap, GHCB_SW_EXITINFO1))
+ ghcb->v_sw_exitinfo1 = exitinfo1;
+ if (ghcb_valbm_isset(gsout->valid_bitmap, GHCB_SW_EXITINFO2))
+ ghcb->v_sw_exitinfo2 = exitinfo2;
+}
+
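+/*
+ * ghcb_sync_in
+ *
+ * Copy the registers selected in gsin from the GHCB back into the
+ * trap frame, masked to the recorded operand sizes, then clear the
+ * GHCB.
+ * Used by guest only.
+ */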
+void
+ghcb_sync_in(struct trapframe *frame, struct ghcb_sa *ghcb,
+ struct ghcb_sync *gsin)
+{
+ uint64_t masks[] = {
+ 0x00000000000000ffULL, 0x000000000000ffffULL,
+ 0x00000000ffffffffULL, 0xffffffffffffffffULL };
+
+ if (ghcb_valbm_isset(gsin->valid_bitmap, GHCB_RAX)) {
+ frame->tf_rax &= ~masks[gsin->sz_a];
+ frame->tf_rax |= (ghcb->v_rax & masks[gsin->sz_a]);
+ }
+ if (ghcb_valbm_isset(gsin->valid_bitmap, GHCB_RBX)) {
+ frame->tf_rbx &= ~masks[gsin->sz_b];
+ frame->tf_rbx |= (ghcb->v_rbx & masks[gsin->sz_b]);
+ }
+ if (ghcb_valbm_isset(gsin->valid_bitmap, GHCB_RCX)) {
+ frame->tf_rcx &= ~masks[gsin->sz_c];
+ frame->tf_rcx |= (ghcb->v_rcx & masks[gsin->sz_c]);
+ }
+ if (ghcb_valbm_isset(gsin->valid_bitmap, GHCB_RDX)) {
+ frame->tf_rdx &= ~masks[gsin->sz_d];
+ frame->tf_rdx |= (ghcb->v_rdx & masks[gsin->sz_d]);
+ }
+
+ ghcb_clear(ghcb);
+}
diff --git a/sys/arch/amd64/amd64/identcpu.c b/sys/arch/amd64/amd64/identcpu.c
index 18ecbc8f4c8..d547918c696 100644
--- a/sys/arch/amd64/amd64/identcpu.c
+++ b/sys/arch/amd64/amd64/identcpu.c
@@ -67,6 +67,7 @@ int cpuspeed;
int amd64_has_xcrypt;
int amd64_pos_cbit; /* C bit position for SEV */
+int amd64_min_noes_asid;
int has_rdrand;
int has_rdseed;
@@ -710,6 +711,11 @@ identifycpu(struct cpu_info *ci)
'd', CPUID_MEMBER(ci_feature_amdsev_edx),
CPUID_AMDSEV_EDX_BITS);
amd64_pos_cbit = (ci->ci_feature_amdsev_ebx & 0x3f);
+ amd64_min_noes_asid = (ci->ci_feature_amdsev_edx);
+ if (cpu_sev_guestmode && CPU_IS_PRIMARY(ci))
+ printf("\n%s: SEV%s guest mode", ci->ci_dev->dv_xname,
+ ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED) ?
+ "-ES" : "");
}
printf("\n");
diff --git a/sys/arch/amd64/amd64/locore0.S b/sys/arch/amd64/amd64/locore0.S
index 978821715e7..2988b072903 100644
--- a/sys/arch/amd64/amd64/locore0.S
+++ b/sys/arch/amd64/amd64/locore0.S
@@ -111,6 +111,7 @@
#include <machine/param.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
+#include <machine/trap.h>
/*
* override user-land alignment before including asm.h
@@ -193,6 +194,58 @@ bi_size_ok:
pushl $PSL_MBO
popfl
+ /*
+ * Setup temporary #VC trap handler, in case we are running
+ * on an AMD CPU in SEV-ES guest mode. Will be reset by
+ * init_x86_64().
+ * We are setting up two handlers:
+ *
+ * 1) locore_vc_trap32: Triggered when we are running in
+ * 32-bit legacy mode.
+ *
+ * 2) locore_vc_trap64: Triggered when we are running in
+ * 32-bit compatibility mode.
+ *
+ * The latter one is used by vmd(8).
+ */
+ movl $RELOC(early_idt), %ecx
+ movl $T_VC, %edx
+ leal (%ecx, %edx, 8), %ecx /* 32bit #VC IDT slot */
+
+ pushl %cs /* get current %cs */
+ popl %ebx
+ shll $16, %ebx
+
+ movl $RELOC(locore_vc_trap32), %eax
+ andl $0x0000ffff, %eax
+ orl %ebx, %eax /* use current %cs */
+ movl %eax, (%ecx)
+
+ movl $RELOC(locore_vc_trap32), %eax
+ andl $0xffff0000, %eax
+ orl $((0x80 | SDT_SYS386IGT) << 8), %eax
+ movl %eax, 4(%ecx)
+
+ movl $RELOC(early_idt), %ecx
+ movl $(2 * T_VC), %edx
+ leal (%ecx, %edx, 8), %ecx /* 64bit #VC IDT slot */
+
+ movl $RELOC(locore_vc_trap64), %eax
+ andl $0x0000ffff, %eax
+ orl $(GSEL(3, SEL_KPL) << 16), %eax
+ movl %eax, (%ecx)
+
+ movl $RELOC(locore_vc_trap64), %eax
+ andl $0xffff0000, %eax
+ orl $((0x80 | SDT_SYS386IGT) << 8), %eax
+ movl %eax, 4(%ecx)
+ xorl %eax, %eax
+ movl %eax, 8(%ecx)
+ movl %eax, 12(%ecx)
+
+ movl $RELOC(idtlc), %eax
+ lidt (%eax)
+
xorl %eax,%eax
cpuid
movl %eax,RELOC(cpuid_level)
@@ -288,8 +341,9 @@ cont:
/* Are we in guest mode with SEV enabled? */
movl $MSR_SEV_STATUS, %ecx
rdmsr
- andl $SEV_STAT_ENABLED, %eax
+ testl $SEV_STAT_ENABLED, %eax
jz .Lno_sev
+ movl %eax, RELOC(cpu_sev_guestmode) /* we are a SEV guest */
/* Determine C bit position */
movl %ebx, %ecx /* %ebx from previous cpuid */
@@ -332,8 +386,6 @@ cont:
andl %eax, RELOC(pg_frame + 4) /* apply mask */
andl %eax, RELOC(pg_lgframe + 4)
- movl $0x1, RELOC(cpu_sev_guestmode) /* we are a SEV guest */
-
.Lno_sev:
/*
@@ -379,7 +431,9 @@ cont:
#define PROC0_DMP2_OFF (PROC0_DMP3_OFF + NDML3_ENTRIES * NBPG)
#define TABLESIZE \
((NKL4_KIMG_ENTRIES + TABLE_L3_ENTRIES + TABLE_L2_ENTRIES + 1 + UPAGES + \
- NDML3_ENTRIES + NDML2_ENTRIES + 3) * NBPG)
+ NDML3_ENTRIES + NDML2_ENTRIES + 2 + 3) * NBPG)
+#define PROC0_GHCB_OFF (TABLESIZE - 5 * NBPG)
+#define GHCB_SIZE (2 * NBPG)
#define fillkpt \
pushl %ebp ; /* save */ \
@@ -403,6 +457,17 @@ cont:
loop 1b ; /* till finished */ \
popl %ebp
+
+#define fillkpt_nx_nc \
+ pushl %ebp ; /* save */ \
+ movl RELOC((pg_nx + 4)), %ebp ; /* NX bit? */ \
+1: movl %eax,(%ebx) ; /* store phys addr */ \
+ movl %ebp,4(%ebx) ; /* upper 32 bits */ \
+ addl $8,%ebx ; /* next pte/pde */ \
+ addl $NBPG,%eax ; /* next phys page */ \
+ loop 1b ; /* till finished */ \
+ popl %ebp
+
/* Find end of kernel image. */
movl $RELOC(end),%edi
#if (NKSYMS || defined(DDB))
@@ -509,6 +574,16 @@ map_tables:
shrl $PGSHIFT,%ecx
fillkpt_nx
+ /* Re-Map GHCB shared (ie. unencrypted) */
+ /* XXX hshoexer: Only in SEV-ES guestmode. */
+ pushl %ebx /* save current slot */
+ subl $(5 << 3),%ebx /* move back to slot of GHCB */
+ leal (PROC0_GHCB_OFF)(%esi),%eax
+ orl $(PG_V|PG_KW), %eax
+ movl $(GHCB_SIZE>>PGSHIFT), %ecx
+ fillkpt_nx_nc
+ popl %ebx /* continue with slot saved above */
+
/* Map ISA I/O mem (later atdevbase) RW, NX */
movl $(IOM_BEGIN|PG_V|PG_KW/*|PG_N*/),%eax
movl $(IOM_SIZE>>PGSHIFT),%ecx
@@ -626,7 +701,6 @@ store_pte:
*/
movl $MSR_EFER,%ecx
rdmsr
- xorl %eax,%eax /* XXX */
orl $(EFER_LME|EFER_SCE),%eax
movl RELOC((pg_nx + 4)), %ebx
cmpl $0, %ebx
@@ -712,6 +786,12 @@ longmode_hi:
addq %rsi,%rdx
movq %rdx,atdevbase(%rip)
+ /* Relocate GHCB. */
+ /* XXX hshoexer: Only in SEV-ES guestmode. */
+ movq $(PROC0_GHCB_OFF+KERNBASE),%rdx
+ addq %rsi,%rdx
+ movq %rdx,ghcb_vaddr(%rip)
+
/* Record start of symbols */
movq $__kernel_bss_end, ssym(%rip)
@@ -734,12 +814,131 @@ longmode_hi:
movw %ax,%fs
leaq TABLESIZE(%rsi),%rdi
+ subq $(NBPG*2), %rdi
subq $(NBPG*3), %rdi
/* XXX merge these */
call init_x86_64
call main
+ /* MSR Protocol Request Codes */
+#define MSRPROTO_CPUID_REQ 0x4
+#define MSRPROTO_TERM_REQ 0x100
+
+vc_cpuid64:
+ shll $30, %eax /* requested register */
+ orl $MSRPROTO_CPUID_REQ, %eax
+ movl %ebx, %edx /* CPUID function */
+ movl $MSR_SEV_GHCB, %ecx
+ wrmsr
+ rep vmmcall
+ rdmsr
+ ret
+
+ .globl locore_vc_trap64
+locore_vc_trap64:
+ pushq %rax
+ pushq %rbx
+ pushq %rcx
+ pushq %rdx
+
+#define SVM_VMEXIT_CPUID 0x72
+ cmpl $SVM_VMEXIT_CPUID, 32(%rsp)
+ jne .Lterminate64
+
+ movl %eax, %ebx /* save CPUID function */
+
+ movl $0, %eax /* request cpuid, get %eax */
+ call vc_cpuid64
+ movq %rdx, 24(%rsp)
+
+ movl $1, %eax /* get %ebx */
+ call vc_cpuid64
+ movq %rdx, 16(%rsp)
+
+ movl $2, %eax /* get %ecx */
+ call vc_cpuid64
+ movq %rdx, 8(%rsp)
+
+ movl $3, %eax /* get %edx */
+ call vc_cpuid64
+ movq %rdx, 0(%rsp)
+
+ popq %rdx
+ popq %rcx
+ popq %rbx
+ popq %rax
+ addq $8, %rsp
+ addq $2, (%rsp)
+ iretq
+
+.Lterminate64:
+ movl $MSRPROTO_TERM_REQ, %eax
+ movl $MSR_SEV_GHCB, %ecx
+ wrmsr
+ rep vmmcall
+.Lterm_loop64:
+ hlt
+ jmp .Lterm_loop64
+
+ .code32
+vc_cpuid32:
+ shll $30, %eax /* requested register */
+ orl $MSRPROTO_CPUID_REQ, %eax
+ movl %ebx, %edx /* CPUID function */
+ movl $MSR_SEV_GHCB, %ecx
+ wrmsr
+ rep vmmcall
+ rdmsr
+ ret
+
+ .globl locore_vc_trap32
+locore_vc_trap32:
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+
+#define SVM_VMEXIT_CPUID 0x72
+ cmpl $SVM_VMEXIT_CPUID, 16(%esp)
+ jne .Lterminate32
+
+ movl %eax, %ebx /* save CPUID function */
+
+ movl $0, %eax /* request cpuid, get %eax */
+ call vc_cpuid32
+ movl %edx, 12(%esp)
+
+ movl $1, %eax /* get %ebx */
+ call vc_cpuid32
+ movl %edx, 8(%esp)
+
+ movl $2, %eax /* get %ecx */
+ call vc_cpuid32
+ movl %edx, 4(%esp)
+
+ movl $3, %eax /* get %edx */
+ call vc_cpuid32
+ movl %edx, 0(%esp)
+
+ popl %edx
+ popl %ecx
+ popl %ebx
+ popl %eax
+ addl $4, %esp
+ addl $2, (%esp)
+ iret
+
+.Lterminate32:
+ movl $MSRPROTO_TERM_REQ, %eax
+ movl $MSR_SEV_GHCB, %ecx
+ wrmsr
+ rep vmmcall
+.Lterm_loop32:
+ hlt
+ jmp .Lterm_loop32
+
+
.section .codepatch,"a"
.align 8, 0xcc
.globl codepatch_begin
@@ -752,6 +951,20 @@ codepatch_end:
.previous
.data
+ .globl idtlc /* temporary locore IDT */
+idtlc:
+ .word early_idt_end-early_idt-1
+ .long _RELOC(early_idt)
+ .align 64, 0xcc
+
+ .globl early_idt
+early_idt:
+ .rept NIDT
+ .quad 0x0000000000000000
+ .quad 0x0000000000000000
+ .endr
+early_idt_end:
+
.globl gdt64
gdt64:
.word gdt64_end-gdt64_start-1
diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
index 7f85f92cb88..0d8ea7e8593 100644
--- a/sys/arch/amd64/amd64/machdep.c
+++ b/sys/arch/amd64/amd64/machdep.c
@@ -100,6 +100,7 @@
#include <machine/mpbiosvar.h>
#include <machine/kcore.h>
#include <machine/tss.h>
+#include <machine/ghcb.h>
#include <dev/isa/isareg.h>
#include <dev/ic/i8042reg.h>
@@ -486,6 +487,8 @@ bios_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
extern int tsc_is_invariant;
extern int amd64_has_xcrypt;
extern int need_retpoline;
+extern int cpu_sev_guestmode;
+
const struct sysctl_bounded_args cpuctl_vars[] = {
{ CPU_LIDACTION, &lid_action, -1, 2 },
@@ -495,6 +498,7 @@ const struct sysctl_bounded_args cpuctl_vars[] = {
{ CPU_XCRYPT, &amd64_has_xcrypt, SYSCTL_INT_READONLY },
{ CPU_INVARIANTTSC, &tsc_is_invariant, SYSCTL_INT_READONLY },
{ CPU_RETPOLINE, &need_retpoline, SYSCTL_INT_READONLY },
+ { CPU_SEVGUESTMODE, &cpu_sev_guestmode, SYSCTL_INT_READONLY },
};
/*
@@ -1309,6 +1313,38 @@ cpu_init_idt(void)
 lidt(&region);
}
+uint64_t early_gdt[GDT_SIZE / 8];
+
+void
+cpu_init_early_vctrap(paddr_t addr)
+{
+ struct region_descriptor region;
+
+ extern struct region_descriptor gdt64;
+ extern struct gate_descriptor early_idt[NIDT];
+ extern void Xvctrap_early(void);
+
+ /* Setup temporary "early" longmode GDT, will be reset soon */
+ memset(early_gdt, 0, sizeof(early_gdt));
+ set_mem_segment(GDT_ADDR_MEM(early_gdt, GCODE_SEL), 0, 0xfffff,
+ SDT_MEMERA, SEL_KPL, 1, 0, 1);
+ set_mem_segment(GDT_ADDR_MEM(early_gdt, GDATA_SEL), 0, 0xfffff,
+ SDT_MEMRWA, SEL_KPL, 1, 0, 1);
+ setregion(&region, early_gdt, GDT_SIZE - 1);
+ lgdt(&region);
+
+ /* Setup temporary "early" longmode #VC entry, will be reset soon */
+ setgate(&early_idt[T_VC], Xvctrap_early, 0, SDT_SYS386IGT,
+ SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+ setregion(&region, early_idt, NIDT * sizeof(idt[0]) - 1);
+ lidt(&region);
+
+ /* Tell vmm(4) about our GHCB. */
+ ghcb_paddr = addr;
+ memset((void *)ghcb_vaddr, 0, 2 * PAGE_SIZE);
+ wrmsr(MSR_SEV_GHCB, ghcb_paddr);
+}
+
void
cpu_init_extents(void)
{
@@ -1429,6 +1465,13 @@ init_x86_64(paddr_t first_avail)
int x, ist;
uint64_t max_dm_size = ((uint64_t)512 * NUM_L4_SLOT_DIRECT) << 30;
+ /*
+ * locore0 mapped 2 pages for use as GHCB before pmap is initialized.
+ */
+ if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED))
+ cpu_init_early_vctrap(first_avail);
+ first_avail += 2 * NBPG;
+
/*
* locore0 mapped 3 pages for use before the pmap is initialized
* starting at first_avail. These pages are currently used by
diff --git a/sys/arch/amd64/amd64/trap.c b/sys/arch/amd64/amd64/trap.c
index 433db1be798..ccd46b97da4 100644
--- a/sys/arch/amd64/amd64/trap.c
+++ b/sys/arch/amd64/amd64/trap.c
@@ -86,6 +86,8 @@
#include <machine/fpu.h>
#include <machine/psl.h>
#include <machine/trap.h>
+#include <machine/ghcb.h>
+#include <machine/vmmvar.h>
#ifdef DDB
#include <ddb/db_output.h>
#include <machine/db_machdep.h>
@@ -95,6 +97,7 @@
int upageflttrap(struct trapframe *, uint64_t);
int kpageflttrap(struct trapframe *, uint64_t);
+int vctrap(struct trapframe *, int);
void kerntrap(struct trapframe *);
void usertrap(struct trapframe *);
void ast(struct trapframe *);
@@ -123,6 +126,7 @@ const char * const trap_type[] = {
"SSE FP exception", /* 19 T_XMM */
"virtualization exception", /* 20 T_VE */
"control protection exception", /* 21 T_CP */
+ "VMM communication exception", /* 29 T_VC */
};
const int trap_types = nitems(trap_type);
@@ -297,6 +301,151 @@ kpageflttrap(struct trapframe *frame, uint64_t cr2)
return 1;
}
+int
+vctrap(struct trapframe *frame, int user)
+{
+ uint64_t sw_exitcode, sw_exitinfo1, sw_exitinfo2;
+ uint8_t *rip = (uint8_t *)(frame->tf_rip);
+ uint16_t port;
+ struct ghcb_sync syncout, syncin;
+ struct ghcb_sa *ghcb;
+
+ intr_disable();
+
+ memset(&syncout, 0, sizeof(syncout));
+ memset(&syncin, 0, sizeof(syncin));
+
+ sw_exitcode = frame->tf_err;
+ sw_exitinfo1 = 0;
+ sw_exitinfo2 = 0;
+
+ switch (sw_exitcode) {
+ case SVM_VMEXIT_CPUID:
+ ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncout);
+ ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncout);
+ ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncin);
+ ghcb_sync_val(GHCB_RBX, GHCB_SZ32, &syncin);
+ ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncin);
+ ghcb_sync_val(GHCB_RDX, GHCB_SZ32, &syncin);
+ frame->tf_rip += 2;
+ break;
+ case SVM_VMEXIT_MSR: {
+ if (user)
+ return 0; /* not allowed from userspace */
+ if (*rip == 0x0f && *(rip + 1) == 0x30) {
+ /* WRMSR */
+ ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncout);
+ ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncout);
+ ghcb_sync_val(GHCB_RDX, GHCB_SZ32, &syncout);
+ sw_exitinfo1 = 1;
+ } else if (*rip == 0x0f && *(rip + 1) == 0x32) {
+ /* RDMSR */
+ ghcb_sync_val(GHCB_RCX, GHCB_SZ32, &syncout);
+ ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncin);
+ ghcb_sync_val(GHCB_RDX, GHCB_SZ32, &syncin);
+ } else
+ panic("failed to decode MSR");
+ frame->tf_rip += 2;
+ break;
+ }
+ case SVM_VMEXIT_IOIO: {
+ if (user)
+ return 0; /* not allowed from userspace */
+ switch (*rip) {
+ case 0x66: {
+ switch (*(rip + 1)) {
+ case 0xef: /* out %ax,(%dx) */
+ ghcb_sync_val(GHCB_RAX, GHCB_SZ16, &syncout);
+ port = (uint16_t)frame->tf_rdx;
+ sw_exitinfo1 = (port << 16) |
+ (1ULL << 5);
+ frame->tf_rip += 2;
+ break;
+ case 0xed: /* in (%dx),%ax */
+ ghcb_sync_val(GHCB_RAX, GHCB_SZ16, &syncin);
+ port = (uint16_t)frame->tf_rdx;
+ sw_exitinfo1 = (port << 16) |
+ (1ULL << 5) | (1ULL << 0);
+ frame->tf_rip += 2;
+ break;
+ default:
+ panic("failed to decode prefixed IOIO");
+ }
+ break;
+ }
+ case 0xe4: /* in $0x71,%al */
+ ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncin);
+ port = *(rip + 1);
+ sw_exitinfo1 = (port << 16) | (1ULL << 4) |
+ (1ULL << 0);
+ frame->tf_rip += 2;
+ break;
+ case 0xe6: /* outb %al,$0x43 */
+ ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncout);
+ port = *(rip + 1);
+ sw_exitinfo1 = (port << 16) | (1ULL << 4);
+ frame->tf_rip += 2;
+ break;
+ case 0xec: /* in (%dx),%al */
+ ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncin);
+ port = (uint16_t)frame->tf_rdx;
+ sw_exitinfo1 = (port << 16) | (1ULL << 4) |
+ (1ULL << 0);
+ frame->tf_rip += 1;
+ break;
+ case 0xed: /* in (%dx),%eax */
+ ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncin);
+ port = (uint16_t)frame->tf_rdx;
+ sw_exitinfo1 = (port << 16) | (1ULL << 6) |
+ (1ULL << 0);
+ frame->tf_rip += 1;
+ break;
+ case 0xee: /* out %al,(%dx) */
+ ghcb_sync_val(GHCB_RAX, GHCB_SZ8, &syncout);
+ port = (uint16_t)frame->tf_rdx;
+ sw_exitinfo1 = (port << 16) | (1ULL << 4);
+ frame->tf_rip += 1;
+ break;
+ case 0xef: /* out %eax,(%dx) */
+ ghcb_sync_val(GHCB_RAX, GHCB_SZ32, &syncout);
+ port = (uint16_t)frame->tf_rdx;
+ sw_exitinfo1 = (port << 16) | (1ULL << 6);
+ frame->tf_rip += 1;
+ break;
+ default:
+ panic("failed to decode IOIO");
+ }
+ break;
+ }
+ default:
+ panic("invalid exit code 0x%llx", sw_exitcode);
+ }
+
+ /* Always required */
+ ghcb_sync_val(GHCB_SW_EXITCODE, GHCB_SZ64, &syncout);
+ ghcb_sync_val(GHCB_SW_EXITINFO1, GHCB_SZ64, &syncout);
+ ghcb_sync_val(GHCB_SW_EXITINFO2, GHCB_SZ64, &syncout);
+
+ /* Sync out to GHCB */
+ ghcb = (struct ghcb_sa *)ghcb_vaddr;
+ ghcb_sync_out(frame, sw_exitcode, sw_exitinfo1, sw_exitinfo2, ghcb,
+ &syncout);
+
+ /* Call hypervisor. */
+ vmgexit();
+
+ /* Verify response */
+ if (ghcb_verify_bm_guest(ghcb->valid_bitmap, syncin.valid_bitmap)) {
+ ghcb_clear(ghcb);
+ panic("invalid hypervisor response");
+ }
+
+ /* Sync in from GHCB */
+ ghcb_sync_in(frame, ghcb, &syncin);
+
+ return 1;
+}
+
/*
* kerntrap(frame):
@@ -348,6 +497,11 @@ kerntrap(struct trapframe *frame)
else
return;
#endif /* NISA > 0 */
+
+ case T_VC:
+ if (vctrap(frame, 0))
+ return;
+ goto we_re_toast;
}
}
@@ -427,7 +581,12 @@ usertrap(struct trapframe *frame)
code = (frame->tf_err & 0x7fff) < 4 ? ILL_BTCFI
: ILL_BADSTK;
break;
-
+ case T_VC:
+ if (vctrap(frame, 1))
+ goto out;
+ sig = SIGILL;
+ code = ILL_PRVOPC;
+ break;
case T_PAGEFLT: /* page fault */
if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
"[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 97c23d00d7f..2d9850b7655 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -373,6 +373,43 @@ IDTVEC(trap14)
ZTRAP(T_VE)
IDTVEC(trap15)
TRAP(T_CP)
+
+IDTVEC(trap1d)
+ /*
+ * #VC is AMD CPU specific, thus we don't use any Intel Meltdown
+ * workarounds.
+ *
+ * We handle #VC differently from other traps, as we do not want
+ * to re-enable interrupts. #VC might happen during IRQ handling
+ * before a specific hardware interrupt gets masked. Re-enabling
+ * interrupts in the trap handler might cause nested IRQs of
+ * the same level. Thus keep interrupts disabled.
+ *
+ * On Intel CPUs we could use code patching to reset this entry.
+ */
+ pushq $T_VC
+ testb $SEL_RPL,24(%rsp)
+ je vctrap_kern
+ swapgs
+ FENCE_SWAPGS_MIS_TAKEN
+ movq %rax,CPUVAR(SCRATCH)
+
+ /* #VC from userspace */
+ TRAP_ENTRY_USER
+ cld
+ SMAP_CLAC
+ /* shortcut to regular path, but with interrupts disabled */
+ jmp recall_trap
+
+ /* #VC from kernel space */
+vctrap_kern:
+ FENCE_NO_SAFE_SMAP
+ TRAP_ENTRY_KERN
+ cld
+ SMAP_CLAC
+ /* shortcut to regular path, but with interrupts disabled */
+ jmp .Lreal_kern_trap
+
IDTVEC(trap1f)
IDTVEC_ALIAS(trap16, trap1f)
IDTVEC_ALIAS(trap17, trap1f)
@@ -381,7 +418,6 @@ IDTVEC_ALIAS(trap19, trap1f)
IDTVEC_ALIAS(trap1a, trap1f)
IDTVEC_ALIAS(trap1b, trap1f)
IDTVEC_ALIAS(trap1c, trap1f)
-IDTVEC_ALIAS(trap1d, trap1f)
IDTVEC_ALIAS(trap1e, trap1f)
/* 22 - 31 reserved for future exp */
ZTRAP(T_RESERVED)
@@ -513,6 +549,16 @@ END(alltraps_kern)
END(alltraps_kern_meltdown)
KTEXT_PAGE_END
+/* #VC trap entry for early bootstrap */
+IDTVEC(vctrap_early)
+ pushq $T_VC
+ TRAP_ENTRY_KERN /* early #VC has to be in kernel mode */
+ cld
+ movq %rsp, %rdi
+ movq $0x0, %rsi
+ call vctrap
+ movq $0,-8(%rsp)
+ INTRFASTEXIT
/*
* Macros for interrupt entry, call to handler, and exit.
diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c
index e3205f48eed..5fe93a8577c 100644
--- a/sys/arch/amd64/amd64/vmm_machdep.c
+++ b/sys/arch/amd64/amd64/vmm_machdep.c
@@ -37,6 +37,7 @@
#include <machine/biosvar.h>
#include <machine/segments.h>
#include <machine/cpufunc.h>
+#include <machine/ghcb.h>
#include <machine/vmmvar.h>
#include <dev/isa/isareg.h>
@@ -80,6 +81,7 @@ int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
+int vcpu_svm_init_vmsa(struct vcpu *, struct vcpu_reg_state *);
int vcpu_reload_vmcs_vmx(struct vcpu *);
int vcpu_init(struct vcpu *, struct vm_create_params *);
int vcpu_init_vmx(struct vcpu *);
@@ -95,6 +97,11 @@ int vmx_get_exit_info(uint64_t *, uint64_t *);
int vmx_load_pdptes(struct vcpu *);
int vmx_handle_exit(struct vcpu *);
int svm_handle_exit(struct vcpu *);
+int svm_gexit_sync_host(struct vcpu *);
+int svm_gexit_sync_guest(struct vcpu *);
+int svm_handle_gexit(struct vcpu *);
+int svm_handle_efercr(struct vcpu *, uint64_t);
+int svm_get_iflag(struct vcpu *, uint64_t);
int svm_handle_msr(struct vcpu *);
int vmm_handle_xsetbv(struct vcpu *, uint64_t *);
int vmx_handle_xsetbv(struct vcpu *);
@@ -125,6 +132,7 @@ int vmx_fault_page(struct vcpu *, paddr_t);
int vmx_handle_np_fault(struct vcpu *);
int svm_handle_np_fault(struct vcpu *);
int vmm_alloc_vpid(uint16_t *);
+int vmm_alloc_asid(uint16_t *, struct vcpu *);
void vmm_free_vpid(uint16_t);
const char *vcpu_state_decode(u_int);
const char *vmx_exit_reason_decode(uint32_t);
@@ -138,6 +146,7 @@ void vmx_setmsrbw(struct vcpu *, uint32_t);
void vmx_setmsrbrw(struct vcpu *, uint32_t);
void svm_set_clean(struct vcpu *, uint32_t);
void svm_set_dirty(struct vcpu *, uint32_t);
+int svm_get_vmsa(uint32_t, uint32_t, uint64_t *);
int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
void vmm_init_pvclock(struct vcpu *, paddr_t);
@@ -231,12 +240,16 @@ extern struct vmm_softc *vmm_softc;
extern vaddr_t idt_vaddr;
extern struct gate_descriptor *idt;
+/* Minimum ASID value for an SEV enabled, SEV-ES disabled guest. */
+extern int amd64_min_noes_asid;
+
/* Constants used in "CR access exit" */
#define CR_WRITE 0
#define CR_READ 1
#define CR_CLTS 2
#define CR_LMSW 3
+
/*
* vmm_enabled
*
@@ -1588,6 +1601,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
* External NMI exiting (SVM_INTERCEPT_NMI)
* CPUID instruction (SVM_INTERCEPT_CPUID)
* HLT instruction (SVM_INTERCEPT_HLT)
+ * INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
* I/O instructions (SVM_INTERCEPT_INOUT)
* MSR access (SVM_INTERCEPT_MSR)
* shutdown events (SVM_INTERCEPT_SHUTDOWN)
@@ -1617,9 +1631,17 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR |
SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP;
- if (xsave_mask)
+ if (xsave_mask && !vcpu->vc_seves) /* XXX hshoexer */
vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV;
+ if (vcpu->vc_seves) {
+ /* With SEV-ES also intercept post EFER and CR[048] writes */
+ vmcb->v_intercept2 |= SVM_INTERCEPT_EFER_WRITE;
+ vmcb->v_intercept2 |= SVM_INTERCEPT_CR0_WRITE_POST;
+ vmcb->v_intercept2 |= SVM_INTERCEPT_CR4_WRITE_POST;
+ vmcb->v_intercept2 |= SVM_INTERCEPT_CR8_WRITE_POST;
+ }
+
/* Setup I/O bitmap */
memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE);
vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);
@@ -1638,9 +1660,18 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
svm_setmsrbrw(vcpu, MSR_FSBASE);
svm_setmsrbrw(vcpu, MSR_GSBASE);
svm_setmsrbrw(vcpu, MSR_KERNELGSBASE);
+ svm_setmsrbrw(vcpu, MSR_SEV_GHCB);
+
+ /* allow reading SEV status */
+ svm_setmsrbrw(vcpu, MSR_SEV_STATUS);
- /* EFER is R/O so we can ensure the guest always has SVME */
- svm_setmsrbr(vcpu, MSR_EFER);
+ if (vcpu->vc_seves) {
+ /* With SEV-ES SVME can not be modified by the guest */
+ svm_setmsrbrw(vcpu, MSR_EFER);
+ } else {
+ /* EFER is R/O so we can ensure the guest always has SVME */
+ svm_setmsrbr(vcpu, MSR_EFER);
+ }
/* allow reading TSC */
svm_setmsrbr(vcpu, MSR_TSC);
@@ -1653,7 +1684,10 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
vmcb->v_asid = vcpu->vc_vpid;
/* TLB Control - First time in, flush all*/
- vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL;
+ if (vcpu->vc_seves)
+ vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ASID; /* XXX hshoexer */
+ else
+ vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL;
/* INTR masking */
vmcb->v_intr_masking = 1;
@@ -1672,19 +1706,93 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
if (vcpu->vc_sev)
vmcb->v_np_enable |= SVM_ENABLE_SEV;
+ /* SEV-ES */
+ if (vcpu->vc_seves) {
+ vmcb->v_np_enable |= SVM_SEVES_ENABLE;
+ vmcb->v_lbr_virt_enable |= SVM_LBRVIRT_ENABLE;
+
+ /* Set VMSA. */
+ vmcb->v_vmsa_pa = vcpu->vc_svm_vmsa_pa;
+
+ /* XXX hshoexer: LBR: guest_state_protected flag? */
+ svm_setmsrbrw(vcpu, MSR_DEBUGCTLMSR);
+ svm_setmsrbrw(vcpu, MSR_LASTBRANCHFROMIP);
+ svm_setmsrbrw(vcpu, MSR_LASTBRANCHTOIP);
+ svm_setmsrbrw(vcpu, MSR_LASTINTFROMIP);
+ svm_setmsrbrw(vcpu, MSR_LASTINTTOIP);
+
+ /* XXX hshoexer: virt vmload/vmsave */
+ vmcb->v_lbr_virt_enable |= 0x2;
+ }
+
/* Enable SVME in EFER (must always be set) */
vmcb->v_efer |= EFER_SVME;
- ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs);
+ if ((ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs)) != 0)
+ goto exit;
/* xcr0 power on default sets bit 0 (x87 state) */
vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
vcpu->vc_parent->vm_map->pmap->eptp = 0;
+ if ((ret = vcpu_svm_init_vmsa(vcpu, vrs)) != 0)
+ goto exit;
+
+exit:
return ret;
}
+/*
+ * vcpu_svm_init_vmsa
+ *
+ * Initialize VMSA with initial VCPU state.
+ */
+int
+vcpu_svm_init_vmsa(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
+{
+ uint64_t *gprs = vrs->vrs_gprs;
+ struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+ struct vmsa *vmsa;
+
+ if (!vcpu->vc_seves)
+ return 0;
+
+ if (vmcb->v_dr7 & ~0x00000400) /* XXX hshoexer? */
+ return 1;
+
+ vmsa = (struct vmsa *)vcpu->vc_svm_vmsa_va;
+ memcpy(vmsa, &vmcb->vmcb_layout, sizeof(vmcb->vmcb_layout));
+
+ vmsa->v_rax = gprs[VCPU_REGS_RAX];
+ vmsa->v_rbx = gprs[VCPU_REGS_RBX];
+ vmsa->v_rcx = gprs[VCPU_REGS_RCX];
+ vmsa->v_rdx = gprs[VCPU_REGS_RDX];
+ vmsa->v_rsp = gprs[VCPU_REGS_RSP];
+ vmsa->v_rbp = gprs[VCPU_REGS_RBP];
+ vmsa->v_rsi = gprs[VCPU_REGS_RSI];
+ vmsa->v_rdi = gprs[VCPU_REGS_RDI];
+
+ vmsa->v_r8 = gprs[VCPU_REGS_R8];
+ vmsa->v_r9 = gprs[VCPU_REGS_R9];
+ vmsa->v_r10 = gprs[VCPU_REGS_R10];
+ vmsa->v_r11 = gprs[VCPU_REGS_R11];
+ vmsa->v_r12 = gprs[VCPU_REGS_R12];
+ vmsa->v_r13 = gprs[VCPU_REGS_R13];
+ vmsa->v_r14 = gprs[VCPU_REGS_R14];
+ vmsa->v_r15 = gprs[VCPU_REGS_R15];
+
+ vmsa->v_rip = gprs[VCPU_REGS_RIP];
+
+ vmsa->v_xcr0 = vcpu->vc_gueststate.vg_xcr0;
+
+ /* initialize FPU */
+ vmsa->v_x87_fcw = __INITIAL_NPXCW__;
+ vmsa->v_mxcsr = __INITIAL_MXCSR__;
+
+ return 0;
+}
+
/*
* svm_setmsrbr
*
@@ -2765,7 +2873,7 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
int ret = 0;
/* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */
- if (vmm_alloc_vpid(&vcpu->vc_vpid))
+ if (vmm_alloc_asid(&vcpu->vc_vpid, vcpu))
return (ENOMEM);
/* Allocate VMCB VA */
@@ -2829,6 +2937,28 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
(uint64_t)vcpu->vc_svm_hsa_va,
(uint64_t)vcpu->vc_svm_hsa_pa);
+
+ /* Allocate VM save area VA */
+ vcpu->vc_svm_vmsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
+ &kp_zero, &kd_waitok);
+
+ if (!vcpu->vc_svm_vmsa_va) {
+ ret = ENOMEM;
+ goto exit;
+ }
+
+ /* Compute VM save area PA */
+ if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_vmsa_va,
+ &vcpu->vc_svm_vmsa_pa)) {
+ ret = ENOMEM;
+ goto exit;
+ }
+
+ DPRINTF("%s: VMSA va @ 0x%llx, pa @ 0x%llx\n", __func__,
+ (uint64_t)vcpu->vc_svm_vmsa_va,
+ (uint64_t)vcpu->vc_svm_vmsa_pa);
+
+
/* Allocate IOIO area VA (3 pages) */
vcpu->vc_svm_ioio_va = (vaddr_t)km_alloc(3 * PAGE_SIZE, &kv_any,
&vmm_kp_contig, &kd_waitok);
@@ -2851,6 +2981,7 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
/* Shall we enable SEV? */
vcpu->vc_sev = vcp->vcp_sev;
+ vcpu->vc_seves = vcp->vcp_seves;
/* Inform vmd(8) about ASID and C bit position. */
vcp->vcp_poscbit = amd64_pos_cbit;
@@ -2957,6 +3088,11 @@ vcpu_deinit_svm(struct vcpu *vcpu)
&kp_zero);
vcpu->vc_svm_hsa_va = 0;
}
+ if (vcpu->vc_svm_vmsa_va) {
+ km_free((void *)vcpu->vc_svm_vmsa_va, PAGE_SIZE, &kv_page,
+ &kp_zero);
+ vcpu->vc_svm_vmsa_va = 0;
+ }
if (vcpu->vc_svm_ioio_va) {
km_free((void *)vcpu->vc_svm_ioio_va, 3 * PAGE_SIZE, &kv_any,
&vmm_kp_contig);
@@ -4038,7 +4174,7 @@ svm_handle_hlt(struct vcpu *vcpu)
/* All HLT insns are 1 byte */
vcpu->vc_gueststate.vg_rip += 1;
- if (!(rflags & PSL_I)) {
+ if (!svm_get_iflag(vcpu, rflags)) {
DPRINTF("%s: guest halted with interrupts disabled\n",
__func__);
return (EIO);
@@ -4134,7 +4270,7 @@ svm_handle_exit(struct vcpu *vcpu)
switch (exit_reason) {
case SVM_VMEXIT_VINTR:
- if (!(rflags & PSL_I)) {
+ if (!svm_get_iflag(vcpu, rflags)) {
DPRINTF("%s: impossible interrupt window exit "
"config\n", __func__);
ret = EINVAL;
@@ -4198,6 +4334,16 @@ svm_handle_exit(struct vcpu *vcpu)
ret = vmm_inject_ud(vcpu);
update_rip = 0;
break;
+ case SVM_VMEXIT_EFER_WRITE_TRAP:
+ case SVM_VMEXIT_CR0_WRITE_TRAP:
+ case SVM_VMEXIT_CR4_WRITE_TRAP:
+ case SVM_VMEXIT_CR8_WRITE_TRAP:
+ ret = svm_handle_efercr(vcpu, exit_reason);
+ update_rip = 0;
+ break;
+ case SVM_VMEXIT_VMGEXIT:
+ ret = svm_handle_gexit(vcpu);
+ break;
default:
DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
exit_reason, (uint64_t)vcpu->vc_control_pa);
@@ -4223,6 +4369,328 @@ svm_handle_exit(struct vcpu *vcpu)
return (ret);
}
+/*
+ * sync guest ghcb -> host vmcb/vcpu
+ */
+int
+svm_gexit_sync_host(struct vcpu *vcpu)
+{
+ struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+ struct ghcb_sa *ghcb;
+ uint64_t svm_sw_exitcode;
+ uint8_t *valid_bm, expected_bm[0x10];
+
+ if (!vcpu->vc_seves)
+ return (0);
+
+ if (vcpu->vc_svm_ghcb_va == 0) {
+ printf("%s: GHCB not set\n", __func__);
+ return (0);
+ }
+ ghcb = (struct ghcb_sa *)vcpu->vc_svm_ghcb_va;
+
+ if (!ghcb_valid(ghcb))
+ return (EINVAL);
+
+ valid_bm = ghcb->valid_bitmap;
+
+ /* Always required. */
+ memset(expected_bm, 0, sizeof(expected_bm));
+ ghcb_valbm_set(expected_bm, GHCB_SW_EXITCODE);
+ ghcb_valbm_set(expected_bm, GHCB_SW_EXITINFO1);
+ ghcb_valbm_set(expected_bm, GHCB_SW_EXITINFO2);
+
+ svm_sw_exitcode = ghcb->v_sw_exitcode;
+ switch (svm_sw_exitcode) {
+ case SVM_VMEXIT_CPUID:
+ ghcb_valbm_set(expected_bm, GHCB_RAX);
+ ghcb_valbm_set(expected_bm, GHCB_RCX);
+ break;
+ case SVM_VMEXIT_MSR:
+ if (ghcb->v_sw_exitinfo1 == 1) {
+ /* WRMSR */
+ ghcb_valbm_set(expected_bm, GHCB_RAX);
+ ghcb_valbm_set(expected_bm, GHCB_RCX);
+ ghcb_valbm_set(expected_bm, GHCB_RDX);
+ } else {
+ /* RDMSR */
+ ghcb_valbm_set(expected_bm, GHCB_RCX);
+ }
+ break;
+ case SVM_VMEXIT_IOIO:
+ if (ghcb->v_sw_exitinfo1 & 0x1) {
+ /* in instruction, no registers used */
+ } else {
+ /* out instruction */
+ ghcb_valbm_set(expected_bm, GHCB_RAX);
+ }
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ if (ghcb_verify_bm(valid_bm, expected_bm) != 0)
+ return (EINVAL);
+
+ /* Always required */
+ vmcb->v_exitcode = vcpu->vc_gueststate.vg_exit_reason =
+ ghcb->v_sw_exitcode;
+ vmcb->v_exitinfo1 = ghcb->v_sw_exitinfo1;
+ vmcb->v_exitinfo2 = ghcb->v_sw_exitinfo2;
+
+ if (ghcb_valbm_isset(expected_bm, GHCB_RAX))
+ vmcb->v_rax = vcpu->vc_gueststate.vg_rax = ghcb->v_rax;
+ if (ghcb_valbm_isset(expected_bm, GHCB_RBX))
+ vcpu->vc_gueststate.vg_rbx = ghcb->v_rbx;
+ if (ghcb_valbm_isset(expected_bm, GHCB_RCX))
+ vcpu->vc_gueststate.vg_rcx = ghcb->v_rcx;
+ if (ghcb_valbm_isset(expected_bm, GHCB_RDX))
+ vcpu->vc_gueststate.vg_rdx = ghcb->v_rdx;
+
+ return (0);
+}
+
+/*
+ * sync host vmcb/vcpu -> guest ghcb
+ */
+int
+svm_gexit_sync_guest(struct vcpu *vcpu)
+{
+ uint64_t svm_sw_exitcode;
+ uint64_t svm_sw_exitinfo1, svm_sw_exitinfo2;
+ uint8_t *valid_bm;
+ struct ghcb_sa *ghcb;
+
+ if (!vcpu->vc_seves)
+ return (0);
+
+ if (vcpu->vc_svm_ghcb_va == 0)
+ return (0);
+
+ ghcb = (struct ghcb_sa *)vcpu->vc_svm_ghcb_va;
+ svm_sw_exitcode = ghcb->v_sw_exitcode;
+ svm_sw_exitinfo1 = ghcb->v_sw_exitinfo1;
+ svm_sw_exitinfo2 = ghcb->v_sw_exitinfo2;
+ ghcb_clear(ghcb);
+ valid_bm = ghcb->valid_bitmap;
+
+ switch (svm_sw_exitcode) {
+ case SVM_VMEXIT_CPUID:
+ ghcb_valbm_set(valid_bm, GHCB_RAX);
+ ghcb_valbm_set(valid_bm, GHCB_RBX);
+ ghcb_valbm_set(valid_bm, GHCB_RCX);
+ ghcb_valbm_set(valid_bm, GHCB_RDX);
+ break;
+ case SVM_VMEXIT_MSR:
+ if (svm_sw_exitinfo1 == 1) {
+ /* WRMSR -- nothing to return */
+ } else {
+ /* RDMSR */
+ ghcb_valbm_set(valid_bm, GHCB_RAX);
+ ghcb_valbm_set(valid_bm, GHCB_RDX);
+ }
+ break;
+ case SVM_VMEXIT_IOIO:
+ if (svm_sw_exitinfo1 & 0x1) {
+ /* IN */
+ ghcb_valbm_set(valid_bm, GHCB_RAX);
+ } else {
+ /* OUT -- nothing to return */
+ }
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ /* Always required */
+ svm_sw_exitinfo1 = 0;
+ svm_sw_exitinfo2 = 0;
+ ghcb_valbm_set(valid_bm, GHCB_SW_EXITINFO1);
+ ghcb_valbm_set(valid_bm, GHCB_SW_EXITINFO2);
+
+ if (ghcb_valbm_isset(valid_bm, GHCB_RAX))
+ ghcb->v_rax = vcpu->vc_gueststate.vg_rax;
+ if (ghcb_valbm_isset(valid_bm, GHCB_RBX))
+ ghcb->v_rbx = vcpu->vc_gueststate.vg_rbx;
+ if (ghcb_valbm_isset(valid_bm, GHCB_RCX))
+ ghcb->v_rcx = vcpu->vc_gueststate.vg_rcx;
+ if (ghcb_valbm_isset(valid_bm, GHCB_RDX))
+ ghcb->v_rdx = vcpu->vc_gueststate.vg_rdx;
+
+ if (ghcb_valbm_isset(valid_bm, GHCB_SW_EXITINFO1))
+ ghcb->v_sw_exitinfo1 = svm_sw_exitinfo1;
+ if (ghcb_valbm_isset(valid_bm, GHCB_SW_EXITINFO2))
+ ghcb->v_sw_exitinfo2 = svm_sw_exitinfo2;
+
+ return (0);
+}
+
+/*
+ * svm_handle_gexit
+ *
+ * Handle exits initiated by the guest due to #VC exceptions generated
+ * when SEV-ES is enabled.
+ */
+int
+svm_handle_gexit(struct vcpu *vcpu)
+{
+ struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+ struct vm *vm = vcpu->vc_parent;
+ struct ghcb_sa *ghcb;
+ paddr_t ghcb_gpa, ghcb_hpa;
+ uint32_t req, resp;
+ uint64_t result;
+ int syncout, error = 0;
+
+ if (vcpu->vc_svm_ghcb_va == 0 && (vmcb->v_ghcb_gpa & ~PG_FRAME) == 0 &&
+ (vmcb->v_ghcb_gpa & PG_FRAME) != 0) {
+ /*
+ * Guest provides a valid guest physical address
+ * for GHCB and it is not set yet -> assign it.
+ *
+ * We only accept a GHCB once; we decline re-definition.
+ */
+ ghcb_gpa = vmcb->v_ghcb_gpa & PG_FRAME;
+ if (!pmap_extract(vm->vm_map->pmap, ghcb_gpa, &ghcb_hpa))
+ return (EINVAL);
+ vcpu->vc_svm_ghcb_va = (vaddr_t)PMAP_DIRECT_MAP(ghcb_hpa);
+ } else if ((vmcb->v_ghcb_gpa & ~PG_FRAME) != 0) {
+ /*
+ * Low bits in use, thus must be a MSR protocol
+ * request.
+ */
+ req = (vmcb->v_ghcb_gpa & 0xffffffff);
+
+ /* We only support cpuid and terminate. */
+ if ((req & ~PG_FRAME) == MSR_PROTO_TERMINATE) {
+ DPRINTF("%s: guest requests termination\n", __func__);
+ return (1);
+ } else if ((req & ~PG_FRAME) != MSR_PROTO_CPUID_REQ)
+ return (EINVAL);
+
+ /* Emulate CPUID */
+ vmcb->v_exitcode = SVM_VMEXIT_CPUID;
+ vmcb->v_rax = vmcb->v_ghcb_gpa >> 32;
+ vcpu->vc_gueststate.vg_rax = 0;
+ vcpu->vc_gueststate.vg_rbx = 0;
+ vcpu->vc_gueststate.vg_rcx = 0;
+ vcpu->vc_gueststate.vg_rdx = 0;
+ error = vmm_handle_cpuid(vcpu);
+ if (error)
+ goto out;
+
+ switch (req >> 30) {
+ case 0: /* eax: emulate cpuid and return eax */
+ result = vmcb->v_rax;
+ break;
+ case 1: /* return ebx */
+ result = vcpu->vc_gueststate.vg_rbx;
+ break;
+ case 2: /* return ecx */
+ result = vcpu->vc_gueststate.vg_rcx;
+ break;
+ case 3: /* return edx */
+ result = vcpu->vc_gueststate.vg_rdx;
+ break;
+ default:
+ DPRINTF("%s: unknown request 0x%x\n", __func__, req);
+ return (EINVAL);
+ }
+
+ /* build response */
+ resp = MSR_PROTO_CPUID_RESP | (req & 0xc0000000);
+ vmcb->v_ghcb_gpa = (result << 32) | resp;
+
+ return (0);
+ }
+
+ /* Verify GHCB and synchronize guest state information. */
+ ghcb = (struct ghcb_sa *)vcpu->vc_svm_ghcb_va;
+ if (svm_gexit_sync_host(vcpu)) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /* Handle GHCB protocol */
+ syncout = 0;
+ switch (vmcb->v_exitcode) {
+ case SVM_VMEXIT_CPUID:
+ error = vmm_handle_cpuid(vcpu);
+ vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
+ vcpu->vc_gueststate.vg_rax = vmcb->v_rax;
+ syncout = 1;
+ break;
+ case SVM_VMEXIT_IOIO:
+ if (svm_handle_inout(vcpu) == 0)
+ error = EAGAIN;
+ break;
+ case SVM_VMEXIT_MSR:
+ error = svm_handle_msr(vcpu);
+ vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
+ syncout = 1;
+ break;
+ default:
+ DPRINTF("%s: unknown exit 0x%llx\n", __func__,
+ vmcb->v_exitcode);
+ error = EINVAL;
+ }
+
+ if (syncout)
+ error = svm_gexit_sync_guest(vcpu);
+
+out:
+ return (error);
+}
+
+/*
+ * svm_handle_efercr
+ *
+ * With SEV-ES the hypervisor cannot intercept and modify writes
+ * to CR and EFER. However, a post-write intercept notifies it
+ * about the new state of these registers.
+ */
+int
+svm_handle_efercr(struct vcpu *vcpu, uint64_t exit_reason)
+{
+ struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+
+ switch (exit_reason) {
+ case SVM_VMEXIT_EFER_WRITE_TRAP:
+ vmcb->v_efer = vmcb->v_exitinfo1;
+ break;
+ case SVM_VMEXIT_CR0_WRITE_TRAP:
+ vmcb->v_cr0 = vmcb->v_exitinfo1;
+ break;
+ case SVM_VMEXIT_CR4_WRITE_TRAP:
+ vmcb->v_cr4 = vmcb->v_exitinfo1;
+ break;
+ case SVM_VMEXIT_CR8_WRITE_TRAP:
+ /* XXX hshoexer: no state for CR8? */
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+/*
+ * svm_get_iflag
+ *
+ * With SEV-ES the hypervisor has no access to the flags register.
+ * Only the state of PSL_I is provided by v_intr_shadow in
+ * the VMCB.
+ */
+int
+svm_get_iflag(struct vcpu *vcpu, uint64_t rflags)
+{
+ struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+
+ if (vcpu->vc_seves)
+ return (vmcb->v_intr_shadow & SMV_GUEST_INTR_MASK);
+ return (rflags & PSL_I);
+}
+
/*
* vmx_handle_exit
*
@@ -6104,6 +6572,8 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
vcpu->vc_gueststate.vg_rip =
vcpu->vc_exit.vrs.vrs_gprs[VCPU_REGS_RIP];
vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
+ if (svm_gexit_sync_guest(vcpu))
+ return (EINVAL);
break;
case SVM_VMEXIT_NPF:
ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_GPRS,
@@ -6235,8 +6705,13 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
KASSERT(vmcb->v_intercept1 & SVM_INTERCEPT_INTR);
wrmsr(MSR_AMD_VM_HSAVE_PA, vcpu->vc_svm_hsa_pa);
- ret = svm_enter_guest(vcpu->vc_control_pa,
- &vcpu->vc_gueststate, &gdt);
+ if (vcpu->vc_seves) {
+ ret = svm_seves_enter_guest(vcpu->vc_control_pa,
+ vcpu->vc_svm_hsa_va + SVM_HSA_OFFSET, &gdt);
+ } else {
+ ret = svm_enter_guest(vcpu->vc_control_pa,
+ &vcpu->vc_gueststate, &gdt);
+ }
/* Restore host PKRU state. */
if (vmm_softc->sc_md.pkru_enabled) {
@@ -6248,6 +6723,8 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
* On exit, interrupts are disabled, and we are running with
* the guest FPU state still possibly on the CPU. Save the FPU
* state before re-enabling interrupts.
+ *
+ * XXX hshoexer: With SEV-ES we should be able to skip this.
*/
vmm_fpusave(vcpu);
@@ -6275,7 +6752,7 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
*/
ret = svm_handle_exit(vcpu);
- if (vcpu->vc_gueststate.vg_rflags & PSL_I)
+ if (svm_get_iflag(vcpu, vcpu->vc_gueststate.vg_rflags))
vcpu->vc_irqready = 1;
else
vcpu->vc_irqready = 0;
@@ -6325,27 +6802,32 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
}
/*
- * vmm_alloc_vpid
+ * _vmm_alloc_vpid
*
* Sets the memory location pointed to by "vpid" to the next available VPID
- * or ASID.
+ * or ASID. For SEV-ES consider minimum ASID value for non-ES enabled guests.
*
* Parameters:
* vpid: Pointer to location to receive the next VPID/ASID
+ * vcpu: Pointer to VCPU data structure
*
* Return Values:
* 0: The operation completed successfully
* ENOMEM: No VPIDs/ASIDs were available. Content of 'vpid' is unchanged.
*/
int
-vmm_alloc_vpid(uint16_t *vpid)
+_vmm_alloc_vpid(uint16_t *vpid, struct vcpu *vcpu)
{
- uint16_t i;
+ uint16_t i, minasid;
uint8_t idx, bit;
struct vmm_softc *sc = vmm_softc;
rw_enter_write(&vmm_softc->vpid_lock);
- for (i = 1; i <= sc->max_vpid; i++) {
+ if (vcpu == NULL || vcpu->vc_seves)
+ minasid = 1;
+ else
+ minasid = amd64_min_noes_asid;
+ for (i = minasid; i <= sc->max_vpid; i++) {
idx = i / 8;
bit = i - (idx * 8);
@@ -6367,6 +6849,18 @@ vmm_alloc_vpid(uint16_t *vpid)
return ENOMEM;
}
+int
+vmm_alloc_vpid(uint16_t *vpid)
+{
+ return _vmm_alloc_vpid(vpid, NULL);
+}
+
+int
+vmm_alloc_asid(uint16_t *asid, struct vcpu *vcpu)
+{
+ return _vmm_alloc_vpid(asid, vcpu);
+}
+
/*
* vmm_free_vpid
*
@@ -6779,6 +7273,36 @@ vcpu_state_decode(u_int state)
}
}
+/*
+ * svm_get_vmsa
+ *
+ * Return physical address of VMSA for specified VCPU.
+ */
+int
+svm_get_vmsa(uint32_t vmid, uint32_t vcpuid, uint64_t *vmsapa)
+{
+ struct vm *vm;
+ struct vcpu *vcpu;
+ int error, ret = 0;
+
+ error = vm_find(vmid, &vm);
+ if (error)
+ return (error);
+
+ vcpu = vm_find_vcpu(vm, vcpuid);
+ if (vcpu == NULL) {
+ ret = ENOENT;
+ goto out;
+ }
+
+ if (vmsapa)
+ *vmsapa = vcpu->vc_svm_vmsa_pa;
+
+out:
+ refcnt_rele_wake(&vm->vm_refcnt);
+ return (ret);
+}
+
#ifdef VMM_DEBUG
/*
* dump_vcpu
diff --git a/sys/arch/amd64/amd64/vmm_support.S b/sys/arch/amd64/amd64/vmm_support.S
index 30c1b75834f..f9f663cff62 100644
--- a/sys/arch/amd64/amd64/vmm_support.S
+++ b/sys/arch/amd64/amd64/vmm_support.S
@@ -42,6 +42,7 @@
.global vmx_enter_guest
.global vmm_dispatch_intr
.global svm_enter_guest
+ .global svm_seves_enter_guest
.text
.code64
@@ -662,3 +663,163 @@ restore_host_svm:
ret
lfence
END(svm_enter_guest)
+
+/*
+ * When using SEV-ES we have to save some of the host registers to
+ * the host state save area (HSA). According to the AMD Programmer's
+ * Manual Volume 2 Appendix B the HSA has the same layout as the guest
+ * save area (VMSA) except that it starts at offset 0x400 in the HSA
+ * page.
+ */
+ENTRY(svm_seves_enter_guest)
+ RETGUARD_SETUP(svm_seves_enter_guest, r11)
+ clgi
+ movq %rdi, %r8
+ pushfq
+
+ pushq %rdx /* gdt pointer */
+
+ /*
+ * Save (possibly) lazy-switched selectors
+ */
+ strw %ax
+ pushw %ax
+ movw %es, %ax
+ pushw %ax
+ movw %ds, %ax
+ pushw %ax
+ movw %ss, %ax
+ pushw %ax
+
+ movq $MSR_FSBASE, %rcx
+ rdmsr
+ pushq %rax
+ pushq %rdx
+ pushw %fs
+ movq $MSR_GSBASE, %rcx
+ rdmsr
+ pushq %rax
+ pushq %rdx
+ pushw %gs
+ movq $MSR_KERNELGSBASE, %rcx
+ rdmsr
+ pushq %rax
+ pushq %rdx
+
+ /*
+ * Save various MSRs
+ */
+ movq $MSR_STAR, %rcx
+ rdmsr
+ pushq %rax
+ pushq %rdx
+
+ movq $MSR_LSTAR, %rcx
+ rdmsr
+ pushq %rax
+ pushq %rdx
+
+ movq $MSR_SFMASK, %rcx
+ rdmsr
+ pushq %rax
+ pushq %rdx
+
+ RETGUARD_PUSH(r11)
+
+ /*
+ * Preserve callee-preserved registers as per AMD64 ABI in
+ * HSA. Although all registers will be restored from HSA
+ * on vmexit, these will not be saved on vmrun.
+ */
+ movq %r15, 0x378(%rsi)
+ movq %r14, 0x370(%rsi)
+ movq %r13, 0x368(%rsi)
+ movq %r12, 0x360(%rsi)
+ movq %rbp, 0x328(%rsi)
+ movq %rbx, 0x318(%rsi)
+
+ movq %r8, %rax /* rax = vmcb pa */
+
+ vmrun %rax
+
+ /* %rdi = 0 means we took an exit */
+ xorq %rdi, %rdi
+
+ RETGUARD_POP(r11)
+
+ /*
+ * Restore saved MSRs
+ */
+ popq %rdx
+ popq %rax
+ movq $MSR_SFMASK, %rcx
+ wrmsr
+
+ /* make sure guest doesn't bleed into host */
+ xorl %edx, %edx
+ xorl %eax, %eax
+ movq $MSR_CSTAR, %rcx
+ wrmsr
+
+ popq %rdx
+ popq %rax
+ movq $MSR_LSTAR, %rcx
+ wrmsr
+
+ popq %rdx
+ popq %rax
+ movq $MSR_STAR, %rcx
+ wrmsr
+
+ /*
+ * popw %gs will reset gsbase to 0, so preserve it
+ * first. This is to accommodate possibly lazy-switched
+ * selectors from above
+ */
+ cli
+ popq %rdx
+ popq %rax
+ movq $MSR_KERNELGSBASE, %rcx
+ wrmsr
+
+ popw %gs
+ popq %rdx
+ popq %rax
+ movq $MSR_GSBASE, %rcx
+ wrmsr
+
+ popw %fs
+ popq %rdx
+ popq %rax
+ movq $MSR_FSBASE, %rcx
+ wrmsr
+
+ popw %ax
+ movw %ax, %ss
+ popw %ax
+ movw %ax, %ds
+ popw %ax
+ movw %ax, %es
+
+ xorq %rax, %rax
+ lldtw %ax /* Host LDT is always 0 */
+
+ popw %ax /* ax = saved TR */
+
+ popq %rdx
+ addq $0x2, %rdx
+ movq (%rdx), %rdx
+
+ /* rdx = GDTR base addr */
+ andb $0xF9, 5(%rdx, %rax)
+
+ ltrw %ax
+
+ popfq
+
+ movq %rdi, %rax
+
+ RETGUARD_CHECK(svm_seves_enter_guest, r11)
+ ret
+ lfence
+END(svm_seves_enter_guest)
diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64
index 9b3aec4998c..081a21c3718 100644
--- a/sys/arch/amd64/conf/files.amd64
+++ b/sys/arch/amd64/conf/files.amd64
@@ -29,6 +29,7 @@ file arch/amd64/amd64/fpu.c
file arch/amd64/amd64/softintr.c
file arch/amd64/amd64/i8259.c
file arch/amd64/amd64/cacheinfo.c
+file arch/amd64/amd64/ghcb.c
file arch/amd64/amd64/vector.S
file arch/amd64/amd64/copy.S
file arch/amd64/amd64/spl.S
diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
index fc560c0ffce..5f66dd2c6f7 100644
--- a/sys/arch/amd64/include/cpu.h
+++ b/sys/arch/amd64/include/cpu.h
@@ -501,7 +501,8 @@ void mp_setperf_init(void);
#define CPU_INVARIANTTSC 17 /* has invariant TSC */
#define CPU_PWRACTION 18 /* action caused by power button */
#define CPU_RETPOLINE 19 /* cpu requires retpoline pattern */
-#define CPU_MAXID 20 /* number of valid machdep ids */
+#define CPU_SEVGUESTMODE 20 /* running as SEV guest */
+#define CPU_MAXID 21 /* number of valid machdep ids */
#define CTL_MACHDEP_NAMES { \
{ 0, 0 }, \
@@ -524,6 +525,7 @@ void mp_setperf_init(void);
{ "invarianttsc", CTLTYPE_INT }, \
{ "pwraction", CTLTYPE_INT }, \
{ "retpoline", CTLTYPE_INT }, \
+ { "sevguestmode", CTLTYPE_INT}, \
}
#endif /* !_MACHINE_CPU_H_ */
diff --git a/sys/arch/amd64/include/cpufunc.h b/sys/arch/amd64/include/cpufunc.h
index 6c09c256489..42953ef7c7d 100644
--- a/sys/arch/amd64/include/cpufunc.h
+++ b/sys/arch/amd64/include/cpufunc.h
@@ -420,6 +420,27 @@ breakpoint(void)
__asm volatile("int $3");
}
+/* VMGEXIT */
+static __inline void
+vmgexit(void)
+{
+ __asm volatile("rep; vmmcall");
+}
+
+/* Request VM termination from hypervisor. */
+static __inline void
+vmterminate(void)
+{
+ __asm volatile(
+ " movl $0x100, %%eax ;" /* MSR_PROTO_TERMINATE */
+ " xorl %%edx, %%edx ;"
+ " movl $0xc0010130, %%ecx ;" /* MSR_SEV_GHCB */
+ " wrmsr ;"
+ " rep vmmcall ;"
+ "1: hlt ;"
+ " jmp 1b ;"
+ : : : "eax", "ecx", "edx");
+}
+
void amd64_errata(struct cpu_info *);
void cpu_ucode_setup(void);
void cpu_ucode_apply(struct cpu_info *);
diff --git a/sys/arch/amd64/include/ghcb.h b/sys/arch/amd64/include/ghcb.h
new file mode 100644
index 00000000000..3d4b92188f4
--- /dev/null
+++ b/sys/arch/amd64/include/ghcb.h
@@ -0,0 +1,120 @@
+/* $OpenBSD:$ */
+
+/*
+ * Copyright (c) 2024, 2025 Hans-Joerg Hoexer <hshoexer@genua.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _MACHINE_GHCB_H_
+#define _MACHINE_GHCB_H_
+
+#define GHCB_OFFSET(m) ((m) / 8)
+#define GHCB_IDX(m) (GHCB_OFFSET((m)) / 8)
+#define GHCB_BIT(m) (GHCB_OFFSET((m)) % 8)
+
+#define GHCB_RAX 0x1F8
+#define GHCB_RBX 0x318
+#define GHCB_RCX 0x308
+#define GHCB_RDX 0x310
+#define GHCB_SW_EXITCODE 0x390
+#define GHCB_SW_EXITINFO1 0x398
+#define GHCB_SW_EXITINFO2 0x3A0
+
+#define GHCB_MAX 0xFFF
+
+struct ghcb_sa {
+ uint8_t v_pad0[0xcb]; /* 000h-0CAh */
+ uint8_t v_cpl; /* 0CBh */
+ uint8_t v_pad1[0x74]; /* 0CCh-13Fh */
+ uint64_t v_xss; /* 140h */
+ uint8_t v_pad2[0x18]; /* 148h-15Fh */
+ uint64_t v_dr7; /* 160h */
+ uint8_t v_pad3[0x10]; /* 168h-177h */
+ uint64_t v_rip; /* 178h */
+ uint8_t v_pad4[0x58]; /* 180h-1D7h */
+ uint64_t v_rsp; /* 1D8h */
+ uint8_t v_pad5[0x18]; /* 1E0h-1F7h */
+ uint64_t v_rax; /* 1F8h */
+ uint8_t v_pad6[0x108]; /* 200h-307h */
+ uint64_t v_rcx; /* 308h */
+ uint64_t v_rdx; /* 310h */
+ uint64_t v_rbx; /* 318h */
+ uint8_t v_pad7[0x8]; /* 320h-327h */
+ uint64_t v_rbp; /* 328h */
+ uint64_t v_rsi; /* 330h */
+ uint64_t v_rdi; /* 338h */
+ uint64_t v_r8; /* 340h */
+ uint64_t v_r9; /* 348h */
+ uint64_t v_r10; /* 350h */
+ uint64_t v_r11; /* 358h */
+ uint64_t v_r12; /* 360h */
+ uint64_t v_r13; /* 368h */
+ uint64_t v_r14; /* 370h */
+ uint64_t v_r15; /* 378h */
+ uint8_t v_pad8[0x10]; /* 380h-38Fh */
+ uint64_t v_sw_exitcode; /* 390h */
+ uint64_t v_sw_exitinfo1; /* 398h */
+ uint64_t v_sw_exitinfo2; /* 3a0h */
+ uint64_t v_sw_scratch; /* 3a8h */
+ uint8_t v_pad9[0x38]; /* 3B0h-3E7h */
+ uint64_t v_xcr0; /* 3E8h */
+#define GHCB_VB_SZ 0x10
+ uint8_t valid_bitmap[GHCB_VB_SZ];
+ /* 3F0h-3FFh */
+ uint64_t v_x87_state_gpa; /* 400h */
+ uint8_t v_pad10[0x3f8]; /* 408h-7FFh */
+ uint8_t v_sharedbuf[0x7f0]; /* 800h-FEFh */
+ uint8_t v_pad11[0xa]; /* FF0h-FF9h */
+ uint16_t v_ghcb_proto_version; /* FFAh-FFBh */
+ uint32_t v_ghcb_usage; /* FFCh-FFFh */
+};
+
+
+#define GHCB_SZ8 0
+#define GHCB_SZ16 1
+#define GHCB_SZ32 2
+#define GHCB_SZ64 3
+
+struct ghcb_sync {
+ uint8_t valid_bitmap[GHCB_VB_SZ];
+
+ int sz_a;
+ int sz_b;
+ int sz_c;
+ int sz_d;
+};
+
+
+/* Definitions used with the MSR protocol */
+#define MSR_PROTO_CPUID_REQ 0x4
+#define MSR_PROTO_CPUID_RESP 0x5
+#define MSR_PROTO_TERMINATE 0x100
+
+
+extern vaddr_t ghcb_vaddr;
+extern paddr_t ghcb_paddr;
+
+void ghcb_clear(struct ghcb_sa *);
+int ghcb_valbm_set(uint8_t *, int);
+int ghcb_valbm_isset(uint8_t *, int);
+int ghcb_valid(struct ghcb_sa *);
+int ghcb_verify_bm(uint8_t *, uint8_t *);
+int ghcb_verify_bm_guest(uint8_t *, uint8_t *);
+
+void ghcb_sync_val(int, int, struct ghcb_sync *);
+void ghcb_sync_out(struct trapframe *, uint64_t, uint64_t, uint64_t,
+ struct ghcb_sa *, struct ghcb_sync *);
+void ghcb_sync_in(struct trapframe *, struct ghcb_sa *, struct ghcb_sync *);
+
+#endif /* !_MACHINE_GHCB_H_ */
diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
index 4208e2e13a1..a82acb4062f 100644
--- a/sys/arch/amd64/include/specialreg.h
+++ b/sys/arch/amd64/include/specialreg.h
@@ -723,8 +723,13 @@
#define NB_CFG_DISIOREQLOCK 0x0000000000000004ULL
#define NB_CFG_DISDATMSK 0x0000001000000000ULL
+#define MSR_SEV_GHCB 0xc0010130
+#define SEV_CPUID_REQ 0x00000004
+#define SEV_CPUID_RESP 0x00000005
+
#define MSR_SEV_STATUS 0xc0010131
#define SEV_STAT_ENABLED 0x00000001
+#define SEV_STAT_ES_ENABLED 0x00000002
#define MSR_LS_CFG 0xc0011020
#define LS_CFG_DIS_LS2_SQUISH 0x02000000
diff --git a/sys/arch/amd64/include/trap.h b/sys/arch/amd64/include/trap.h
index fa322ba9566..7506dddf804 100644
--- a/sys/arch/amd64/include/trap.h
+++ b/sys/arch/amd64/include/trap.h
@@ -62,3 +62,4 @@
#define T_XMM 19 /* SSE FP exception */
#define T_VE 20 /* virtualization exception */
#define T_CP 21 /* control protection exception */
+#define T_VC 29 /* VMM communication exception */
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index cd04fe4f07b..a32f9080ab6 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -625,6 +625,11 @@ struct vmcb_segment {
#define SVM_ENABLE_NP (1ULL << 0)
#define SVM_ENABLE_SEV (1ULL << 1)
+#define SVM_SEVES_ENABLE (1ULL << 2)
+
+#define SMV_GUEST_INTR_MASK (1ULL << 1)
+
+#define SVM_LBRVIRT_ENABLE (1ULL << 0)
struct vmcb {
union {
@@ -656,7 +661,7 @@ struct vmcb {
uint64_t v_exitintinfo; /* 088h */
uint64_t v_np_enable; /* 090h */
uint64_t v_avic_apic_bar; /* 098h */
- uint64_t v_pad4; /* 0A0h */
+ uint64_t v_ghcb_gpa; /* 0A0h */
uint64_t v_eventinj; /* 0A8h */
uint64_t v_n_cr3; /* 0B0h */
uint64_t v_lbr_virt_enable; /* 0B8h */
@@ -668,6 +673,8 @@ struct vmcb {
uint64_t v_pad5; /* 0E8h-0EFh */
uint64_t v_avic_logical_table; /* 0F0h */
uint64_t v_avic_phys; /* 0F8h */
+ uint64_t v_pad12; /* 100h */
+ uint64_t v_vmsa_pa; /* 108h */
};
uint8_t vmcb_control[0x400];
@@ -723,6 +730,135 @@ struct vmcb {
};
};
+struct vmsa {
+ struct vmcb_segment v_es; /* 000h */
+ struct vmcb_segment v_cs; /* 010h */
+ struct vmcb_segment v_ss; /* 020h */
+ struct vmcb_segment v_ds; /* 030h */
+ struct vmcb_segment v_fs; /* 040h */
+ struct vmcb_segment v_gs; /* 050h */
+ struct vmcb_segment v_gdtr; /* 060h */
+ struct vmcb_segment v_ldtr; /* 070h */
+ struct vmcb_segment v_idtr; /* 080h */
+ struct vmcb_segment v_tr; /* 090h */
+ uint64_t v_pl0_ssp; /* 0A0h */
+ uint64_t v_pl1_ssp; /* 0A8h */
+ uint64_t v_pl2_ssp; /* 0B0h */
+ uint64_t v_pl3_ssp; /* 0B8h */
+ uint64_t v_u_cet; /* 0C0h */
+ uint8_t v_pad1[0x2]; /* 0C8h-0C9h */
+ uint8_t v_vmpl; /* 0CAh */
+ uint8_t v_cpl; /* 0CBh */
+ uint8_t v_pad2[0x4]; /* 0CCh-0CFh */
+ uint64_t v_efer; /* 0D0h */
+ uint8_t v_pad3[0x68]; /* 0D8h-13Fh */
+ uint64_t v_xss; /* 140h */
+ uint64_t v_cr4; /* 148h */
+ uint64_t v_cr3; /* 150h */
+ uint64_t v_cr0; /* 158h */
+ uint64_t v_dr7; /* 160h */
+ uint64_t v_dr6; /* 168h */
+ uint64_t v_rflags; /* 170h */
+ uint64_t v_rip; /* 178h */
+ uint64_t v_dr0; /* 180h */
+ uint64_t v_dr1; /* 188h */
+ uint64_t v_dr2; /* 190h */
+ uint64_t v_dr3; /* 198h */
+ uint64_t v_dr0_addr_msk; /* 1A0h */
+ uint64_t v_dr1_addr_msk; /* 1A8h */
+ uint64_t v_dr2_addr_msk; /* 1B0h */
+ uint64_t v_dr3_addr_msk; /* 1B8h */
+ uint8_t v_pad4[0x18]; /* 1C0h-1D7h */
+ uint64_t v_rsp; /* 1D8h */
+ uint64_t v_s_cet; /* 1E0h */
+ uint64_t v_ssp; /* 1E8h */
+ uint64_t v_isst_addr; /* 1F0h */
+ uint64_t v_rax; /* 1F8h */
+ uint64_t v_star; /* 200h */
+ uint64_t v_lstar; /* 208h */
+ uint64_t v_cstar; /* 210h */
+ uint64_t v_sfmask; /* 218h */
+ uint64_t v_kgsbase; /* 220h */
+ uint64_t v_sysenter_cs; /* 228h */
+ uint64_t v_sysenter_esp; /* 230h */
+ uint64_t v_sysenter_eip; /* 238h */
+ uint64_t v_cr2; /* 240h */
+ uint8_t v_pad5[0x20]; /* 248h-267h */
+ uint64_t v_g_pat; /* 268h */
+ uint64_t v_dbgctl; /* 270h */
+ uint64_t v_br_from; /* 278h */
+ uint64_t v_br_to; /* 280h */
+ uint64_t v_lastexcpfrom; /* 288h */
+ uint64_t v_lastexcpto; /* 290h */
+ uint8_t v_pad6[0x48]; /* 298h-2DFh */
+ uint8_t v_pad7[0x8]; /* 2E0h-2E7h */
+ uint32_t v_pkru; /* 2E8h */
+ uint32_t v_tsc_aux; /* 2ECh */
+ uint64_t v_gst_tsc_scale;/* 2F0h */
+ uint64_t v_gst_tsc_off; /* 2F8h */
+ uint64_t v_reg_prot_nce; /* 300h */
+ uint64_t v_rcx; /* 308h */
+ uint64_t v_rdx; /* 310h */
+ uint64_t v_rbx; /* 318h */
+ uint64_t v_pad8; /* 320h */
+ uint64_t v_rbp; /* 328h */
+ uint64_t v_rsi; /* 330h */
+ uint64_t v_rdi; /* 338h */
+ uint64_t v_r8; /* 340h */
+ uint64_t v_r9; /* 348h */
+ uint64_t v_r10; /* 350h */
+ uint64_t v_r11; /* 358h */
+ uint64_t v_r12; /* 360h */
+ uint64_t v_r13; /* 368h */
+ uint64_t v_r14; /* 370h */
+ uint64_t v_r15; /* 378h */
+ uint8_t v_pad9[0x10]; /* 380h-38Fh */
+ uint64_t v_gst_exitinfo1;/* 390h */
+ uint64_t v_gst_exitinfo2;/* 398h */
+ uint64_t v_gst_exitiinfo;/* 3A0h */
+ uint64_t v_gst_nrip; /* 3A8h */
+ uint64_t v_sev_features; /* 3B0h */
+ uint64_t v_intr_ctrl; /* 3B8h */
+ uint64_t v_gst_exitcode; /* 3C0h */
+ uint64_t v_virtual_tom; /* 3C8h */
+ uint64_t v_tlb_id; /* 3D0h */
+ uint64_t v_pcup_id; /* 3D8h */
+ uint64_t v_eventinj; /* 3E0h */
+ uint64_t v_xcr0; /* 3E8h */
+ uint8_t v_pad10[0x10]; /* 3F0h-3FFh */
+ uint64_t v_x87_dp; /* 400h */
+ uint32_t v_mxcsr; /* 408h */
+ uint16_t v_x87_ftw; /* 40Ch */
+ uint16_t v_x87_fsw; /* 40Eh */
+ uint16_t v_x87_fcw; /* 410h */
+ uint16_t v_x87_fop; /* 412h */
+ uint16_t v_x87_ds; /* 414h */
+ uint16_t v_x87_cs; /* 416h */
+ uint64_t v_x87_rip; /* 418h */
+ uint8_t v_fp_x87[0x50]; /* 420h-46Fh */
+ uint8_t v_fp_xmm[0x100];/* 470h-56Fh */
+ uint8_t v_fp_ymm[0x100];/* 570h-66Fh */
+ uint8_t v_lbr_st[0x100];/* 670h-76Fh */
+ uint64_t v_lbr_select; /* 770h */
+ uint64_t v_ibs_fetch_ctl;/* 778h */
+ uint64_t v_ibs_fetch_la; /* 780h */
+ uint64_t v_ibs_op_ctl; /* 788h */
+ uint64_t v_ibs_op_rip; /* 790h */
+ uint64_t v_ibs_op_data; /* 798h */
+ uint64_t v_ibs_op_data2; /* 7A0h */
+ uint64_t v_ibs_op_data3; /* 7A8h */
+ uint64_t v_ibs_dc_la; /* 7B0h */
+ uint64_t v_ibstgt_rip; /* 7B8h */
+ uint64_t v_ic_ibs_xtd_ct;/* 7C0h */
+};
+
+/*
+ * With SEV-ES the host save area (HSA) has the same layout as the
+ * VMSA. However, it starts at offset 0x400 into the HSA page.
+ * See AMD APM Vol 2, Appendix B.
+ */
+#define SVM_HSA_OFFSET 0x400
+
struct vmcs {
uint32_t vmcs_revision;
};
@@ -876,9 +1012,14 @@ struct vcpu {
/* SVM only (all requiring [v]) */
vaddr_t vc_svm_hsa_va;
paddr_t vc_svm_hsa_pa;
+ vaddr_t vc_svm_vmsa_va;
+ paddr_t vc_svm_vmsa_pa;
+ vaddr_t vc_svm_ghcb_va;
+ paddr_t vc_svm_ghcb_pa;
vaddr_t vc_svm_ioio_va;
paddr_t vc_svm_ioio_pa;
int vc_sev; /* [I] */
+ int vc_seves; /* [I] */
};
SLIST_HEAD(vcpu_head, vcpu);
@@ -896,6 +1037,7 @@ int invept(uint64_t, struct vmx_invept_descriptor *);
int vmx_enter_guest(paddr_t *, struct vcpu_gueststate *, int, uint8_t);
int svm_enter_guest(uint64_t, struct vcpu_gueststate *,
struct region_descriptor *);
+int svm_seves_enter_guest(uint64_t, vaddr_t, struct region_descriptor *);
void start_vmm_on_cpu(struct cpu_info *);
void stop_vmm_on_cpu(struct cpu_info *);
void vmclear_on_cpu(struct cpu_info *);
@@ -911,6 +1053,7 @@ int vcpu_init(struct vcpu *, struct vm_create_params *);
void vcpu_deinit(struct vcpu *);
int vm_rwregs(struct vm_rwregs_params *, int);
int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
+int svm_get_vmsa(uint32_t, uint32_t, uint64_t *);
#endif /* _KERNEL */
diff --git a/sys/dev/ic/psp.c b/sys/dev/ic/psp.c
index bd991481062..f5ee6a36d72 100644
--- a/sys/dev/ic/psp.c
+++ b/sys/dev/ic/psp.c
@@ -29,6 +29,7 @@
#include <uvm/uvm_extern.h>
#include <crypto/xform.h>
+#include <machine/vmmvar.h>
#include <dev/ic/ccpvar.h>
#include <dev/ic/pspvar.h>
@@ -537,6 +538,32 @@ out:
return (error);
}
+int
+psp_launch_update_vmsa(struct psp_softc *sc,
+ struct psp_launch_update_vmsa *uluv)
+{
+ struct psp_launch_update_vmsa *luvmsa;
+ int error;
+
+ luvmsa = (struct psp_launch_update_vmsa *)sc->sc_cmd_kva;
+ bzero(luvmsa, sizeof(*luvmsa));
+
+ luvmsa->handle = uluv->handle;
+ luvmsa->paddr = uluv->paddr;
+ luvmsa->length = PAGE_SIZE;
+
+ /* Drain caches before we encrypt the VMSA. */
+ wbinvd_on_all_cpus_acked();
+
+ error = ccp_docmd(sc, PSP_CMD_LAUNCH_UPDATE_VMSA,
+ sc->sc_cmd_map->dm_segs[0].ds_addr);
+
+ if (error != 0)
+ return (EIO);
+
+ return (0);
+}
+
int
psp_launch_measure(struct psp_softc *sc, struct psp_launch_measure *ulm)
{
@@ -633,6 +660,26 @@ psp_activate(struct psp_softc *sc, struct psp_activate *uact)
return (error);
}
+int
+psp_encrypt_state(struct psp_softc *sc, struct psp_encrypt_state *ues)
+{
+ struct psp_launch_update_vmsa luvmsa;
+ uint64_t vmsa_paddr;
+ int error;
+
+ error = svm_get_vmsa(ues->vmid, ues->vcpuid, &vmsa_paddr);
+ if (error != 0)
+ return (error);
+
+ bzero(&luvmsa, sizeof(luvmsa));
+ luvmsa.handle = ues->handle;
+ luvmsa.paddr = vmsa_paddr;
+
+ error = psp_launch_update_vmsa(sc, &luvmsa);
+
+ return (error);
+}
+
int
psp_deactivate(struct psp_softc *sc, struct psp_deactivate *udeact)
{
@@ -814,6 +861,10 @@ pspioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
error = psp_launch_update_data(sc,
(struct psp_launch_update_data *)data, p);
break;
+ case PSP_IOC_LAUNCH_UPDATE_VMSA:
+ error = psp_launch_update_vmsa(sc,
+ (struct psp_launch_update_vmsa *)data);
+ break;
case PSP_IOC_LAUNCH_MEASURE:
error = psp_launch_measure(sc,
(struct psp_launch_measure *)data);
@@ -838,6 +889,9 @@ pspioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
error = psp_snp_get_pstatus(sc,
(struct psp_snp_platform_status *)data);
break;
+ case PSP_IOC_ENCRYPT_STATE:
+ error = psp_encrypt_state(sc, (struct psp_encrypt_state *)data);
+ break;
default:
error = ENOTTY;
break;
@@ -862,6 +916,7 @@ pledge_ioctl_psp(struct proc *p, long com)
case PSP_IOC_LAUNCH_MEASURE:
case PSP_IOC_LAUNCH_FINISH:
case PSP_IOC_ACTIVATE:
+ case PSP_IOC_ENCRYPT_STATE:
case PSP_IOC_GUEST_SHUTDOWN:
return (0);
default:
diff --git a/sys/dev/ic/pspvar.h b/sys/dev/ic/pspvar.h
index fa36a3a90db..b3889d84fc3 100644
--- a/sys/dev/ic/pspvar.h
+++ b/sys/dev/ic/pspvar.h
@@ -86,6 +86,7 @@
#define PSP_CMD_GUESTSTATUS 0x23
#define PSP_CMD_LAUNCH_START 0x30
#define PSP_CMD_LAUNCH_UPDATE_DATA 0x31
+#define PSP_CMD_LAUNCH_UPDATE_VMSA 0x32
#define PSP_CMD_LAUNCH_MEASURE 0x33
#define PSP_CMD_LAUNCH_FINISH 0x35
#define PSP_CMD_ATTESTATION 0x36
@@ -133,6 +134,24 @@ struct psp_launch_update_data {
uint32_t length;
} __packed;
+struct psp_launch_update_vmsa {
+ /* Input parameters for PSP_CMD_LAUNCH_UPDATE_VMSA */
+ uint32_t handle;
+ uint32_t reserved;
+ uint64_t paddr;
+ uint32_t length;
+} __packed;
+
+struct psp_encrypt_state {
+ /*
+ * Input parameters for guest state encryption
+ */
+ uint32_t handle;
+ uint32_t asid;
+ uint32_t vmid;
+ uint32_t vcpuid;
+} __packed;
+
struct psp_measure {
/* Output buffer for PSP_CMD_LAUNCH_MEASURE */
uint8_t measure[32];
@@ -258,6 +277,9 @@ struct psp_snp_platform_status {
#define PSP_IOC_SNP_GET_PSTATUS _IOR('P', 11, struct psp_snp_platform_status)
#define PSP_IOC_INIT _IO('P', 12)
#define PSP_IOC_SHUTDOWN _IO('P', 13)
+#define PSP_IOC_LAUNCH_UPDATE_VMSA \
+ _IOW('P', 14, struct psp_launch_update_vmsa)
+#define PSP_IOC_ENCRYPT_STATE _IOW('P', 254, struct psp_encrypt_state)
#define PSP_IOC_GUEST_SHUTDOWN _IOW('P', 255, struct psp_guest_shutdown)
#ifdef _KERNEL
diff --git a/sys/dev/vmm/vmm.c b/sys/dev/vmm/vmm.c
index 0a86ddbecd3..bdc93928123 100644
--- a/sys/dev/vmm/vmm.c
+++ b/sys/dev/vmm/vmm.c
@@ -401,6 +401,8 @@ vm_create(struct vm_create_params *vcp, struct proc *p)
vcpu->vc_parent = vm;
vcpu->vc_id = vm->vm_vcpu_ct;
vm->vm_vcpu_ct++;
+ vcpu->vc_sev = vcp->vcp_sev;
+ vcpu->vc_seves = vcp->vcp_seves;
if ((ret = vcpu_init(vcpu, vcp)) != 0) {
printf("failed to init vcpu %d for vm %p\n", i, vm);
vm_teardown(&vm);
diff --git a/sys/dev/vmm/vmm.h b/sys/dev/vmm/vmm.h
index ca5a152f550..886ff51760a 100644
--- a/sys/dev/vmm/vmm.h
+++ b/sys/dev/vmm/vmm.h
@@ -50,6 +50,7 @@ struct vm_create_params {
struct vm_mem_range vcp_memranges[VMM_MAX_MEM_RANGES];
char vcp_name[VMM_MAX_NAME_LEN];
int vcp_sev;
+ int vcp_seves;
/* Output parameter from VMM_IOC_CREATE */
uint32_t vcp_id;
diff --git a/usr.sbin/vmd/loadfile_elf.c b/usr.sbin/vmd/loadfile_elf.c
index 2b62ca07565..73c2010397d 100644
--- a/usr.sbin/vmd/loadfile_elf.c
+++ b/usr.sbin/vmd/loadfile_elf.c
@@ -110,7 +110,7 @@ union {
} hdr;
static void setsegment(struct mem_segment_descriptor *, uint32_t,
- size_t, int, int, int, int);
+ size_t, int, int, int, int, int);
static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int);
static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int);
static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *);
@@ -148,7 +148,7 @@ uint64_t pg_crypt = 0;
*/
static void
setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
- int type, int dpl, int def32, int gran)
+ int type, int dpl, int def32, int gran, int lm)
{
sd->sd_lolimit = (int)limit;
sd->sd_lobase = (int)base;
@@ -157,7 +157,7 @@ setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit,
sd->sd_p = 1;
sd->sd_hilimit = (int)limit >> 16;
sd->sd_avl = 0;
- sd->sd_long = 0;
+ sd->sd_long = lm;
sd->sd_def32 = def32;
sd->sd_gran = gran;
sd->sd_hibase = (int)base >> 24;
@@ -185,11 +185,13 @@ push_gdt(void)
* Create three segment descriptors:
*
* GDT[0] : null descriptor. "Created" via memset above.
- * GDT[1] (selector @ 0x8): Executable segment, for CS
+ * GDT[1] (selector @ 0x8): Executable segment (compat mode), for CS
* GDT[2] (selector @ 0x10): RW Data segment, for DS/ES/SS
+ * GDT[3] (selector @ 0x18): Executable segment (long mode), for CS
*/
- setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1);
- setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1);
+ setsegment(&sd[1], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 1, 1, 0);
+ setsegment(&sd[2], 0, 0xffffffff, SDT_MEMRWA, SEL_KPL, 1, 1, 0);
+ setsegment(&sd[3], 0, 0xffffffff, SDT_MEMERA, SEL_KPL, 0, 1, 1);
write_mem(GDT_PAGE, gdtpage, PAGE_SIZE);
sev_register_encryption(GDT_PAGE, PAGE_SIZE);
diff --git a/usr.sbin/vmd/parse.y b/usr.sbin/vmd/parse.y
index 3f6866eb592..a98d85c0f95 100644
--- a/usr.sbin/vmd/parse.y
+++ b/usr.sbin/vmd/parse.y
@@ -123,7 +123,7 @@ typedef struct {
%token FORMAT GROUP
%token INET6 INSTANCE INTERFACE LLADDR LOCAL LOCKED MEMORY NET NIFS OWNER
%token PATH PREFIX RDOMAIN SIZE SOCKET SWITCH UP VM VMID STAGGERED START
-%token PARALLEL DELAY SEV
+%token PARALLEL DELAY SEV SEVES
%token <v.number> NUMBER
%token <v.string> STRING
%type <v.lladdr> lladdr
@@ -138,6 +138,7 @@ typedef struct {
%type <v.string> string
%type <v.string> vm_instance
%type <v.number> sev;
+%type <v.number> seves;
%%
@@ -415,6 +416,9 @@ vm_opts : disable {
| sev {
vcp->vcp_sev = 1;
}
+ | seves {
+ vcp->vcp_sev = vcp->vcp_seves = 1;
+ }
| DISK string image_format {
if (parse_disk($2, $3) != 0) {
yyerror("failed to parse disks: %s", $2);
@@ -761,6 +765,9 @@ disable : ENABLE { $$ = 0; }
sev : SEV { $$ = 1; }
;
+seves : SEVES { $$ = 1; }
+ ;
+
bootdevice : CDROM { $$ = VMBOOTDEV_CDROM; }
| DISK { $$ = VMBOOTDEV_DISK; }
| NET { $$ = VMBOOTDEV_NET; }
@@ -846,6 +853,7 @@ lookup(char *s)
{ "prefix", PREFIX },
{ "rdomain", RDOMAIN },
{ "sev", SEV },
+ { "seves", SEVES },
{ "size", SIZE },
{ "socket", SOCKET },
{ "staggered", STAGGERED },
diff --git a/usr.sbin/vmd/psp.c b/usr.sbin/vmd/psp.c
index 320da37dc99..da6c521f823 100644
--- a/usr.sbin/vmd/psp.c
+++ b/usr.sbin/vmd/psp.c
@@ -118,7 +118,7 @@ psp_get_gstate(uint32_t handle, uint32_t *policy, uint32_t *asid,
* Start the launch sequence of a guest.
*/
int
-psp_launch_start(uint32_t *handle)
+psp_launch_start(uint32_t *handle, int seves)
{
struct psp_launch_start ls;
@@ -128,6 +128,9 @@ psp_launch_start(uint32_t *handle)
ls.policy = (GPOL_NODBG | GPOL_NOKS | GPOL_NOSEND | GPOL_DOMAIN |
GPOL_SEV);
+ if (seves) /* Add ES */
+ ls.policy |= GPOL_ES;
+
if (ioctl(env->vmd_psp_fd, PSP_IOC_LAUNCH_START, &ls) < 0) {
log_warn("%s: ioctl", __func__);
return (-1);
@@ -170,6 +173,27 @@ psp_launch_update(uint32_t handle, vaddr_t v, size_t len)
* the PSP, the measurement is not really meaningful. Thus we just
* log it for now.
*/
+int
+psp_encrypt_state(uint32_t handle, uint32_t asid, uint32_t vmid,
+ uint32_t vcpuid)
+{
+ struct psp_encrypt_state es;
+
+ memset(&es, 0, sizeof(es));
+ es.handle = handle;
+ es.asid = asid;
+ es.vmid = vmid;
+ es.vcpuid = vcpuid;
+
+ if (ioctl(env->vmd_psp_fd, PSP_IOC_ENCRYPT_STATE, &es) < 0) {
+ log_warn("%s: ioctl", __func__);
+ return (-1);
+ }
+
+ return (0);
+}
+
+
int
psp_launch_measure(uint32_t handle)
{
diff --git a/usr.sbin/vmd/sev.c b/usr.sbin/vmd/sev.c
index d5216461392..c2add19bfd9 100644
--- a/usr.sbin/vmd/sev.c
+++ b/usr.sbin/vmd/sev.c
@@ -58,7 +58,7 @@ sev_init(struct vmd_vm *vm)
return (-1);
}
- if (psp_launch_start(&handle) < 0) {
+ if (psp_launch_start(&handle, vcp->vcp_seves) < 0) {
log_warnx("%s: launch failed", __func__);
return (-1);
}
@@ -148,7 +148,6 @@ sev_encrypt_memory(struct vmd_vm *vm)
struct vm_create_params *vcp = &vmc->vmc_params;
struct vm_mem_range *vmr;
size_t i;
- uint8_t gstate;
if (!vcp->vcp_sev)
return (0);
@@ -167,23 +166,6 @@ sev_encrypt_memory(struct vmd_vm *vm)
log_debug("%s: encrypted %zu:0x%lx size 0x%lx", __func__, i,
vmr->vmr_va, vmr->vmr_size);
}
- if (psp_launch_measure(vm->vm_sev_handle)) {
- log_warnx("%s: failed to launch measure", __func__);
- return (-1);
- }
- if (psp_launch_finish(vm->vm_sev_handle)) {
- log_warnx("%s: failed to launch finish", __func__);
- return (-1);
- }
-
- if (psp_get_gstate(vm->vm_sev_handle, NULL, NULL, &gstate)) {
- log_warnx("%s: failed to get guest state", __func__);
- return (-1);
- }
- if (gstate != PSP_GSTATE_RUNNING) {
- log_warnx("%s: invalid guest state: 0x%hx", __func__, gstate);
- return (-1);
- }
return (0);
}
@@ -222,6 +204,57 @@ sev_activate(struct vmd_vm *vm, int vcpu_id)
}
+int
+sev_encrypt_state(struct vmd_vm *vm, int vcpu_id)
+{
+ struct vmop_create_params *vmc = &vm->vm_params;
+ struct vm_create_params *vcp = &vmc->vmc_params;
+
+ if (!vcp->vcp_seves)
+ return (0);
+
+ if (psp_encrypt_state(vm->vm_sev_handle, vm->vm_sev_asid[vcpu_id],
+ vcp->vcp_id, vcpu_id)) {
+ log_warnx("%s: failed to encrypt statet: 0x%x 0x%x 0x%0x 0x%0x",
+ __func__, vm->vm_sev_handle, vm->vm_sev_asid[vcpu_id],
+ vm->vm_vmid, vcpu_id);
+ return (-1);
+ }
+
+ return (0);
+}
+
+int
+sev_launch_finalize(struct vmd_vm *vm)
+{
+ struct vmop_create_params *vmc = &vm->vm_params;
+ struct vm_create_params *vcp = &vmc->vmc_params;
+ uint8_t gstate;
+
+ if (!vcp->vcp_sev)
+ return (0);
+
+ if (psp_launch_measure(vm->vm_sev_handle)) {
+ log_warnx("%s: failed to launch measure", __func__);
+ return (-1);
+ }
+ if (psp_launch_finish(vm->vm_sev_handle)) {
+ log_warnx("%s: failed to launch finish", __func__);
+ return (-1);
+ }
+
+ if (psp_get_gstate(vm->vm_sev_handle, NULL, NULL, &gstate)) {
+ log_warnx("%s: failed to get guest state", __func__);
+ return (-1);
+ }
+ if (gstate != PSP_GSTATE_RUNNING) {
+ log_warnx("%s: invalid guest state: 0x%hx", __func__, gstate);
+ return (-1);
+ }
+
+ return (0);
+}
+
/*
* Deactivate and decommission a guest's SEV crypto state.
*/
diff --git a/usr.sbin/vmd/vm.c b/usr.sbin/vmd/vm.c
index e399c0c0439..a9b97387073 100644
--- a/usr.sbin/vmd/vm.c
+++ b/usr.sbin/vmd/vm.c
@@ -958,6 +958,18 @@ run_vm(struct vmop_create_params *vmc, struct vcpu_reg_state *vrs)
}
}
+ if (sev_encrypt_state(current_vm, i)) {
+ log_warnx("%s: state encryption failed for VCPU "
+ "%zu failed - exiting.", __progname, i);
+ return (EIO);
+ }
+
+ if (sev_launch_finalize(current_vm)) {
+ log_warnx("%s: state encryption failed for VCPU "
+ "%zu failed - exiting.", __progname, i);
+ return (EIO);
+ }
+
ret = pthread_cond_init(&vcpu_run_cond[i], NULL);
if (ret) {
log_warnx("%s: cannot initialize cond var (%d)",
diff --git a/usr.sbin/vmd/vmd.h b/usr.sbin/vmd/vmd.h
index ef98fa5a238..37e8a519494 100644
--- a/usr.sbin/vmd/vmd.h
+++ b/usr.sbin/vmd/vmd.h
@@ -587,8 +587,9 @@ __dead void vioblk_main(int, int);
int psp_get_pstate(uint16_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *);
int psp_df_flush(void);
int psp_get_gstate(uint32_t, uint32_t *, uint32_t *, uint8_t *);
-int psp_launch_start(uint32_t *);
+int psp_launch_start(uint32_t *, int);
int psp_launch_update(uint32_t, vaddr_t, size_t);
+int psp_encrypt_state(uint32_t, uint32_t, uint32_t, uint32_t);
int psp_launch_measure(uint32_t);
int psp_launch_finish(uint32_t);
int psp_activate(uint32_t, uint32_t);
@@ -600,6 +601,8 @@ int sev_init(struct vmd_vm *);
int sev_register_encryption(vaddr_t, size_t);
int sev_encrypt_memory(struct vmd_vm *);
int sev_activate(struct vmd_vm *, int);
+int sev_encrypt_state(struct vmd_vm *, int);
+int sev_launch_finalize(struct vmd_vm *);
int sev_shutdown(struct vmd_vm *);
#endif /* VMD_H */
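
With the parser bits above, SEV-ES is enabled per guest using the new
"seves" keyword in vm.conf(5). A minimal sketch of how I'd expect a
configuration to look (VM name, memory size and disk path are just
placeholders):

	vm "guest" {
		memory 2G
		disk "/var/vmm/guest.qcow2"
		seves
	}

Since the grammar sets vcp_sev together with vcp_seves, "seves"
implies "sev" and the latter does not need to be given separately.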