vmm(4): Configure SEV-ES 1/3
Hi,
this is diff 1/3 of a patchset. It configures SEV-ES in vmm(4).
Right now, this change should not alter current VM setups, as vmd(8)
cannot configure SEV-ES yet; any change in behavior would thus be a
regression.
See commit message below for details.
The following diffs 2 and 3 will address guest interrupt flag state and
guest entry/exit.
The next patchset will then deal with SEV-ES specific guest exits.
Take care,
HJ.
----------------------------------------------------------------------------
commit 8a739a5c9ca492e77fd0cb65716cab53f837f2d9
Author: Hans-Joerg Hoexer <hshoexer@genua.de>
Date: Wed Jan 22 11:43:03 2025 +0100
vmm(4): Configure SEV-ES
For SEV-ES we have to adjust the guest configuration:
- Do not intercept XSETBV; we cannot force access to XCR0 as it
is part of the encrypted state.
- We do not have direct access to EFER and CR[04]; thus intercept
EFER and CR[04] "post write". The provided exit handler will keep
track of the guest state.
- Regarding MSRs:
- Allow access to the GHCB MSR (will be used for guest-host
communication; see the sketch below)
- Allow reading XSS state (will be needed for CPUID Extended State
Enumeration)
- Allow full read/write for EFER; the SVME bit cannot be modified
with SEV-ES; it will always be 1
- SEV-ES requires LBR virtualization; thus enable it
When everything is set up, the initial state is copied to the VMSA,
which is not yet encrypted. Encryption will have to be done by vmd(8)
and psp(4).
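To illustrate the GHCB MSR mentioned above: it carries a simple
request/response protocol between guest and hypervisor. Below is a
minimal sketch of how a guest could query a single CPUID register
over it, assuming the encoding from the AMD GHCB specification;
ghcb_msr_cpuid(), wrmsr(), rdmsr() and vmgexit() are illustrative
helpers, not part of this diff:

uint64_t
ghcb_msr_cpuid(uint32_t leaf, int reg)	/* reg: 0=EAX 1=EBX 2=ECX 3=EDX */
{
	uint64_t req, resp;

	/* GHCBData[11:0] = request, [31:30] = register, [63:32] = leaf */
	req = SEV_CPUID_REQ | ((uint64_t)reg << 30) | ((uint64_t)leaf << 32);
	wrmsr(MSR_SEV_GHCB, req);
	vmgexit();			/* VMGEXIT: exit to the hypervisor */
	resp = rdmsr(MSR_SEV_GHCB);

	/* GHCBData[11:0] = response, [63:32] = register value */
	if ((resp & 0xfff) != SEV_CPUID_RESP)
		return (~0ULL);		/* protocol error */
	return (resp >> 32);
}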
diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c
index 79b348ba061..3d5c58de71d 100644
--- a/sys/arch/amd64/amd64/vmm_machdep.c
+++ b/sys/arch/amd64/amd64/vmm_machdep.c
@@ -81,6 +81,7 @@ int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
+int vcpu_svm_init_vmsa(struct vcpu *, struct vcpu_reg_state *);
int vcpu_reload_vmcs_vmx(struct vcpu *);
int vcpu_init(struct vcpu *, struct vm_create_params *);
int vcpu_init_vmx(struct vcpu *);
@@ -96,6 +97,7 @@ int vmx_get_exit_info(uint64_t *, uint64_t *);
int vmx_load_pdptes(struct vcpu *);
int vmx_handle_exit(struct vcpu *);
int svm_handle_exit(struct vcpu *);
+int svm_handle_efercr(struct vcpu *, uint64_t);
int svm_handle_msr(struct vcpu *);
int vmm_handle_xsetbv(struct vcpu *, uint64_t *);
int vmx_handle_xsetbv(struct vcpu *);
@@ -1583,6 +1585,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
* External NMI exiting (SVM_INTERCEPT_NMI)
* CPUID instruction (SVM_INTERCEPT_CPUID)
* HLT instruction (SVM_INTERCEPT_HLT)
+ * INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
* I/O instructions (SVM_INTERCEPT_INOUT)
* MSR access (SVM_INTERCEPT_MSR)
* shutdown events (SVM_INTERCEPT_SHUTDOWN)
@@ -1612,9 +1615,17 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR |
SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP;
- if (xsave_mask)
+ /* With SEV-ES we cannot force access to XCR0, thus no intercept */
+ if (xsave_mask && !vcpu->vc_seves)
vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV;
+ if (vcpu->vc_seves) {
+ /* With SEV-ES also intercept EFER and CR[04] post-write */
+ vmcb->v_intercept2 |= SVM_INTERCEPT_EFER_WRITE;
+ vmcb->v_intercept2 |= SVM_INTERCEPT_CR0_WRITE_POST;
+ vmcb->v_intercept2 |= SVM_INTERCEPT_CR4_WRITE_POST;
+ }
+
/* Setup I/O bitmap */
memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE);
vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);
@@ -1634,8 +1645,26 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
svm_setmsrbrw(vcpu, MSR_GSBASE);
svm_setmsrbrw(vcpu, MSR_KERNELGSBASE);
- /* EFER is R/O so we can ensure the guest always has SVME */
- svm_setmsrbr(vcpu, MSR_EFER);
+ /* allow reading SEV status */
+ svm_setmsrbrw(vcpu, MSR_SEV_STATUS);
+
+ if (vcpu->vc_seves) {
+ /* Allow read/write GHCB guest physical address */
+ svm_setmsrbrw(vcpu, MSR_SEV_GHCB);
+
+ /* Allow reading MSR_XSS; for CPUID Extended State Enum. */
+ svm_setmsrbr(vcpu, MSR_XSS);
+
+ /*
+ * With SEV-ES SVME can't be modified by the guest;
+ * host can only intercept post-write (see
+ * SVM_INTERCEPT_EFER_WRITE above).
+ */
+ svm_setmsrbrw(vcpu, MSR_EFER);
+ } else {
+ /* EFER is R/O so we can ensure the guest always has SVME */
+ svm_setmsrbr(vcpu, MSR_EFER);
+ }
/* allow reading TSC */
svm_setmsrbr(vcpu, MSR_TSC);
@@ -1667,19 +1696,78 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
if (vcpu->vc_sev)
vmcb->v_np_enable |= SVM_ENABLE_SEV;
+ /* SEV-ES */
+ if (vcpu->vc_seves) {
+ vmcb->v_np_enable |= SVM_SEVES_ENABLE;
+ vmcb->v_lbr_virt_enable |= SVM_LBRVIRT_ENABLE;
+
+ /* Set VMSA. */
+ vmcb->v_vmsa_pa = vcpu->vc_svm_vmsa_pa;
+ }
+
/* Enable SVME in EFER (must always be set) */
vmcb->v_efer |= EFER_SVME;
- ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs);
+ if ((ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs)) != 0)
+ return ret;
/* xcr0 power on default sets bit 0 (x87 state) */
vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
vcpu->vc_parent->vm_map->pmap->eptp = 0;
+ ret = vcpu_svm_init_vmsa(vcpu, vrs);
+
return ret;
}
+/*
+ * vcpu_svm_init_vmsa
+ *
+ * Initialize VMSA with initial VCPU state.
+ */
+int
+vcpu_svm_init_vmsa(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
+{
+ uint64_t *gprs = vrs->vrs_gprs;
+ struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+ struct vmsa *vmsa;
+
+ if (!vcpu->vc_seves)
+ return 0;
+
+ vmsa = (struct vmsa *)vcpu->vc_svm_vmsa_va;
+ memcpy(vmsa, &vmcb->vmcb_layout, sizeof(vmcb->vmcb_layout));
+
+ vmsa->v_rax = gprs[VCPU_REGS_RAX];
+ vmsa->v_rbx = gprs[VCPU_REGS_RBX];
+ vmsa->v_rcx = gprs[VCPU_REGS_RCX];
+ vmsa->v_rdx = gprs[VCPU_REGS_RDX];
+ vmsa->v_rsp = gprs[VCPU_REGS_RSP];
+ vmsa->v_rbp = gprs[VCPU_REGS_RBP];
+ vmsa->v_rsi = gprs[VCPU_REGS_RSI];
+ vmsa->v_rdi = gprs[VCPU_REGS_RDI];
+
+ vmsa->v_r8 = gprs[VCPU_REGS_R8];
+ vmsa->v_r9 = gprs[VCPU_REGS_R9];
+ vmsa->v_r10 = gprs[VCPU_REGS_R10];
+ vmsa->v_r11 = gprs[VCPU_REGS_R11];
+ vmsa->v_r12 = gprs[VCPU_REGS_R12];
+ vmsa->v_r13 = gprs[VCPU_REGS_R13];
+ vmsa->v_r14 = gprs[VCPU_REGS_R14];
+ vmsa->v_r15 = gprs[VCPU_REGS_R15];
+
+ vmsa->v_rip = gprs[VCPU_REGS_RIP];
+
+ vmsa->v_xcr0 = vcpu->vc_gueststate.vg_xcr0;
+
+ /* initialize FPU */
+ vmsa->v_x87_fcw = __INITIAL_NPXCW__;
+ vmsa->v_mxcsr = __INITIAL_MXCSR__;
+
+ return 0;
+}
+
/*
* svm_setmsrbr
*
@@ -2759,6 +2847,10 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
{
int ret = 0;
+ /* Shall we enable SEV/SEV-ES? */
+ vcpu->vc_sev = vcp->vcp_sev;
+ vcpu->vc_seves = vcp->vcp_seves;
+
/* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */
if (vmm_alloc_asid(&vcpu->vc_vpid, vcpu))
return (ENOMEM);
@@ -2844,10 +2936,6 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
(uint64_t)vcpu->vc_svm_ioio_va,
(uint64_t)vcpu->vc_svm_ioio_pa);
- /* Shall we enable SEV/SEV-ES? */
- vcpu->vc_sev = vcp->vcp_sev;
- vcpu->vc_seves = vcp->vcp_seves;
-
if (vcpu->vc_seves) {
/* Allocate VM save area VA */
vcpu->vc_svm_vmsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
@@ -4221,6 +4309,12 @@ svm_handle_exit(struct vcpu *vcpu)
ret = vmm_inject_ud(vcpu);
update_rip = 0;
break;
+ case SVM_VMEXIT_EFER_WRITE_TRAP:
+ case SVM_VMEXIT_CR0_WRITE_TRAP:
+ case SVM_VMEXIT_CR4_WRITE_TRAP:
+ ret = svm_handle_efercr(vcpu, exit_reason);
+ update_rip = 0;
+ break;
default:
DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
exit_reason, (uint64_t)vcpu->vc_control_pa);
@@ -4246,6 +4340,35 @@ svm_handle_exit(struct vcpu *vcpu)
return (ret);
}
+/*
+ * svm_handle_efercr
+ *
+ * With SEV-ES the hypervisor cannot intercept and modify writes
+ * to CR and EFER. However, a post-write intercept reports the
+ * new state of these registers.
+ */
+int
+svm_handle_efercr(struct vcpu *vcpu, uint64_t exit_reason)
+{
+ struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+
+ switch (exit_reason) {
+ case SVM_VMEXIT_EFER_WRITE_TRAP:
+ vmcb->v_efer = vmcb->v_exitinfo1;
+ break;
+ case SVM_VMEXIT_CR0_WRITE_TRAP:
+ vmcb->v_cr0 = vmcb->v_exitinfo1;
+ break;
+ case SVM_VMEXIT_CR4_WRITE_TRAP:
+ vmcb->v_cr4 = vmcb->v_exitinfo1;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
/*
* vmx_handle_exit
*
diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
index 4208e2e13a1..1235cc7a65a 100644
--- a/sys/arch/amd64/include/specialreg.h
+++ b/sys/arch/amd64/include/specialreg.h
@@ -723,6 +723,10 @@
#define NB_CFG_DISIOREQLOCK 0x0000000000000004ULL
#define NB_CFG_DISDATMSK 0x0000001000000000ULL
+#define MSR_SEV_GHCB 0xc0010130
+#define SEV_CPUID_REQ 0x00000004
+#define SEV_CPUID_RESP 0x00000005
+
#define MSR_SEV_STATUS 0xc0010131
#define SEV_STAT_ENABLED 0x00000001
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index 21f2f7d9483..22c60907f1a 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -623,8 +623,11 @@ struct vmcb_segment {
uint64_t vs_base; /* 008h */
};
-#define SVM_ENABLE_NP (1ULL << 0)
-#define SVM_ENABLE_SEV (1ULL << 1)
+#define SVM_ENABLE_NP (1ULL << 0)
+#define SVM_ENABLE_SEV (1ULL << 1)
+#define SVM_SEVES_ENABLE (1ULL << 2)
+
+#define SVM_LBRVIRT_ENABLE (1ULL << 0)
struct vmcb {
union {