From: Hans-Jörg Höxer <hshoexer@genua.de>
Subject: vmm(4): Configure SEV-ES 1/3
To: <tech@openbsd.org>
Cc: <Hans-Joerg_Hoexer@genua.de>
Date: Wed, 30 Apr 2025 16:22:03 +0200

Hi,

this is diff 1/3 of a patchset.  It configures SEV-ES in vmm(4).
Right now, this change should not alter existing VM setups, as vmd(8)
cannot configure SEV-ES yet; any change in behavior would thus be a
regression.

See commit message below for details.

The following diffs 2 and 3 will address guest interrupt flag state and
guest entry/exit.

The next patchset will then deal with SEV-ES specific guest exits.

Take care,
HJ.

----------------------------------------------------------------------------
commit 8a739a5c9ca492e77fd0cb65716cab53f837f2d9
Author: Hans-Joerg Hoexer <hshoexer@genua.de>
Date:   Wed Jan 22 11:43:03 2025 +0100

    vmm(4): Configure SEV-ES
    
    For SEV-ES we have to adjust the guest configuration:
    
    - Do not intercept XSETBV; we cannot force access to XCR0 as it
      is part of the encrypted state.
    
    - We do not have direct access to EFER and CR[04]; thus, intercept
      EFER and CR[04] writes "post write".  The provided exit handler
      will keep track of the guest state.
    
    - Regarding MSRs (the permission-map lookup these items rely on
      is sketched after this list):
      - Allow access to the GHCB MSR (will be used for guest-host
        communication)
      - Allow reading XSS state (will be needed for CPUID Extended
        State Enumeration)
      - Allow full read/write for EFER; the SVME bit cannot be
        modified with SEV-ES; it will always be 1
    
    - SEV-ES requires LBR virtualization; thus enable it.
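
    As an aside on the MSR items above: SVM gives every MSR two bits
    in the MSR permission map, an even read-intercept bit and an odd
    write-intercept bit; a clear bit allows the access.  A minimal
    sketch of the lookup, following the AMD APM layout (helper name
    and structure are illustrative, not the vmm(4) implementation):

	/*
	 * Locate the permission bits for an MSR in the SVM MSR
	 * permission map (AMD APM layout).  Illustrative only.
	 */
	static int
	msrpm_locate(uint32_t msr, uint32_t *byte, uint8_t *rbit)
	{
		uint32_t base;

		if (msr <= 0x1fff)
			base = 0x0000;
		else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
			base = 0x0800;
			msr -= 0xc0000000;
		} else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
			base = 0x1000;
			msr -= 0xc0010000;
		} else
			return (-1);		/* not covered by the map */

		*byte = base + msr / 4;		/* 4 MSRs per byte */
		*rbit = 1 << ((msr % 4) * 2);	/* write bit is rbit << 1 */
		return (0);
	}

    Allowing a read then clears rbit in the map; allowing a write
    clears rbit << 1, which is effectively what the svm_setmsrbr()
    and svm_setmsrbrw() calls in the diff do.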
    
    When everything is set up, the initial state is copied to the VMSA,
    which is not yet encrypted at this point.  Encrypting it will have
    to be done by vmd(8) and psp(4).
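
To illustrate what the GHCB MSR will be used for (and why the diff
adds SEV_CPUID_REQ/SEV_CPUID_RESP to specialreg.h): under SEV-ES a
guest can perform CPUID through the GHCB MSR protocol.  The sketch
below follows the encoding in AMD's GHCB specification; it is
guest-side, illustrative code, and vmgexit() merely stands in for
the VMGEXIT instruction ("rep; vmmcall"):

	/*
	 * Guest-side CPUID via the GHCB MSR protocol (illustrative).
	 * GHCBData[63:32] = CPUID function, [31:30] = register index
	 * (0=EAX .. 3=EDX), [11:0] = request/response code.
	 */
	uint64_t
	ghcb_msr_cpuid(uint32_t leaf, int reg)
	{
		uint64_t resp;

		wrmsr(MSR_SEV_GHCB, ((uint64_t)leaf << 32) |
		    ((uint64_t)reg << 30) | SEV_CPUID_REQ);
		vmgexit();			/* exit to the hypervisor */
		resp = rdmsr(MSR_SEV_GHCB);
		if ((resp & 0xfff) != SEV_CPUID_RESP)
			return (~0ULL);		/* protocol error */
		return (resp >> 32);		/* requested register value */
	}

Handling such requests on the host side belongs to the SEV-ES
specific guest exits, i.e. presumably the next patchset mentioned
above.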

diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c
index 79b348ba061..3d5c58de71d 100644
--- a/sys/arch/amd64/amd64/vmm_machdep.c
+++ b/sys/arch/amd64/amd64/vmm_machdep.c
@@ -81,6 +81,7 @@ int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
 int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
 int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
 int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
+int vcpu_svm_init_vmsa(struct vcpu *, struct vcpu_reg_state *);
 int vcpu_reload_vmcs_vmx(struct vcpu *);
 int vcpu_init(struct vcpu *, struct vm_create_params *);
 int vcpu_init_vmx(struct vcpu *);
@@ -96,6 +97,7 @@ int vmx_get_exit_info(uint64_t *, uint64_t *);
 int vmx_load_pdptes(struct vcpu *);
 int vmx_handle_exit(struct vcpu *);
 int svm_handle_exit(struct vcpu *);
+int svm_handle_efercr(struct vcpu *, uint64_t);
 int svm_handle_msr(struct vcpu *);
 int vmm_handle_xsetbv(struct vcpu *, uint64_t *);
 int vmx_handle_xsetbv(struct vcpu *);
@@ -1583,6 +1585,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
 	 * External NMI exiting (SVM_INTERCEPT_NMI)
 	 * CPUID instruction (SVM_INTERCEPT_CPUID)
 	 * HLT instruction (SVM_INTERCEPT_HLT)
+	 * INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
 	 * I/O instructions (SVM_INTERCEPT_INOUT)
 	 * MSR access (SVM_INTERCEPT_MSR)
 	 * shutdown events (SVM_INTERCEPT_SHUTDOWN)
@@ -1612,9 +1615,17 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
 	    SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR |
 	    SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP;
 
-	if (xsave_mask)
+	/* With SEV-ES we cannot force access to XCR0, thus no intercept */
+	if (xsave_mask && !vcpu->vc_seves)
 		vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV;
 
+	if (vcpu->vc_seves) {
+		/* With SEV-ES, also intercept EFER and CR[04] post-write */
+		vmcb->v_intercept2 |= SVM_INTERCEPT_EFER_WRITE;
+		vmcb->v_intercept2 |= SVM_INTERCEPT_CR0_WRITE_POST;
+		vmcb->v_intercept2 |= SVM_INTERCEPT_CR4_WRITE_POST;
+	}
+
 	/* Setup I/O bitmap */
 	memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE);
 	vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);
@@ -1634,8 +1645,26 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
 	svm_setmsrbrw(vcpu, MSR_GSBASE);
 	svm_setmsrbrw(vcpu, MSR_KERNELGSBASE);
 
-	/* EFER is R/O so we can ensure the guest always has SVME */
-	svm_setmsrbr(vcpu, MSR_EFER);
+	/* allow read/write access to SEV status */
+	svm_setmsrbrw(vcpu, MSR_SEV_STATUS);
+
+	if (vcpu->vc_seves) {
+		/* Allow read/write GHCB guest physical address */
+		svm_setmsrbrw(vcpu, MSR_SEV_GHCB);
+
+		/* Allow reading MSR_XSS; for CPUID Extended State Enum. */
+		svm_setmsrbr(vcpu, MSR_XSS);
+
+		/*
+		 * With SEV-ES SVME can't be modified by the guest;
+		 * host can only intercept post-write (see
+		 * SVM_INTERCEPT_EFER_WRITE above).
+		 */
+		svm_setmsrbrw(vcpu, MSR_EFER);
+	} else {
+		/* EFER is R/O so we can ensure the guest always has SVME */
+		svm_setmsrbr(vcpu, MSR_EFER);
+	}
 
 	/* allow reading TSC */
 	svm_setmsrbr(vcpu, MSR_TSC);
@@ -1667,19 +1696,78 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
 	if (vcpu->vc_sev)
 		vmcb->v_np_enable |= SVM_ENABLE_SEV;
 
+	/* SEV-ES */
+	if (vcpu->vc_seves) {
+		vmcb->v_np_enable |= SVM_SEVES_ENABLE;
+		vmcb->v_lbr_virt_enable |= SVM_LBRVIRT_ENABLE;
+
+		/* Set VMSA. */
+		vmcb->v_vmsa_pa = vcpu->vc_svm_vmsa_pa;
+	}
+
 	/* Enable SVME in EFER (must always be set) */
 	vmcb->v_efer |= EFER_SVME;
 
-	ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs);
+	if ((ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs)) != 0)
+		return ret;
 
 	/* xcr0 power on default sets bit 0 (x87 state) */
 	vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
 
 	vcpu->vc_parent->vm_map->pmap->eptp = 0;
 
+	ret = vcpu_svm_init_vmsa(vcpu, vrs);
+
 	return ret;
 }
 
+/*
+ * vcpu_svm_init_vmsa
+ *
+ * Initialize VMSA with initial VCPU state.
+ */
+int
+vcpu_svm_init_vmsa(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
+{
+	uint64_t	*gprs = vrs->vrs_gprs;
+	struct vmcb	*vmcb = (struct vmcb *)vcpu->vc_control_va;
+	struct vmsa	*vmsa;
+
+	if (!vcpu->vc_seves)
+		return 0;
+
+	vmsa = (struct vmsa *)vcpu->vc_svm_vmsa_va;
+	memcpy(vmsa, &vmcb->vmcb_layout, sizeof(vmcb->vmcb_layout));
+
+	vmsa->v_rax = gprs[VCPU_REGS_RAX];
+	vmsa->v_rbx = gprs[VCPU_REGS_RBX];
+	vmsa->v_rcx = gprs[VCPU_REGS_RCX];
+	vmsa->v_rdx = gprs[VCPU_REGS_RDX];
+	vmsa->v_rsp = gprs[VCPU_REGS_RSP];
+	vmsa->v_rbp = gprs[VCPU_REGS_RBP];
+	vmsa->v_rsi = gprs[VCPU_REGS_RSI];
+	vmsa->v_rdi = gprs[VCPU_REGS_RDI];
+
+	vmsa->v_r8 = gprs[VCPU_REGS_R8];
+	vmsa->v_r9 = gprs[VCPU_REGS_R9];
+	vmsa->v_r10 = gprs[VCPU_REGS_R10];
+	vmsa->v_r11 = gprs[VCPU_REGS_R11];
+	vmsa->v_r12 = gprs[VCPU_REGS_R12];
+	vmsa->v_r13 = gprs[VCPU_REGS_R13];
+	vmsa->v_r14 = gprs[VCPU_REGS_R14];
+	vmsa->v_r15 = gprs[VCPU_REGS_R15];
+
+	vmsa->v_rip = gprs[VCPU_REGS_RIP];
+
+	vmsa->v_xcr0 = vcpu->vc_gueststate.vg_xcr0;
+
+	/* initialize FPU */
+	vmsa->v_x87_fcw = __INITIAL_NPXCW__;
+	vmsa->v_mxcsr = __INITIAL_MXCSR__;
+
+	return 0;
+}
+
 /*
  * svm_setmsrbr
  *
@@ -2759,6 +2847,10 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
 {
 	int ret = 0;
 
+	/* Shall we enable SEV/SEV-ES? */
+	vcpu->vc_sev = vcp->vcp_sev;
+	vcpu->vc_seves = vcp->vcp_seves;
+
 	/* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */
 	if (vmm_alloc_asid(&vcpu->vc_vpid, vcpu))
 		return (ENOMEM);
@@ -2844,10 +2936,6 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
 	    (uint64_t)vcpu->vc_svm_ioio_va,
 	    (uint64_t)vcpu->vc_svm_ioio_pa);
 
-	/* Shall we enable SEV/SEV-ES? */
-	vcpu->vc_sev = vcp->vcp_sev;
-	vcpu->vc_seves = vcp->vcp_seves;
-
 	if (vcpu->vc_seves) {
 		/* Allocate VM save area VA */
 		vcpu->vc_svm_vmsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
@@ -4221,6 +4309,12 @@ svm_handle_exit(struct vcpu *vcpu)
 		ret = vmm_inject_ud(vcpu);
 		update_rip = 0;
 		break;
+	case SVM_VMEXIT_EFER_WRITE_TRAP:
+	case SVM_VMEXIT_CR0_WRITE_TRAP:
+	case SVM_VMEXIT_CR4_WRITE_TRAP:
+		ret = svm_handle_efercr(vcpu, exit_reason);
+		update_rip = 0;
+		break;
 	default:
 		DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
 		    exit_reason, (uint64_t)vcpu->vc_control_pa);
@@ -4246,6 +4340,35 @@ svm_handle_exit(struct vcpu *vcpu)
 	return (ret);
 }
 
+/*
+ * svm_handle_efercr
+ *
+ * With SEV-ES the hypervisor cannot intercept and modify writes
+ * to CR and EFER.  However, a post-write intercept notifies us
+ * of the new state of these registers.
+ */
+int
+svm_handle_efercr(struct vcpu *vcpu, uint64_t exit_reason)
+{
+	struct vmcb	*vmcb = (struct vmcb *)vcpu->vc_control_va;
+
+	switch (exit_reason) {
+	case SVM_VMEXIT_EFER_WRITE_TRAP:
+		vmcb->v_efer = vmcb->v_exitinfo1;
+		break;
+	case SVM_VMEXIT_CR0_WRITE_TRAP:
+		vmcb->v_cr0 = vmcb->v_exitinfo1;
+		break;
+	case SVM_VMEXIT_CR4_WRITE_TRAP:
+		vmcb->v_cr4 = vmcb->v_exitinfo1;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
 /*
  * vmx_handle_exit
  *
diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
index 4208e2e13a1..1235cc7a65a 100644
--- a/sys/arch/amd64/include/specialreg.h
+++ b/sys/arch/amd64/include/specialreg.h
@@ -723,6 +723,10 @@
 #define		NB_CFG_DISIOREQLOCK	0x0000000000000004ULL
 #define		NB_CFG_DISDATMSK	0x0000001000000000ULL
 
+#define MSR_SEV_GHCB	0xc0010130
+#define		SEV_CPUID_REQ		0x00000004
+#define		SEV_CPUID_RESP		0x00000005
+
 #define MSR_SEV_STATUS	0xc0010131
 #define		SEV_STAT_ENABLED	0x00000001
 
diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index 21f2f7d9483..22c60907f1a 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -623,8 +623,11 @@ struct vmcb_segment {
 	uint64_t			vs_base;		/* 008h */
 };
 
-#define SVM_ENABLE_NP	(1ULL << 0)
-#define SVM_ENABLE_SEV	(1ULL << 1)
+#define SVM_ENABLE_NP		(1ULL << 0)
+#define SVM_ENABLE_SEV		(1ULL << 1)
+#define SVM_SEVES_ENABLE	(1ULL << 2)
+
+#define SVM_LBRVIRT_ENABLE	(1ULL << 0)
 
 struct vmcb {
 	union {