From: Hans-Jörg Höxer
Subject: vmm(4): Configure SEV-ES 1/3
To:
Cc:
Date: Wed, 30 Apr 2025 16:22:03 +0200

Hi,

this is diff 1/3 of a patchset.  It configures SEV-ES in vmm(4).
Right now, this change must not alter current VM setups, as vmd(8)
cannot configure SEV-ES yet; thus, any change in behavior would be
a regression.  See the commit message below for details.

The following diffs 2 and 3 will address guest interrupt flag state
and guest entry/exit.  The next patchset will then deal with SEV-ES
specific guest exits.

Take care,
HJ.

----------------------------------------------------------------------------
commit 8a739a5c9ca492e77fd0cb65716cab53f837f2d9
Author: Hans-Joerg Hoexer
Date:   Wed Jan 22 11:43:03 2025 +0100

    vmm(4): Configure SEV-ES

    For SEV-ES we have to adjust the guest configuration:

    - Do not intercept XSETBV; we cannot force access to XCR0 as it
      is part of the encrypted state.

    - We do not have direct access to EFER and CR[04]; thus intercept
      EFER and CR[04] "post write".  The provided exit handler will
      keep track of the guest state.

    - Regarding MSRs:
      - Allow access to the GHCB MSR (will be used for guest-host
        communication).
      - Allow reading XSS state (needed for CPUID Extended State
        Enumeration).
      - Allow full read/write for EFER; the SVME bit cannot be
        modified with SEV-ES, it will always be 1.

    - SEV-ES requires LBR virtualization; thus enable it.

    When everything is set up, the initial state is copied to the
    VMSA, which is not yet encrypted.  Encrypting it will have to be
    done by vmd(8) and psp(4).
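As background for the MSR_SEV_GHCB, SEV_CPUID_REQ and SEV_CPUID_RESP
defines added to specialreg.h below: these belong to the MSR-based
GHCB protocol from AMD's GHCB specification.  The guest requests one
CPUID register by writing to the GHCB MSR and issuing VMGEXIT; the
hypervisor answers in the same MSR.  Roughly, from the guest's point
of view (an illustrative sketch based on the spec, not part of this
diff):

/*
 * Guest-side CPUID request over the GHCB MSR protocol (sketch).
 *
 * Request:  bits 63:32 CPUID function, bits 31:30 register select
 *           (0 = EAX, 1 = EBX, 2 = ECX, 3 = EDX), bits 11:0
 *           SEV_CPUID_REQ.
 * Response: bits 63:32 register value, bits 11:0 SEV_CPUID_RESP.
 */
#include <stdint.h>

#define MSR_SEV_GHCB	0xc0010130
#define SEV_CPUID_REQ	0x00000004
#define SEV_CPUID_RESP	0x00000005

static inline uint64_t
rdmsr(uint32_t msr)
{
	uint32_t lo, hi;

	__asm volatile("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
	return (((uint64_t)hi << 32) | lo);
}

static inline void
wrmsr(uint32_t msr, uint64_t v)
{
	__asm volatile("wrmsr" : : "c" (msr), "a" ((uint32_t)v),
	    "d" ((uint32_t)(v >> 32)));
}

static inline void
vmgexit(void)
{
	/* VMGEXIT is encoded as "rep; vmmcall" */
	__asm volatile("rep; vmmcall" ::: "memory");
}

/* Returns one CPUID register of function "func", or 0 on error. */
static uint32_t
ghcb_msr_cpuid(uint32_t func, uint32_t reg)
{
	uint64_t resp;

	wrmsr(MSR_SEV_GHCB, ((uint64_t)func << 32) |
	    ((uint64_t)reg << 30) | SEV_CPUID_REQ);
	vmgexit();
	resp = rdmsr(MSR_SEV_GHCB);
	if ((resp & 0xfff) != SEV_CPUID_RESP)
		return (0);
	return ((uint32_t)(resp >> 32));
}

This diff only opens the GHCB MSR for the guest; vmm(4) will see the
resulting guest exits once the SEV-ES specific exit handling from
the next patchset lands.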
diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c
index 79b348ba061..3d5c58de71d 100644
--- a/sys/arch/amd64/amd64/vmm_machdep.c
+++ b/sys/arch/amd64/amd64/vmm_machdep.c
@@ -81,6 +81,7 @@ int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
 int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
 int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
 int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
+int vcpu_svm_init_vmsa(struct vcpu *, struct vcpu_reg_state *);
 int vcpu_reload_vmcs_vmx(struct vcpu *);
 int vcpu_init(struct vcpu *, struct vm_create_params *);
 int vcpu_init_vmx(struct vcpu *);
@@ -96,6 +97,7 @@ int vmx_get_exit_info(uint64_t *, uint64_t *);
 int vmx_load_pdptes(struct vcpu *);
 int vmx_handle_exit(struct vcpu *);
 int svm_handle_exit(struct vcpu *);
+int svm_handle_efercr(struct vcpu *, uint64_t);
 int svm_handle_msr(struct vcpu *);
 int vmm_handle_xsetbv(struct vcpu *, uint64_t *);
 int vmx_handle_xsetbv(struct vcpu *);
@@ -1583,6 +1585,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
	 *	External NMI exiting (SVM_INTERCEPT_NMI)
	 *	CPUID instruction (SVM_INTERCEPT_CPUID)
	 *	HLT instruction (SVM_INTERCEPT_HLT)
+	 *	INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
	 *	I/O instructions (SVM_INTERCEPT_INOUT)
	 *	MSR access (SVM_INTERCEPT_MSR)
	 *	shutdown events (SVM_INTERCEPT_SHUTDOWN)
@@ -1612,9 +1615,17 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
	    SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR |
	    SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP;

-	if (xsave_mask)
+	/* With SEV-ES we cannot force access XCR0, thus no intercept */
+	if (xsave_mask && !vcpu->vc_seves)
		vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV;

+	if (vcpu->vc_seves) {
+		/* With SEV-ES also intercept post EFER and CR[04] writes */
+		vmcb->v_intercept2 |= SVM_INTERCEPT_EFER_WRITE;
+		vmcb->v_intercept2 |= SVM_INTERCEPT_CR0_WRITE_POST;
+		vmcb->v_intercept2 |= SVM_INTERCEPT_CR4_WRITE_POST;
+	}
+
	/* Setup I/O bitmap */
	memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE);
	vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);
@@ -1634,8 +1645,26 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
	svm_setmsrbrw(vcpu, MSR_GSBASE);
	svm_setmsrbrw(vcpu, MSR_KERNELGSBASE);

-	/* EFER is R/O so we can ensure the guest always has SVME */
-	svm_setmsrbr(vcpu, MSR_EFER);
+	/* allow reading SEV status */
+	svm_setmsrbrw(vcpu, MSR_SEV_STATUS);
+
+	if (vcpu->vc_seves) {
+		/* Allow read/write GHCB guest physical address */
+		svm_setmsrbrw(vcpu, MSR_SEV_GHCB);
+
+		/* Allow reading MSR_XSS; for CPUID Extended State Enum. */
+		svm_setmsrbr(vcpu, MSR_XSS);
+
+		/*
+		 * With SEV-ES SVME can't be modified by the guest;
+		 * host can only intercept post-write (see
+		 * SVM_INTERCEPT_EFER_WRITE above).
+		 */
+		svm_setmsrbrw(vcpu, MSR_EFER);
+	} else {
+		/* EFER is R/O so we can ensure the guest always has SVME */
+		svm_setmsrbr(vcpu, MSR_EFER);
+	}

	/* allow reading TSC */
	svm_setmsrbr(vcpu, MSR_TSC);
@@ -1667,19 +1696,78 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
	if (vcpu->vc_sev)
		vmcb->v_np_enable |= SVM_ENABLE_SEV;

+	/* SEV-ES */
+	if (vcpu->vc_seves) {
+		vmcb->v_np_enable |= SVM_SEVES_ENABLE;
+		vmcb->v_lbr_virt_enable |= SVM_LBRVIRT_ENABLE;
+
+		/* Set VMSA. */
+		vmcb->v_vmsa_pa = vcpu->vc_svm_vmsa_pa;
+	}
+
	/* Enable SVME in EFER (must always be set) */
	vmcb->v_efer |= EFER_SVME;

-	ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs);
+	if ((ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs)) != 0)
+		return ret;

	/* xcr0 power on default sets bit 0 (x87 state) */
	vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;

	vcpu->vc_parent->vm_map->pmap->eptp = 0;

+	ret = vcpu_svm_init_vmsa(vcpu, vrs);
+
	return ret;
 }

+/*
+ * vcpu_svm_init_vmsa
+ *
+ * Initialize VMSA with initial VCPU state.
+ */
+int
+vcpu_svm_init_vmsa(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
+{
+	uint64_t *gprs = vrs->vrs_gprs;
+	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+	struct vmsa *vmsa;
+
+	if (!vcpu->vc_seves)
+		return 0;
+
+	vmsa = (struct vmsa *)vcpu->vc_svm_vmsa_va;
+	memcpy(vmsa, &vmcb->vmcb_layout, sizeof(vmcb->vmcb_layout));
+
+	vmsa->v_rax = gprs[VCPU_REGS_RAX];
+	vmsa->v_rbx = gprs[VCPU_REGS_RBX];
+	vmsa->v_rcx = gprs[VCPU_REGS_RCX];
+	vmsa->v_rdx = gprs[VCPU_REGS_RDX];
+	vmsa->v_rsp = gprs[VCPU_REGS_RSP];
+	vmsa->v_rbp = gprs[VCPU_REGS_RBP];
+	vmsa->v_rsi = gprs[VCPU_REGS_RSI];
+	vmsa->v_rdi = gprs[VCPU_REGS_RDI];
+
+	vmsa->v_r8 = gprs[VCPU_REGS_R8];
+	vmsa->v_r9 = gprs[VCPU_REGS_R9];
+	vmsa->v_r10 = gprs[VCPU_REGS_R10];
+	vmsa->v_r11 = gprs[VCPU_REGS_R11];
+	vmsa->v_r12 = gprs[VCPU_REGS_R12];
+	vmsa->v_r13 = gprs[VCPU_REGS_R13];
+	vmsa->v_r14 = gprs[VCPU_REGS_R14];
+	vmsa->v_r15 = gprs[VCPU_REGS_R15];
+
+	vmsa->v_rip = gprs[VCPU_REGS_RIP];
+
+	vmsa->v_xcr0 = vcpu->vc_gueststate.vg_xcr0;
+
+	/* initialize FPU */
+	vmsa->v_x87_fcw = __INITIAL_NPXCW__;
+	vmsa->v_mxcsr = __INITIAL_MXCSR__;
+
+	return 0;
+}
+
 /*
  * svm_setmsrbr
  *
@@ -2759,6 +2847,10 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
 {
	int ret = 0;

+	/* Shall we enable SEV/SEV-ES? */
+	vcpu->vc_sev = vcp->vcp_sev;
+	vcpu->vc_seves = vcp->vcp_seves;
+
	/* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */
	if (vmm_alloc_asid(&vcpu->vc_vpid, vcpu))
		return (ENOMEM);
@@ -2844,10 +2936,6 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
	    (uint64_t)vcpu->vc_svm_ioio_va,
	    (uint64_t)vcpu->vc_svm_ioio_pa);

-	/* Shall we enable SEV/SEV-ES? */
-	vcpu->vc_sev = vcp->vcp_sev;
-	vcpu->vc_seves = vcp->vcp_seves;
-
	if (vcpu->vc_seves) {
		/* Allocate VM save area VA */
		vcpu->vc_svm_vmsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
@@ -4221,6 +4309,12 @@ svm_handle_exit(struct vcpu *vcpu)
		ret = vmm_inject_ud(vcpu);
		update_rip = 0;
		break;
+	case SVM_VMEXIT_EFER_WRITE_TRAP:
+	case SVM_VMEXIT_CR0_WRITE_TRAP:
+	case SVM_VMEXIT_CR4_WRITE_TRAP:
+		ret = svm_handle_efercr(vcpu, exit_reason);
+		update_rip = 0;
+		break;
	default:
		DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
		    exit_reason, (uint64_t)vcpu->vc_control_pa);
@@ -4246,6 +4340,35 @@ svm_handle_exit(struct vcpu *vcpu)
	return (ret);
 }

+/*
+ * svm_handle_efercr
+ *
+ * With SEV-ES the hypervisor can not intercept and modify writes
+ * to CR and EFER.  However, a post write intercept notifies about
+ * the new state of these registers.
+ */
+int
+svm_handle_efercr(struct vcpu *vcpu, uint64_t exit_reason)
+{
+	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+
+	switch (exit_reason) {
+	case SVM_VMEXIT_EFER_WRITE_TRAP:
+		vmcb->v_efer = vmcb->v_exitinfo1;
+		break;
+	case SVM_VMEXIT_CR0_WRITE_TRAP:
+		vmcb->v_cr0 = vmcb->v_exitinfo1;
+		break;
+	case SVM_VMEXIT_CR4_WRITE_TRAP:
+		vmcb->v_cr4 = vmcb->v_exitinfo1;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
 /*
  * vmx_handle_exit
  *
diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
index 4208e2e13a1..1235cc7a65a 100644
--- a/sys/arch/amd64/include/specialreg.h
+++ b/sys/arch/amd64/include/specialreg.h
@@ -723,6 +723,10 @@
 #define NB_CFG_DISIOREQLOCK	0x0000000000000004ULL
 #define NB_CFG_DISDATMSK	0x0000001000000000ULL

+#define MSR_SEV_GHCB		0xc0010130
+#define SEV_CPUID_REQ		0x00000004
+#define SEV_CPUID_RESP		0x00000005
+
 #define MSR_SEV_STATUS		0xc0010131
 #define SEV_STAT_ENABLED	0x00000001

diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index 21f2f7d9483..22c60907f1a 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -623,8 +623,11 @@ struct vmcb_segment {
	uint64_t			vs_base;		/* 008h */
 };

-#define SVM_ENABLE_NP		(1ULL << 0)
-#define SVM_ENABLE_SEV		(1ULL << 1)
+#define SVM_ENABLE_NP		(1ULL << 0)
+#define SVM_ENABLE_SEV		(1ULL << 1)
+#define SVM_SEVES_ENABLE	(1ULL << 2)
+
+#define SVM_LBRVIRT_ENABLE	(1ULL << 0)

 struct vmcb {
	union {
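P.S.: On the svm_setmsrbr()/svm_setmsrbrw() calls above: svm_setmsrbr()
opens an MSR for guest reads only, svm_setmsrbrw() for reads and
writes.  Conceptually both clear per-MSR intercept bits in the SVM
MSR permissions map, where each MSR has a read bit (even) and a
write bit (odd), and a set bit means intercept; with the map
initialized to all ones, everything not explicitly opened stays
intercepted.  An illustrative sketch of that layout following the
AMD APM vol. 2, not vmm(4)'s actual implementation:

/*
 * Sketch of the SVM MSR permissions map (MSRPM): 2 bits per MSR,
 * even bit intercepts reads, odd bit intercepts writes; three 2K
 * regions cover the architectural MSR ranges.
 */
#include <stddef.h>
#include <stdint.h>

static int
msrpm_locate(uint32_t msr, size_t *byte, int *shift)
{
	uint32_t base, off;

	if (msr <= 0x1fff) {
		base = 0x0000; off = msr;
	} else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
		base = 0x0800; off = msr - 0xc0000000;
	} else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
		base = 0x1000; off = msr - 0xc0010000;
	} else
		return (-1);	/* MSR not covered by the MSRPM */

	*byte = base + off / 4;		/* 4 MSRs per byte */
	*shift = (off % 4) * 2;
	return (0);
}

/* What svm_setmsrbr() does in spirit: clear the read-intercept bit. */
static void
msrpm_allow_read(uint8_t *msrpm, uint32_t msr)
{
	size_t byte;
	int shift;

	if (msrpm_locate(msr, &byte, &shift) == 0)
		msrpm[byte] &= ~(1 << shift);
}

/* And svm_setmsrbrw() in spirit: clear read and write bits. */
static void
msrpm_allow_rw(uint8_t *msrpm, uint32_t msr)
{
	size_t byte;
	int shift;

	if (msrpm_locate(msr, &byte, &shift) == 0)
		msrpm[byte] &= ~(3 << shift);
}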