From: Mike Larkin
Subject: Re: vmm(4): Configure SEV-ES 1/3
To: tech@openbsd.org, Hans-Joerg_Hoexer@genua.de
Date: Sun, 18 May 2025 23:56:07 -0700

On Wed, Apr 30, 2025 at 04:22:03PM +0200, Hans-Jörg Höxer wrote:
> Hi,
>
> this is diff 1/3 of a patchset. It configures SEV-ES in vmm(4).
> Right now, this change must not alter current VM setups, as vmd(8)
> cannot configure SEV-ES yet; thus any change in behavior would be a
> regression.
>
> See the commit message below for details.
>
> The following diffs 2 and 3 will address guest interrupt flag state and
> guest entry/exit.
>
> The next patchset will then deal with SEV-ES specific guest exits.
>
> Take care,
> HJ.
>

This looks fine to me. ok mlarkin when you guys are ready.

> ----------------------------------------------------------------------------
> commit 8a739a5c9ca492e77fd0cb65716cab53f837f2d9
> Author: Hans-Joerg Hoexer
> Date:   Wed Jan 22 11:43:03 2025 +0100
>
>     vmm(4): Configure SEV-ES
>
>     For SEV-ES we have to adjust the guest configuration:
>
>     - Do not intercept XSETBV; we cannot force access to XCR0 as it
>       is part of the encrypted state.
>
>     - We do not have direct access to EFER and CR[04], thus intercept
>       EFER and CR[04] "post write"; the provided exit handler will keep
>       track of the guest state.
>
>     - Regarding MSRs:
>       - Allow access to the GHCB MSR (will be used for guest-host
>         communication).
>       - Allow reading XSS state (will be needed for CPUID Extended
>         State Enumeration).
>       - Allow full read/write for EFER; the SVME bit cannot be
>         modified with SEV-ES; it will always be 1.
>
>     - SEV-ES requires LBR virtualization; thus enable it.
>
>     When everything is set up, the initial state is copied to the
>     VMSA, which is not yet encrypted.  Encrypting it will have to be
>     done by vmd(8) and psp(4).
>
> diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c
> index 79b348ba061..3d5c58de71d 100644
> --- a/sys/arch/amd64/amd64/vmm_machdep.c
> +++ b/sys/arch/amd64/amd64/vmm_machdep.c
> @@ -81,6 +81,7 @@ int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
>  int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
>  int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
>  int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
> +int vcpu_svm_init_vmsa(struct vcpu *, struct vcpu_reg_state *);
>  int vcpu_reload_vmcs_vmx(struct vcpu *);
>  int vcpu_init(struct vcpu *, struct vm_create_params *);
>  int vcpu_init_vmx(struct vcpu *);
> @@ -96,6 +97,7 @@ int vmx_get_exit_info(uint64_t *, uint64_t *);
>  int vmx_load_pdptes(struct vcpu *);
>  int vmx_handle_exit(struct vcpu *);
>  int svm_handle_exit(struct vcpu *);
> +int svm_handle_efercr(struct vcpu *, uint64_t);
>  int svm_handle_msr(struct vcpu *);
>  int vmm_handle_xsetbv(struct vcpu *, uint64_t *);
>  int vmx_handle_xsetbv(struct vcpu *);
> @@ -1583,6 +1585,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
>  	 * External NMI exiting (SVM_INTERCEPT_NMI)
>  	 * CPUID instruction (SVM_INTERCEPT_CPUID)
>  	 * HLT instruction (SVM_INTERCEPT_HLT)
> +	 * INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
>  	 * I/O instructions (SVM_INTERCEPT_INOUT)
>  	 * MSR access (SVM_INTERCEPT_MSR)
>  	 * shutdown events (SVM_INTERCEPT_SHUTDOWN)
> @@ -1612,9 +1615,17 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
>  	    SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR |
>  	    SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP;
>
> -	if (xsave_mask)
> +	/* With SEV-ES we cannot force access to XCR0, thus no intercept */
> +	if (xsave_mask && !vcpu->vc_seves)
>  		vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV;
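A note for readers following the diff: with plain SEV, an XSETBV in the
guest still exits to vmm_handle_xsetbv(), where the host can veto XCR0
bits. With SEV-ES, XCR0 lives in the encrypted VMSA, so the host can
neither observe nor filter the write; dropping the intercept is the
only consistent choice. For reference, the guest-side instruction is
the usual one (minimal sketch, not part of this diff):

	/* Load XCR[reg] from edx:eax; exits to the host only without SEV-ES */
	static inline void
	xsetbv(uint32_t reg, uint64_t val)
	{
		uint32_t lo = val & 0xffffffff, hi = val >> 32;

		__asm volatile("xsetbv" : : "c" (reg), "a" (lo), "d" (hi));
	}

The initial XCR0 value is instead seeded into the VMSA below, in
vcpu_svm_init_vmsa().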
>
> +	if (vcpu->vc_seves) {
> +		/* With SEV-ES, also intercept EFER and CR[04] post-writes */
> +		vmcb->v_intercept2 |= SVM_INTERCEPT_EFER_WRITE;
> +		vmcb->v_intercept2 |= SVM_INTERCEPT_CR0_WRITE_POST;
> +		vmcb->v_intercept2 |= SVM_INTERCEPT_CR4_WRITE_POST;
> +	}
> +
>  	/* Setup I/O bitmap */
>  	memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE);
>  	vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);
> @@ -1634,8 +1645,26 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
>  	svm_setmsrbrw(vcpu, MSR_GSBASE);
>  	svm_setmsrbrw(vcpu, MSR_KERNELGSBASE);
>
> -	/* EFER is R/O so we can ensure the guest always has SVME */
> -	svm_setmsrbr(vcpu, MSR_EFER);
> +	/* allow reading SEV status */
> +	svm_setmsrbrw(vcpu, MSR_SEV_STATUS);
> +
> +	if (vcpu->vc_seves) {
> +		/* Allow read/write of the GHCB guest physical address */
> +		svm_setmsrbrw(vcpu, MSR_SEV_GHCB);
> +
> +		/* Allow reading MSR_XSS; for CPUID Extended State Enum. */
> +		svm_setmsrbr(vcpu, MSR_XSS);
> +
> +		/*
> +		 * With SEV-ES, SVME cannot be modified by the guest;
> +		 * the host can only intercept post-write (see
> +		 * SVM_INTERCEPT_EFER_WRITE above).
> +		 */
> +		svm_setmsrbrw(vcpu, MSR_EFER);
> +	} else {
> +		/* EFER is R/O so we can ensure the guest always has SVME */
> +		svm_setmsrbr(vcpu, MSR_EFER);
> +	}
>
>  	/* allow reading TSC */
>  	svm_setmsrbr(vcpu, MSR_TSC);
> @@ -1667,19 +1696,78 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
>  	if (vcpu->vc_sev)
>  		vmcb->v_np_enable |= SVM_ENABLE_SEV;
>
> +	/* SEV-ES */
> +	if (vcpu->vc_seves) {
> +		vmcb->v_np_enable |= SVM_SEVES_ENABLE;
> +		vmcb->v_lbr_virt_enable |= SVM_LBRVIRT_ENABLE;
> +
> +		/* Set VMSA. */
> +		vmcb->v_vmsa_pa = vcpu->vc_svm_vmsa_pa;
> +	}
> +
>  	/* Enable SVME in EFER (must always be set) */
>  	vmcb->v_efer |= EFER_SVME;
>
> -	ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs);
> +	if ((ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs)) != 0)
> +		return ret;
>
>  	/* xcr0 power on default sets bit 0 (x87 state) */
>  	vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
>
>  	vcpu->vc_parent->vm_map->pmap->eptp = 0;
>
> +	ret = vcpu_svm_init_vmsa(vcpu, vrs);
> +
>  	return ret;
>  }
>
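The GHCB MSR opened up above is the guest-host communication channel
mentioned in the commit message. Before the guest has a full GHCB page
set up, it talks through the GHCB MSR protocol; the
SEV_CPUID_REQ/SEV_CPUID_RESP constants added to specialreg.h below are
for exactly that. Roughly, per the GHCB spec (guest-side sketch under
that assumption; leaf, regidx and value are placeholders):

	uint32_t leaf = 0x8000001f, regidx = 0;	/* 0 = EAX ... 3 = EDX */
	uint64_t resp, value = 0;

	/* GHCBData[11:0] = request, [31:30] = register, [63:32] = leaf */
	wrmsr(MSR_SEV_GHCB,
	    ((uint64_t)leaf << 32) | ((uint64_t)regidx << 30) | SEV_CPUID_REQ);
	__asm volatile("rep; vmmcall");		/* VMGEXIT */
	resp = rdmsr(MSR_SEV_GHCB);
	if ((resp & 0xfff) == SEV_CPUID_RESP)
		value = resp >> 32;

Handling such requests on the host side is what the upcoming patchset
on SEV-ES specific guest exits will deal with.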
> +/*
> + * vcpu_svm_init_vmsa
> + *
> + * Initialize VMSA with initial VCPU state.
> + */
> +int
> +vcpu_svm_init_vmsa(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
> +{
> +	uint64_t *gprs = vrs->vrs_gprs;
> +	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
> +	struct vmsa *vmsa;
> +
> +	if (!vcpu->vc_seves)
> +		return 0;
> +
> +	vmsa = (struct vmsa *)vcpu->vc_svm_vmsa_va;
> +	memcpy(vmsa, &vmcb->vmcb_layout, sizeof(vmcb->vmcb_layout));
> +
> +	vmsa->v_rax = gprs[VCPU_REGS_RAX];
> +	vmsa->v_rbx = gprs[VCPU_REGS_RBX];
> +	vmsa->v_rcx = gprs[VCPU_REGS_RCX];
> +	vmsa->v_rdx = gprs[VCPU_REGS_RDX];
> +	vmsa->v_rsp = gprs[VCPU_REGS_RSP];
> +	vmsa->v_rbp = gprs[VCPU_REGS_RBP];
> +	vmsa->v_rsi = gprs[VCPU_REGS_RSI];
> +	vmsa->v_rdi = gprs[VCPU_REGS_RDI];
> +
> +	vmsa->v_r8 = gprs[VCPU_REGS_R8];
> +	vmsa->v_r9 = gprs[VCPU_REGS_R9];
> +	vmsa->v_r10 = gprs[VCPU_REGS_R10];
> +	vmsa->v_r11 = gprs[VCPU_REGS_R11];
> +	vmsa->v_r12 = gprs[VCPU_REGS_R12];
> +	vmsa->v_r13 = gprs[VCPU_REGS_R13];
> +	vmsa->v_r14 = gprs[VCPU_REGS_R14];
> +	vmsa->v_r15 = gprs[VCPU_REGS_R15];
> +
> +	vmsa->v_rip = gprs[VCPU_REGS_RIP];
> +
> +	vmsa->v_xcr0 = vcpu->vc_gueststate.vg_xcr0;
> +
> +	/* initialize FPU */
> +	vmsa->v_x87_fcw = __INITIAL_NPXCW__;
> +	vmsa->v_mxcsr = __INITIAL_MXCSR__;
> +
> +	return 0;
> +}
> +
>  /*
>   * svm_setmsrbr
>   *
> @@ -2759,6 +2847,10 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
>  {
>  	int ret = 0;
>
> +	/* Shall we enable SEV/SEV-ES? */
> +	vcpu->vc_sev = vcp->vcp_sev;
> +	vcpu->vc_seves = vcp->vcp_seves;
> +
>  	/* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */
>  	if (vmm_alloc_asid(&vcpu->vc_vpid, vcpu))
>  		return (ENOMEM);
> @@ -2844,10 +2936,6 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
>  	    (uint64_t)vcpu->vc_svm_ioio_va,
>  	    (uint64_t)vcpu->vc_svm_ioio_pa);
>
> -	/* Shall we enable SEV/SEV-ES? */
> -	vcpu->vc_sev = vcp->vcp_sev;
> -	vcpu->vc_seves = vcp->vcp_seves;
> -
>  	if (vcpu->vc_seves) {
>  		/* Allocate VM save area VA */
>  		vcpu->vc_svm_vmsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
> @@ -4221,6 +4309,12 @@ svm_handle_exit(struct vcpu *vcpu)
>  		ret = vmm_inject_ud(vcpu);
>  		update_rip = 0;
>  		break;
> +	case SVM_VMEXIT_EFER_WRITE_TRAP:
> +	case SVM_VMEXIT_CR0_WRITE_TRAP:
> +	case SVM_VMEXIT_CR4_WRITE_TRAP:
> +		ret = svm_handle_efercr(vcpu, exit_reason);
> +		update_rip = 0;
> +		break;
>  	default:
>  		DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
>  		    exit_reason, (uint64_t)vcpu->vc_control_pa);
> @@ -4246,6 +4340,35 @@ svm_handle_exit(struct vcpu *vcpu)
>  	return (ret);
>  }
>
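Worth spelling out why update_rip = 0 is correct for these three cases:
unlike the ordinary fault-style CR write intercepts, the *_WRITE_TRAP
exits fire after the instruction has retired, so the saved RIP already
points at the next instruction and the host must not advance it again.
If we ever want to sanity-check the trapped values, something along
these lines could be layered on top of svm_handle_efercr() (a
hypothetical sketch, not part of this diff):

	case SVM_VMEXIT_EFER_WRITE_TRAP:
		/*
		 * Post-write trap: exitinfo1 is the value the guest is
		 * already running with.  With SEV-ES, SVME can never be
		 * cleared, so treat a clear bit as a fatal inconsistency.
		 */
		if ((vmcb->v_exitinfo1 & EFER_SVME) == 0)
			return (EINVAL);
		vmcb->v_efer = vmcb->v_exitinfo1;
		break;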
> +/*
> + * svm_handle_efercr
> + *
> + * With SEV-ES the hypervisor cannot intercept and modify writes
> + * to CR and EFER.  However, a post-write intercept notifies it
> + * about the new state of these registers.
> + */
> +int
> +svm_handle_efercr(struct vcpu *vcpu, uint64_t exit_reason)
> +{
> +	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
> +
> +	switch (exit_reason) {
> +	case SVM_VMEXIT_EFER_WRITE_TRAP:
> +		vmcb->v_efer = vmcb->v_exitinfo1;
> +		break;
> +	case SVM_VMEXIT_CR0_WRITE_TRAP:
> +		vmcb->v_cr0 = vmcb->v_exitinfo1;
> +		break;
> +	case SVM_VMEXIT_CR4_WRITE_TRAP:
> +		vmcb->v_cr4 = vmcb->v_exitinfo1;
> +		break;
> +	default:
> +		return (EINVAL);
> +	}
> +
> +	return (0);
> +}
> +
>  /*
>   * vmx_handle_exit
>   *
> diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
> index 4208e2e13a1..1235cc7a65a 100644
> --- a/sys/arch/amd64/include/specialreg.h
> +++ b/sys/arch/amd64/include/specialreg.h
> @@ -723,6 +723,10 @@
>  #define NB_CFG_DISIOREQLOCK	0x0000000000000004ULL
>  #define NB_CFG_DISDATMSK	0x0000001000000000ULL
>
> +#define MSR_SEV_GHCB		0xc0010130
> +#define SEV_CPUID_REQ		0x00000004
> +#define SEV_CPUID_RESP		0x00000005
> +
>  #define MSR_SEV_STATUS		0xc0010131
>  #define SEV_STAT_ENABLED	0x00000001
>
> diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
> index 21f2f7d9483..22c60907f1a 100644
> --- a/sys/arch/amd64/include/vmmvar.h
> +++ b/sys/arch/amd64/include/vmmvar.h
> @@ -623,8 +623,11 @@ struct vmcb_segment {
>  	uint64_t		vs_base;		/* 008h */
>  };
>
> -#define SVM_ENABLE_NP		(1ULL << 0)
> -#define SVM_ENABLE_SEV		(1ULL << 1)
> +#define SVM_ENABLE_NP		(1ULL << 0)
> +#define SVM_ENABLE_SEV		(1ULL << 1)
> +#define SVM_SEVES_ENABLE	(1ULL << 2)
> +
> +#define SVM_LBRVIRT_ENABLE	(1ULL << 0)
>
>  struct vmcb {
>  	union {
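One last note for testing once vmd(8) grows support in the follow-ups:
the MSR bitmap above leaves MSR_SEV_STATUS accessible, so a guest can
confirm what it is actually running with. A rough guest-side check
(bit 1 is the SEV-ES enabled bit per the APM; only SEV_STAT_ENABLED is
defined in specialreg.h so far):

	uint64_t status = rdmsr(MSR_SEV_STATUS);

	if (status & SEV_STAT_ENABLED)
		printf("SEV: guest memory is encrypted\n");
	if (status & (1ULL << 1))
		printf("SEV-ES: guest register state is encrypted, too\n");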