vmm(4): Configure SEV-ES 1/3
On Wed, Apr 30, 2025 at 04:22:03PM +0200, Hans-Jörg Höxer wrote:
> Hi,
>
> this is diff 1/3 of a patchset. It configures SEV-ES in vmm(4).
> Right now, this change must not alter current VM setups, as vmd(8) cannot
> configure SEV-ES yet; thus any change in behavior would be a regression.
>
> See commit message below for details.
>
> The following diffs 2 and 3 will address guest interrupt flag state and
> guest entry/exit.
>
> The next patchset will then deal with SEV-ES-specific guest exits.
>
> Take care,
> HJ.
>
This looks fine to me. ok mlarkin when you guys are ready.
> ----------------------------------------------------------------------------
> commit 8a739a5c9ca492e77fd0cb65716cab53f837f2d9
> Author: Hans-Joerg Hoexer <hshoexer@genua.de>
> Date: Wed Jan 22 11:43:03 2025 +0100
>
> vmm(4): Configure SEV-ES
>
> For SEV-ES we have to adjust the guest configuration:
>
> - Do not intercept XSETBV; we cannot force access to XCR0 as it
> is part of the encrypted state.
>
> - We do not have direct access to EFER and CR[04], thus we intercept
> writes to EFER and CR[04] "post write"; the provided exit handler
> keeps track of the guest state.
>
> - Regarding MSRs:
> - Allow access to GHCB MSR (will be used for guest-host communication)
> - Allow reading XSS state (will be needed for CPUID Extended State
> Enumeration)
> - Allow full read/write for EFER; the SVME bit cannot be modified
> with SEV-ES; it will always be 1
>
> - SEV-ES requires LBR virtualization; thus enable it.
>
> When everything is set up, the initial state is copied to the VMSA,
> which is not yet encrypted. Encrypting it will have to be done by
> vmd(8) and psp(4).
>
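For readers following the series: SEV-ES support builds on the guest
being able to tell that it runs encrypted. Roughly, a guest would
distinguish plain SEV from SEV-ES like this (a minimal sketch per the
APM's CPUID 0x8000001f and SEV_STATUS layouts; not part of this diff,
and the CPUID_LEAF() and rdmsr() helpers are assumed):

    uint32_t eax, ebx, ecx, edx;
    uint64_t status;
    int sev_es = 0;

    /* CPUID 0x8000001f EAX: bit 1 = SEV, bit 3 = SEV-ES supported */
    CPUID_LEAF(0x8000001f, 0, eax, ebx, ecx, edx);
    if (eax & (1 << 3)) {
        /* MSR_SEV_STATUS: bit 0 = SEV active, bit 1 = SEV-ES active */
        status = rdmsr(MSR_SEV_STATUS);
        sev_es = (status & (1ULL << 1)) != 0;
    }
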
> diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c
> index 79b348ba061..3d5c58de71d 100644
> --- a/sys/arch/amd64/amd64/vmm_machdep.c
> +++ b/sys/arch/amd64/amd64/vmm_machdep.c
> @@ -81,6 +81,7 @@ int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
> int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
> int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
> int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
> +int vcpu_svm_init_vmsa(struct vcpu *, struct vcpu_reg_state *);
> int vcpu_reload_vmcs_vmx(struct vcpu *);
> int vcpu_init(struct vcpu *, struct vm_create_params *);
> int vcpu_init_vmx(struct vcpu *);
> @@ -96,6 +97,7 @@ int vmx_get_exit_info(uint64_t *, uint64_t *);
> int vmx_load_pdptes(struct vcpu *);
> int vmx_handle_exit(struct vcpu *);
> int svm_handle_exit(struct vcpu *);
> +int svm_handle_efercr(struct vcpu *, uint64_t);
> int svm_handle_msr(struct vcpu *);
> int vmm_handle_xsetbv(struct vcpu *, uint64_t *);
> int vmx_handle_xsetbv(struct vcpu *);
> @@ -1583,6 +1585,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
> * External NMI exiting (SVM_INTERCEPT_NMI)
> * CPUID instruction (SVM_INTERCEPT_CPUID)
> * HLT instruction (SVM_INTERCEPT_HLT)
> + * INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
> * I/O instructions (SVM_INTERCEPT_INOUT)
> * MSR access (SVM_INTERCEPT_MSR)
> * shutdown events (SVM_INTERCEPT_SHUTDOWN)
> @@ -1612,9 +1615,17 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
> SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR |
> SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP;
>
> - if (xsave_mask)
> + /* With SEV-ES we cannot force access to XCR0, thus no intercept */
> + if (xsave_mask && !vcpu->vc_seves)
> vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV;
>
> + if (vcpu->vc_seves) {
> + /* With SEV-ES also intercept post EFER and CR[04] writes */
> + vmcb->v_intercept2 |= SVM_INTERCEPT_EFER_WRITE;
> + vmcb->v_intercept2 |= SVM_INTERCEPT_CR0_WRITE_POST;
> + vmcb->v_intercept2 |= SVM_INTERCEPT_CR4_WRITE_POST;
> + }
> +
> /* Setup I/O bitmap */
> memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE);
> vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);
> @@ -1634,8 +1645,26 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
> svm_setmsrbrw(vcpu, MSR_GSBASE);
> svm_setmsrbrw(vcpu, MSR_KERNELGSBASE);
>
> - /* EFER is R/O so we can ensure the guest always has SVME */
> - svm_setmsrbr(vcpu, MSR_EFER);
> + /* allow reading SEV status */
> + svm_setmsrbrw(vcpu, MSR_SEV_STATUS);
> +
> + if (vcpu->vc_seves) {
> + /* Allow read/write GHCB guest physical address */
> + svm_setmsrbrw(vcpu, MSR_SEV_GHCB);
> +
> + /* Allow reading MSR_XSS; for CPUID Extended State Enum. */
> + svm_setmsrbr(vcpu, MSR_XSS);
> +
> + /*
> + * With SEV-ES SVME can't be modified by the guest;
> + * host can only intercept post-write (see
> + * SVM_INTERCEPT_EFER_WRITE above).
> + */
> + svm_setmsrbrw(vcpu, MSR_EFER);
> + } else {
> + /* EFER is R/O so we can ensure the guest always has SVME */
> + svm_setmsrbr(vcpu, MSR_EFER);
> + }
>
> /* allow reading TSC */
> svm_setmsrbr(vcpu, MSR_TSC);
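
A note on the svm_setmsrbr()/svm_setmsrbrw() calls above: SVM's MSR
permission map dedicates two bits per MSR, the even bit intercepting
reads and the odd bit writes, laid out in 2KB blocks per MSR range
(APM vol. 2). A sketch of the arithmetic such a helper performs --
the name and interface here are illustrative, not vmm(4)'s actual
code:

    /* Clear both intercept bits so the guest can access 'msr'. */
    void
    msrpm_allow_rw(uint8_t *msrpm, uint32_t msr)
    {
        uint32_t base, idx;

        if (msr <= 0x1fff)
            base = 0x0;     /* block 0: 0x00000000-0x00001fff */
        else if (msr >= 0xc0000000 && msr <= 0xc0001fff)
            base = 0x800;   /* block 1 */
        else if (msr >= 0xc0010000 && msr <= 0xc0011fff)
            base = 0x1000;  /* block 2 */
        else
            return;         /* uncovered MSRs are always intercepted */

        idx = (msr & 0x1fff) * 2;   /* read bit; write bit is idx + 1 */
        msrpm[base + idx / 8] &= ~(3 << (idx % 8));
    }

MSR_SEV_GHCB (0xc0010130) falls into the third 2KB block, alongside
the other 0xc001xxxx MSRs.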
> @@ -1667,19 +1696,78 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
> if (vcpu->vc_sev)
> vmcb->v_np_enable |= SVM_ENABLE_SEV;
>
> + /* SEV-ES */
> + if (vcpu->vc_seves) {
> + vmcb->v_np_enable |= SVM_SEVES_ENABLE;
> + vmcb->v_lbr_virt_enable |= SVM_LBRVIRT_ENABLE;
> +
> + /* Set VMSA. */
> + vmcb->v_vmsa_pa = vcpu->vc_svm_vmsa_pa;
> + }
> +
> /* Enable SVME in EFER (must always be set) */
> vmcb->v_efer |= EFER_SVME;
>
> - ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs);
> + if ((ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs)) != 0)
> + return ret;
>
> /* xcr0 power on default sets bit 0 (x87 state) */
> vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
>
> vcpu->vc_parent->vm_map->pmap->eptp = 0;
>
> + ret = vcpu_svm_init_vmsa(vcpu, vrs);
> +
> return ret;
> }
>
> +/*
> + * vcpu_svm_init_vmsa
> + *
> + * Initialize VMSA with initial VCPU state.
> + */
> +int
> +vcpu_svm_init_vmsa(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
> +{
> + uint64_t *gprs = vrs->vrs_gprs;
> + struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
> + struct vmsa *vmsa;
> +
> + if (!vcpu->vc_seves)
> + return 0;
> +
> + vmsa = (struct vmsa *)vcpu->vc_svm_vmsa_va;
> + memcpy(vmsa, &vmcb->vmcb_layout, sizeof(vmcb->vmcb_layout));
> +
> + vmsa->v_rax = gprs[VCPU_REGS_RAX];
> + vmsa->v_rbx = gprs[VCPU_REGS_RBX];
> + vmsa->v_rcx = gprs[VCPU_REGS_RCX];
> + vmsa->v_rdx = gprs[VCPU_REGS_RDX];
> + vmsa->v_rsp = gprs[VCPU_REGS_RSP];
> + vmsa->v_rbp = gprs[VCPU_REGS_RBP];
> + vmsa->v_rsi = gprs[VCPU_REGS_RSI];
> + vmsa->v_rdi = gprs[VCPU_REGS_RDI];
> +
> + vmsa->v_r8 = gprs[VCPU_REGS_R8];
> + vmsa->v_r9 = gprs[VCPU_REGS_R9];
> + vmsa->v_r10 = gprs[VCPU_REGS_R10];
> + vmsa->v_r11 = gprs[VCPU_REGS_R11];
> + vmsa->v_r12 = gprs[VCPU_REGS_R12];
> + vmsa->v_r13 = gprs[VCPU_REGS_R13];
> + vmsa->v_r14 = gprs[VCPU_REGS_R14];
> + vmsa->v_r15 = gprs[VCPU_REGS_R15];
> +
> + vmsa->v_rip = gprs[VCPU_REGS_RIP];
> +
> + vmsa->v_xcr0 = vcpu->vc_gueststate.vg_xcr0;
> +
> + /* initialize FPU */
> + vmsa->v_x87_fcw = __INITIAL_NPXCW__;
> + vmsa->v_mxcsr = __INITIAL_MXCSR__;
> +
> + return 0;
> +}
> +
> /*
> * svm_setmsrbr
> *
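Two details worth noting in vcpu_svm_init_vmsa() above: the memcpy
relies on the VMSA sharing the VMCB's state-save layout for its
initial fields, so the segment and control state already loaded into
the VMCB can be carried over wholesale; and the FPU fields get the
usual amd64 defaults. For reference, the values behind those
constants, as defined in amd64's machine/fpu.h:

    #define __INITIAL_NPXCW__   0x037f  /* x87 FCW: exceptions masked */
    #define __INITIAL_MXCSR__   0x1f80  /* MXCSR: masked, round-nearest */
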
> @@ -2759,6 +2847,10 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
> {
> int ret = 0;
>
> + /* Shall we enable SEV/SEV-ES? */
> + vcpu->vc_sev = vcp->vcp_sev;
> + vcpu->vc_seves = vcp->vcp_seves;
> +
> /* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */
> if (vmm_alloc_asid(&vcpu->vc_vpid, vcpu))
> return (ENOMEM);
> @@ -2844,10 +2936,6 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
> (uint64_t)vcpu->vc_svm_ioio_va,
> (uint64_t)vcpu->vc_svm_ioio_pa);
>
> - /* Shall we enable SEV/SEV-ES? */
> - vcpu->vc_sev = vcp->vcp_sev;
> - vcpu->vc_seves = vcp->vcp_seves;
> -
> if (vcpu->vc_seves) {
> /* Allocate VM save area VA */
> vcpu->vc_svm_vmsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
> @@ -4221,6 +4309,12 @@ svm_handle_exit(struct vcpu *vcpu)
> ret = vmm_inject_ud(vcpu);
> update_rip = 0;
> break;
> + case SVM_VMEXIT_EFER_WRITE_TRAP:
> + case SVM_VMEXIT_CR0_WRITE_TRAP:
> + case SVM_VMEXIT_CR4_WRITE_TRAP:
> + ret = svm_handle_efercr(vcpu, exit_reason);
> + update_rip = 0;
> + break;
> default:
> DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
> exit_reason, (uint64_t)vcpu->vc_control_pa);
> @@ -4246,6 +4340,35 @@ svm_handle_exit(struct vcpu *vcpu)
> return (ret);
> }
>
> +/*
> + * svm_handle_efercr
> + *
> + * With SEV-ES the hypervisor cannot intercept and modify writes
> + * to CR and EFER. However, a post write intercept notifies about
> + * the new state of these registers.
> + */
> +int
> +svm_handle_efercr(struct vcpu *vcpu, uint64_t exit_reason)
> +{
> + struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
> +
> + switch (exit_reason) {
> + case SVM_VMEXIT_EFER_WRITE_TRAP:
> + vmcb->v_efer = vmcb->v_exitinfo1;
> + break;
> + case SVM_VMEXIT_CR0_WRITE_TRAP:
> + vmcb->v_cr0 = vmcb->v_exitinfo1;
> + break;
> + case SVM_VMEXIT_CR4_WRITE_TRAP:
> + vmcb->v_cr4 = vmcb->v_exitinfo1;
> + break;
> + default:
> + return (EINVAL);
> + }
> +
> + return (0);
> +}
> +
> /*
> * vmx_handle_exit
> *
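One subtlety here: EFER_WRITE_TRAP and the CR0/CR4 write traps are
trap-style intercepts. The write has already completed inside the
encrypted guest state by the time the exit is delivered, and the new
value is reported in EXITINFO1. That is also why svm_handle_exit()
leaves update_rip = 0 for these exit reasons: the guest's RIP lives
in the encrypted VMSA and has already advanced past the instruction,
so there is nothing for the host to adjust.
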
> diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
> index 4208e2e13a1..1235cc7a65a 100644
> --- a/sys/arch/amd64/include/specialreg.h
> +++ b/sys/arch/amd64/include/specialreg.h
> @@ -723,6 +723,10 @@
> #define NB_CFG_DISIOREQLOCK 0x0000000000000004ULL
> #define NB_CFG_DISDATMSK 0x0000001000000000ULL
>
> +#define MSR_SEV_GHCB 0xc0010130
> +#define SEV_CPUID_REQ 0x00000004
> +#define SEV_CPUID_RESP 0x00000005
> +
> #define MSR_SEV_STATUS 0xc0010131
> #define SEV_STAT_ENABLED 0x00000001
>
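SEV_CPUID_REQ/SEV_CPUID_RESP are part of the GHCB MSR protocol (AMD
GHCB specification); handling these in vmm(4) comes with the follow-up
patchset on SEV-ES guest exits. For orientation, the guest side of a
CPUID request looks roughly like this (a sketch assuming wrmsr() and
rdmsr() helpers; per the spec, the register index goes in bits 31:30
and the leaf in bits 63:32 of the GHCB MSR value):

    uint32_t eax;
    uint64_t resp;

    /* request EAX (index 0) of leaf 0x8000001f via the GHCB MSR */
    wrmsr(MSR_SEV_GHCB,
        ((uint64_t)0x8000001f << 32) | (0ULL << 30) | SEV_CPUID_REQ);
    __asm volatile("rep; vmmcall");     /* VMGEXIT */
    resp = rdmsr(MSR_SEV_GHCB);
    if ((resp & 0xfff) == SEV_CPUID_RESP)
        eax = (uint32_t)(resp >> 32);
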
> diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
> index 21f2f7d9483..22c60907f1a 100644
> --- a/sys/arch/amd64/include/vmmvar.h
> +++ b/sys/arch/amd64/include/vmmvar.h
> @@ -623,8 +623,11 @@ struct vmcb_segment {
> uint64_t vs_base; /* 008h */
> };
>
> -#define SVM_ENABLE_NP (1ULL << 0)
> -#define SVM_ENABLE_SEV (1ULL << 1)
> +#define SVM_ENABLE_NP (1ULL << 0)
> +#define SVM_ENABLE_SEV (1ULL << 1)
> +#define SVM_SEVES_ENABLE (1ULL << 2)
> +
> +#define SVM_LBRVIRT_ENABLE (1ULL << 0)
>
> struct vmcb {
> union {