Index | Thread | Search

From:
Mike Larkin <mlarkin@nested.page>
Subject:
Re: vmm(4): Configure SEV-ES 1/3
To:
tech@openbsd.org, Hans-Joerg_Hoexer@genua.de
Date:
Sun, 18 May 2025 23:56:07 -0700

Download raw body.

Thread
On Wed, Apr 30, 2025 at 04:22:03PM +0200, Hans-Jörg Höxer wrote:
> Hi,
>
> this is diff 1/3 of a patchset.  It configures SEV-ES in vmm(4).
> Right now, this change may not alter current VM setups as vmd(8) can
> not configure SEV-ES, yet.  Thus any change would be a regression.
>
> See commit message below for details.
>
> The following diffs 2 and 3 will address guest interrupt flag state and
> guest entry/exit.
>
> The next patchset will then deal with SEV-ES specific guest exits.
>
> Take care,
> HJ.
>

This looks fine to me. ok mlarkin when you guys are ready.

> ----------------------------------------------------------------------------
> commit 8a739a5c9ca492e77fd0cb65716cab53f837f2d9
> Author: Hans-Joerg Hoexer <hshoexer@genua.de>
> Date:   Wed Jan 22 11:43:03 2025 +0100
>
>     vmm(4): Configure SEV-ES
>
>     For SEV-ES we have to adjust the guest configuration:
>
>     - Do not intercept XSETBV; we can not force access to XCR0 as it
>       is part of the encrypted state.
>
>     - We do not have direct access to EFER and CR[04], thus intercept
>       EFER and CR[04] "post write"; the provided exit handler will keep
>       track of the guest state.
>
>     - Regarding MSRs:
>       - Allow access to GHCB MSR (will be used for guest-host communication)
>       - Allow reading XSS state (will be needed for CPUID Extended State
>         Enumeration)
>       - Allow full read/write for EFER; SVME bit cannot be modified
>         with SEV-ES; it will always be 1
>
>     - SEV-ES requires LBR virtualization; thus enable
>
>     When everything is set up, the initial state is copied to the VMSA,
>     which is not yet encrypted.  Will have to be done by vmd(8) and
>     psp(4).
>
> diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c
> index 79b348ba061..3d5c58de71d 100644
> --- a/sys/arch/amd64/amd64/vmm_machdep.c
> +++ b/sys/arch/amd64/amd64/vmm_machdep.c
> @@ -81,6 +81,7 @@ int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
>  int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
>  int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
>  int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
> +int vcpu_svm_init_vmsa(struct vcpu *, struct vcpu_reg_state *);
>  int vcpu_reload_vmcs_vmx(struct vcpu *);
>  int vcpu_init(struct vcpu *, struct vm_create_params *);
>  int vcpu_init_vmx(struct vcpu *);
> @@ -96,6 +97,7 @@ int vmx_get_exit_info(uint64_t *, uint64_t *);
>  int vmx_load_pdptes(struct vcpu *);
>  int vmx_handle_exit(struct vcpu *);
>  int svm_handle_exit(struct vcpu *);
> +int svm_handle_efercr(struct vcpu *, uint64_t);
>  int svm_handle_msr(struct vcpu *);
>  int vmm_handle_xsetbv(struct vcpu *, uint64_t *);
>  int vmx_handle_xsetbv(struct vcpu *);
> @@ -1583,6 +1585,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
>  	 * External NMI exiting (SVM_INTERCEPT_NMI)
>  	 * CPUID instruction (SVM_INTERCEPT_CPUID)
>  	 * HLT instruction (SVM_INTERCEPT_HLT)
> +	 * INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
>  	 * I/O instructions (SVM_INTERCEPT_INOUT)
>  	 * MSR access (SVM_INTERCEPT_MSR)
>  	 * shutdown events (SVM_INTERCEPT_SHUTDOWN)
> @@ -1612,9 +1615,17 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
>  	    SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR |
>  	    SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP;
>
> -	if (xsave_mask)
> +	/* With SEV-ES we cannot force access to XCR0, thus no intercept */
> +	if (xsave_mask && !vcpu->vc_seves)
>  		vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV;
>
> +	if (vcpu->vc_seves) {
> +		/* With SEV-ES also intercept post EFER and CR[04] writes */
> +		vmcb->v_intercept2 |= SVM_INTERCEPT_EFER_WRITE;
> +		vmcb->v_intercept2 |= SVM_INTERCEPT_CR0_WRITE_POST;
> +		vmcb->v_intercept2 |= SVM_INTERCEPT_CR4_WRITE_POST;
> +	}
> +
>  	/* Setup I/O bitmap */
>  	memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE);
>  	vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);
> @@ -1634,8 +1645,26 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
>  	svm_setmsrbrw(vcpu, MSR_GSBASE);
>  	svm_setmsrbrw(vcpu, MSR_KERNELGSBASE);
>
> -	/* EFER is R/O so we can ensure the guest always has SVME */
> -	svm_setmsrbr(vcpu, MSR_EFER);
> +	/* allow reading SEV status */
> +	svm_setmsrbrw(vcpu, MSR_SEV_STATUS);
> +
> +	if (vcpu->vc_seves) {
> +		/* Allow read/write GHCB guest physical address */
> +		svm_setmsrbrw(vcpu, MSR_SEV_GHCB);
> +
> +		/* Allow reading MSR_XSS; for CPUID Extended State Enum. */
> +		svm_setmsrbr(vcpu, MSR_XSS);
> +
> +		/*
> +		 * With SEV-ES SVME can't be modified by the guest;
> +		 * host can only intercept post-write (see
> +		 * SVM_INTERCEPT_EFER_WRITE above).
> +		 */
> +		svm_setmsrbrw(vcpu, MSR_EFER);
> +	} else {
> +		/* EFER is R/O so we can ensure the guest always has SVME */
> +		svm_setmsrbr(vcpu, MSR_EFER);
> +	}
>
>  	/* allow reading TSC */
>  	svm_setmsrbr(vcpu, MSR_TSC);
> @@ -1667,19 +1696,78 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
>  	if (vcpu->vc_sev)
>  		vmcb->v_np_enable |= SVM_ENABLE_SEV;
>
> +	/* SEV-ES */
> +	if (vcpu->vc_seves) {
> +		vmcb->v_np_enable |= SVM_SEVES_ENABLE;
> +		vmcb->v_lbr_virt_enable |= SVM_LBRVIRT_ENABLE;
> +
> +		/* Set VMSA. */
> +		vmcb->v_vmsa_pa = vcpu->vc_svm_vmsa_pa;
> +	}
> +
>  	/* Enable SVME in EFER (must always be set) */
>  	vmcb->v_efer |= EFER_SVME;
>
> -	ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs);
> +	if ((ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs)) != 0)
> +		return ret;
>
>  	/* xcr0 power on default sets bit 0 (x87 state) */
>  	vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
>
>  	vcpu->vc_parent->vm_map->pmap->eptp = 0;
>
> +	ret = vcpu_svm_init_vmsa(vcpu, vrs);
> +
>  	return ret;
>  }
>
> +/*
> + * vcpu_svm_init_vmsa
> + *
> + * Initialize VMSA with initial VCPU state.
> + */
> +int
> +vcpu_svm_init_vmsa(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
> +{
> +	uint64_t	*gprs = vrs->vrs_gprs;
> +	struct vmcb	*vmcb = (struct vmcb *)vcpu->vc_control_va;
> +	struct vmsa	*vmsa;
> +
> +	if (!vcpu->vc_seves)
> +		return 0;
> +
> +	vmsa = (struct vmsa *)vcpu->vc_svm_vmsa_va;
> +	memcpy(vmsa, &vmcb->vmcb_layout, sizeof(vmcb->vmcb_layout));
> +
> +	vmsa->v_rax = gprs[VCPU_REGS_RAX];
> +	vmsa->v_rbx = gprs[VCPU_REGS_RBX];
> +	vmsa->v_rcx = gprs[VCPU_REGS_RCX];
> +	vmsa->v_rdx = gprs[VCPU_REGS_RDX];
> +	vmsa->v_rsp = gprs[VCPU_REGS_RSP];
> +	vmsa->v_rbp = gprs[VCPU_REGS_RBP];
> +	vmsa->v_rsi = gprs[VCPU_REGS_RSI];
> +	vmsa->v_rdi = gprs[VCPU_REGS_RDI];
> +
> +	vmsa->v_r8 = gprs[VCPU_REGS_R8];
> +	vmsa->v_r9 = gprs[VCPU_REGS_R9];
> +	vmsa->v_r10 = gprs[VCPU_REGS_R10];
> +	vmsa->v_r11 = gprs[VCPU_REGS_R11];
> +	vmsa->v_r12 = gprs[VCPU_REGS_R12];
> +	vmsa->v_r13 = gprs[VCPU_REGS_R13];
> +	vmsa->v_r14 = gprs[VCPU_REGS_R14];
> +	vmsa->v_r15 = gprs[VCPU_REGS_R15];
> +
> +	vmsa->v_rip = gprs[VCPU_REGS_RIP];
> +
> +	vmsa->v_xcr0 = vcpu->vc_gueststate.vg_xcr0;
> +
> +	/* initialize FPU */
> +	vmsa->v_x87_fcw = __INITIAL_NPXCW__;
> +	vmsa->v_mxcsr = __INITIAL_MXCSR__;
> +
> +	return 0;
> +}
> +
>  /*
>   * svm_setmsrbr
>   *
> @@ -2759,6 +2847,10 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
>  {
>  	int ret = 0;
>
> +	/* Shall we enable SEV/SEV-ES? */
> +	vcpu->vc_sev = vcp->vcp_sev;
> +	vcpu->vc_seves = vcp->vcp_seves;
> +
>  	/* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */
>  	if (vmm_alloc_asid(&vcpu->vc_vpid, vcpu))
>  		return (ENOMEM);
> @@ -2844,10 +2936,6 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
>  	    (uint64_t)vcpu->vc_svm_ioio_va,
>  	    (uint64_t)vcpu->vc_svm_ioio_pa);
>
> -	/* Shall we enable SEV/SEV-ES? */
> -	vcpu->vc_sev = vcp->vcp_sev;
> -	vcpu->vc_seves = vcp->vcp_seves;
> -
>  	if (vcpu->vc_seves) {
>  		/* Allocate VM save area VA */
>  		vcpu->vc_svm_vmsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page,
> @@ -4221,6 +4309,12 @@ svm_handle_exit(struct vcpu *vcpu)
>  		ret = vmm_inject_ud(vcpu);
>  		update_rip = 0;
>  		break;
> +	case SVM_VMEXIT_EFER_WRITE_TRAP:
> +	case SVM_VMEXIT_CR0_WRITE_TRAP:
> +	case SVM_VMEXIT_CR4_WRITE_TRAP:
> +		ret = svm_handle_efercr(vcpu, exit_reason);
> +		update_rip = 0;
> +		break;
>  	default:
>  		DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
>  		    exit_reason, (uint64_t)vcpu->vc_control_pa);
> @@ -4246,6 +4340,35 @@ svm_handle_exit(struct vcpu *vcpu)
>  	return (ret);
>  }
>
> +/*
> + * svm_handle_efercr
> + *
> + * With SEV-ES the hypervisor can not intercept and modify writes
> + * to CR and EFER.  However, a post write intercept notifies about
> + * the new state of these registers.
> + */
> +int
> +svm_handle_efercr(struct vcpu *vcpu, uint64_t exit_reason)
> +{
> +	struct vmcb	*vmcb = (struct vmcb *)vcpu->vc_control_va;
> +
> +	switch (exit_reason) {
> +	case SVM_VMEXIT_EFER_WRITE_TRAP:
> +		vmcb->v_efer = vmcb->v_exitinfo1;
> +		break;
> +	case SVM_VMEXIT_CR0_WRITE_TRAP:
> +		vmcb->v_cr0 = vmcb->v_exitinfo1;
> +		break;
> +	case SVM_VMEXIT_CR4_WRITE_TRAP:
> +		vmcb->v_cr4 = vmcb->v_exitinfo1;
> +		break;
> +	default:
> +		return (EINVAL);
> +	}
> +
> +	return (0);
> +}
> +
>  /*
>   * vmx_handle_exit
>   *
> diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
> index 4208e2e13a1..1235cc7a65a 100644
> --- a/sys/arch/amd64/include/specialreg.h
> +++ b/sys/arch/amd64/include/specialreg.h
> @@ -723,6 +723,10 @@
>  #define		NB_CFG_DISIOREQLOCK	0x0000000000000004ULL
>  #define		NB_CFG_DISDATMSK	0x0000001000000000ULL
>
> +#define MSR_SEV_GHCB	0xc0010130
> +#define		SEV_CPUID_REQ		0x00000004
> +#define		SEV_CPUID_RESP		0x00000005
> +
>  #define MSR_SEV_STATUS	0xc0010131
>  #define		SEV_STAT_ENABLED	0x00000001
>
> diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
> index 21f2f7d9483..22c60907f1a 100644
> --- a/sys/arch/amd64/include/vmmvar.h
> +++ b/sys/arch/amd64/include/vmmvar.h
> @@ -623,8 +623,11 @@ struct vmcb_segment {
>  	uint64_t			vs_base;		/* 008h */
>  };
>
> -#define SVM_ENABLE_NP	(1ULL << 0)
> -#define SVM_ENABLE_SEV	(1ULL << 1)
> +#define SVM_ENABLE_NP		(1ULL << 0)
> +#define SVM_ENABLE_SEV		(1ULL << 1)
> +#define SVM_SEVES_ENABLE	(1ULL << 2)
> +
> +#define SVM_LBRVIRT_ENABLE	(1ULL << 0)
>
>  struct vmcb {
>  	union {