Index | Thread | Search

From:
hshoexer <hshoexer@yerbouti.franken.de>
Subject:
Re: vmm(4): For SEV-ES enabled guest raise #VC on MMIO related #NPF
To:
tech@openbsd.org
Date:
Thu, 13 Nov 2025 13:07:39 +0100

Download raw body.

Thread
On Thu, Nov 13, 2025 at 01:04:08PM +0100, hshoexer wrote:
> Hi,
> 
> for SEV-ES enabled guests MMIO should raise a #VC trap with error
> code SVM_VMEXIT_NPF.  This is required as vmm(4) can not analyze
> the guest code causing the exit; the guest itself has to assist
> which requires a #VC trap being raised.
> 
> As SEV-ES guest we try to avoid MMIO by using paravirtualization.
> Nonetheless, vmm(4) should enforce #NPF/MMIO for SEV guests; e.g.
> to find bugs.
> 
> Enforcing #VC on MMIO is accomplished by setting up a mapping in
> the nested page table that has reserved bits set.  The bits of the
> physical address portion of the PTE that are affected by physical
> address bit reduction are reserved.  When these bits are set, a #VC
> trap with error code SVM_VMEXIT_NPF is raised when the guest resumes.
> Further MMIO to the same page will raise #VC right away.
> 
> This approach is recommended by [1] section 4.1.5.  It can be tested
> with regress/sys/arch/amd64/seves_mmio/.
>     
> For non-SEV-ES guests nothing changes, we forward the fault to
> vmd(8) as before.

and I'm asking for comments and/or ok.

Take care,
HJ.

> [1] https://docs.amd.com/v/u/en-US/56421
> 
> ----------------------------------
> diff --git a/sys/arch/amd64/amd64/identcpu.c b/sys/arch/amd64/amd64/identcpu.c
> index ac4e845a1aa..86f05f1a7bf 100644
> --- a/sys/arch/amd64/amd64/identcpu.c
> +++ b/sys/arch/amd64/amd64/identcpu.c
> @@ -67,6 +67,8 @@ int cpuspeed;
>  
>  int amd64_has_xcrypt;
>  int amd64_pos_cbit;	/* C bit position for SEV */
> +int amd64_phys_addrsz;	/* Physcial address size */
> +int amd64_phys_red;	/* Physcial address size reduction */
>  int amd64_min_noes_asid;
>  int has_rdrand;
>  int has_rdseed;
> @@ -676,8 +678,9 @@ identifycpu(struct cpu_info *ci)
>  	/* speculation control features */
>  	if (ci->ci_vendor == CPUV_AMD) {
>  		if (ci->ci_pnfeatset >= 0x80000008) {
> -			CPUID(0x80000008, dummy, ci->ci_feature_amdspec_ebx,
> -			    dummy, dummy);
> +			CPUID(0x80000008, ci->ci_feature_amdspec_eax,
> +			    ci->ci_feature_amdspec_ebx, dummy, dummy);
> +			amd64_phys_addrsz = ci->ci_feature_amdspec_eax & 0xff;
>  			pcpuid(ci, "80000008", 'b',
>  			    CPUID_MEMBER(ci_feature_amdspec_ebx),
>  			    CPUID_AMDSPEC_EBX_BITS);
> @@ -711,6 +714,7 @@ identifycpu(struct cpu_info *ci)
>  		    'd', CPUID_MEMBER(ci_feature_amdsev_edx),
>  		    CPUID_AMDSEV_EDX_BITS);
>  		amd64_pos_cbit = (ci->ci_feature_amdsev_ebx & 0x3f);
> +		amd64_phys_red = ((ci->ci_feature_amdsev_ebx >> 6) & 0x3f);
>  		amd64_min_noes_asid = ci->ci_feature_amdsev_edx;
>  		if (cpu_sev_guestmode && CPU_IS_PRIMARY(ci))
>  			printf("\n%s: SEV%s guest mode", ci->ci_dev->dv_xname,
> diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c
> index abcd177a700..be6c0aea6ca 100644
> --- a/sys/arch/amd64/amd64/vmm_machdep.c
> +++ b/sys/arch/amd64/amd64/vmm_machdep.c
> @@ -5023,6 +5023,7 @@ int
>  svm_handle_np_fault(struct vcpu *vcpu)
>  {
>  	uint64_t gpa;
> +	paddr_t hpa;
>  	int gpa_memtype, ret = 0;
>  	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
>  	struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee;
> @@ -5039,14 +5040,25 @@ svm_handle_np_fault(struct vcpu *vcpu)
>  		ret = svm_fault_page(vcpu, gpa);
>  		break;
>  	case VMM_MEM_TYPE_MMIO:
> -		vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
> -		if (ci->ci_vmm_cap.vcc_svm.svm_decode_assist) {
> -			vee->vee_insn_len = vmcb->v_n_bytes_fetched;
> -			memcpy(&vee->vee_insn_bytes, vmcb->v_guest_ins_bytes,
> -			    sizeof(vee->vee_insn_bytes));
> -			vee->vee_insn_info |= VEE_BYTES_VALID;
> +		if (vcpu->vc_seves) {
> +			vee->vee_fault_type = VEE_FAULT_HANDLED;
> +			/* Setup invalid mapping to raise #VC in guest */
> +			hpa = (1ULL << amd64_phys_red) - 1;
> +			hpa <<= (amd64_phys_addrsz - amd64_phys_red);
> +			ret = pmap_enter(vcpu->vc_parent->vm_pmap,
> +			    trunc_page(gpa), hpa,
> +			    PROT_READ | PROT_WRITE | PROT_EXEC, 0);
> +		} else {
> +			vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
> +			if (ci->ci_vmm_cap.vcc_svm.svm_decode_assist) {
> +				vee->vee_insn_len = vmcb->v_n_bytes_fetched;
> +				memcpy(&vee->vee_insn_bytes,
> +				    vmcb->v_guest_ins_bytes,
> +				    sizeof(vee->vee_insn_bytes));
> +				vee->vee_insn_info |= VEE_BYTES_VALID;
> +			}
> +			ret = EAGAIN;
>  		}
> -		ret = EAGAIN;
>  		break;
>  	default:
>  		printf("%s: unknown memory type %d for GPA 0x%llx\n",
> diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
> index c7d34e634cf..2c8616dede9 100644
> --- a/sys/arch/amd64/include/cpu.h
> +++ b/sys/arch/amd64/include/cpu.h
> @@ -170,6 +170,7 @@ struct cpu_info {
>  	u_int32_t	ci_feature_sefflags_ebx;/* [I] */
>  	u_int32_t	ci_feature_sefflags_ecx;/* [I] */
>  	u_int32_t	ci_feature_sefflags_edx;/* [I] */
> +	u_int32_t	ci_feature_amdspec_eax;	/* [I] */
>  	u_int32_t	ci_feature_amdspec_ebx;	/* [I] */
>  	u_int32_t	ci_feature_amdsev_eax;	/* [I] */
>  	u_int32_t	ci_feature_amdsev_ebx;	/* [I] */
> @@ -418,6 +419,8 @@ void	identifycpu(struct cpu_info *);
>  int	cpu_amd64speed(int *);
>  extern int cpuspeed;
>  extern int amd64_pos_cbit;
> +extern int amd64_phys_addrsz;
> +extern int amd64_phys_red;
>  extern int amd64_min_noes_asid;
>  
>  /* machdep.c */
>