From: hshoexer Subject: Re: vmm(4): For SEV-ES enabled guest raise #VC on MMIO related #NPF To: tech@openbsd.org Date: Thu, 13 Nov 2025 13:07:39 +0100 On Thu, Nov 13, 2025 at 01:04:08PM +0100, hshoexer wrote: > Hi, > > for SEV-ES enabled guests MMIO should raise a #VC trap with error > code SVM_VMEXIT_NPF. This is required as vmm(4) can not analyze > the guest code causing the exit; the guest itself has to assist > which requires a #VC trap being raised. > > As SEV-ES guest we try to avoid MMIO by using paravirtualization. > Nonetheless, vmm(4) should enforce #NPF/MMIO for SEV guests; e.g. > to find bugs. > > Enforcing #VC on MMIO is accomplished by setting up a mapping in > the nested page table that has reserved bits set. The bits of the > physical address portion of the PTE that are affected by physical > address bit reduction are reserved. When these bits are set, a #VC > trap with error code SVM_VMEXIT_NPF is raised when the guest resumes. > Further MMIO to the same page will raise #VC right away. > > This approach is recommended by [1] section 4.1.5. It can be tested > with regress/sys/arch/amd64/seves_mmio/. > > For non-SEV-ES guests nothing changes, we forward the fault to > vmd(8) as before. and I'm asking for comments and/or ok. Take care, HJ. > [1] https://docs.amd.com/v/u/en-US/56421 > > ---------------------------------- > diff --git a/sys/arch/amd64/amd64/identcpu.c b/sys/arch/amd64/amd64/identcpu.c > index ac4e845a1aa..86f05f1a7bf 100644 > --- a/sys/arch/amd64/amd64/identcpu.c > +++ b/sys/arch/amd64/amd64/identcpu.c > @@ -67,6 +67,8 @@ int cpuspeed; > > int amd64_has_xcrypt; > int amd64_pos_cbit; /* C bit position for SEV */ > +int amd64_phys_addrsz; /* Physcial address size */ > +int amd64_phys_red; /* Physcial address size reduction */ > int amd64_min_noes_asid; > int has_rdrand; > int has_rdseed; > @@ -676,8 +678,9 @@ identifycpu(struct cpu_info *ci) > /* speculation control features */ > if (ci->ci_vendor == CPUV_AMD) { > if (ci->ci_pnfeatset >= 0x80000008) { > - CPUID(0x80000008, dummy, ci->ci_feature_amdspec_ebx, > - dummy, dummy); > + CPUID(0x80000008, ci->ci_feature_amdspec_eax, > + ci->ci_feature_amdspec_ebx, dummy, dummy); > + amd64_phys_addrsz = ci->ci_feature_amdspec_eax & 0xff; > pcpuid(ci, "80000008", 'b', > CPUID_MEMBER(ci_feature_amdspec_ebx), > CPUID_AMDSPEC_EBX_BITS); > @@ -711,6 +714,7 @@ identifycpu(struct cpu_info *ci) > 'd', CPUID_MEMBER(ci_feature_amdsev_edx), > CPUID_AMDSEV_EDX_BITS); > amd64_pos_cbit = (ci->ci_feature_amdsev_ebx & 0x3f); > + amd64_phys_red = ((ci->ci_feature_amdsev_ebx >> 6) & 0x3f); > amd64_min_noes_asid = ci->ci_feature_amdsev_edx; > if (cpu_sev_guestmode && CPU_IS_PRIMARY(ci)) > printf("\n%s: SEV%s guest mode", ci->ci_dev->dv_xname, > diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c > index abcd177a700..be6c0aea6ca 100644 > --- a/sys/arch/amd64/amd64/vmm_machdep.c > +++ b/sys/arch/amd64/amd64/vmm_machdep.c > @@ -5023,6 +5023,7 @@ int > svm_handle_np_fault(struct vcpu *vcpu) > { > uint64_t gpa; > + paddr_t hpa; > int gpa_memtype, ret = 0; > struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; > struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee; > @@ -5039,14 +5040,25 @@ svm_handle_np_fault(struct vcpu *vcpu) > ret = svm_fault_page(vcpu, gpa); > break; > case VMM_MEM_TYPE_MMIO: > - vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST; > - if (ci->ci_vmm_cap.vcc_svm.svm_decode_assist) { > - vee->vee_insn_len = vmcb->v_n_bytes_fetched; > - memcpy(&vee->vee_insn_bytes, vmcb->v_guest_ins_bytes, > - sizeof(vee->vee_insn_bytes)); > - vee->vee_insn_info |= VEE_BYTES_VALID; > + if (vcpu->vc_seves) { > + vee->vee_fault_type = VEE_FAULT_HANDLED; > + /* Setup invalid mapping to raise #VC in guest */ > + hpa = (1ULL << amd64_phys_red) - 1; > + hpa <<= (amd64_phys_addrsz - amd64_phys_red); > + ret = pmap_enter(vcpu->vc_parent->vm_pmap, > + trunc_page(gpa), hpa, > + PROT_READ | PROT_WRITE | PROT_EXEC, 0); > + } else { > + vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST; > + if (ci->ci_vmm_cap.vcc_svm.svm_decode_assist) { > + vee->vee_insn_len = vmcb->v_n_bytes_fetched; > + memcpy(&vee->vee_insn_bytes, > + vmcb->v_guest_ins_bytes, > + sizeof(vee->vee_insn_bytes)); > + vee->vee_insn_info |= VEE_BYTES_VALID; > + } > + ret = EAGAIN; > } > - ret = EAGAIN; > break; > default: > printf("%s: unknown memory type %d for GPA 0x%llx\n", > diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h > index c7d34e634cf..2c8616dede9 100644 > --- a/sys/arch/amd64/include/cpu.h > +++ b/sys/arch/amd64/include/cpu.h > @@ -170,6 +170,7 @@ struct cpu_info { > u_int32_t ci_feature_sefflags_ebx;/* [I] */ > u_int32_t ci_feature_sefflags_ecx;/* [I] */ > u_int32_t ci_feature_sefflags_edx;/* [I] */ > + u_int32_t ci_feature_amdspec_eax; /* [I] */ > u_int32_t ci_feature_amdspec_ebx; /* [I] */ > u_int32_t ci_feature_amdsev_eax; /* [I] */ > u_int32_t ci_feature_amdsev_ebx; /* [I] */ > @@ -418,6 +419,8 @@ void identifycpu(struct cpu_info *); > int cpu_amd64speed(int *); > extern int cpuspeed; > extern int amd64_pos_cbit; > +extern int amd64_phys_addrsz; > +extern int amd64_phys_red; > extern int amd64_min_noes_asid; > > /* machdep.c */ >