From: Mike Larkin Subject: Re: vmm(4): For SEV-ES enabled guest raise #VC on MMIO related #NPF To: tech@openbsd.org Date: Sun, 16 Nov 2025 18:17:24 -0800 On Thu, Nov 13, 2025 at 01:04:08PM +0100, hshoexer wrote: > Hi, > > for SEV-ES enabled guests MMIO should raise a #VC trap with error > code SVM_VMEXIT_NPF. This is required as vmm(4) can not analyze > the guest code causing the exit; the guest itself has to assist > which requires a #VC trap being raised. > > As SEV-ES guest we try to avoid MMIO by using paravirtualization. > Nonetheless, vmm(4) should enforce #NPF/MMIO for SEV guests; e.g. > to find bugs. > > Enforcing #VC on MMIO is accomplished by setting up a mapping in > the nested page table that has reserved bits set. The bits of the > physical address portion of the PTE that are affected by physical > address bit reduction are reserved. When these bits are set, a #VC > trap with error code SVM_VMEXIT_NPF is raised when the guest resumes. > Further MMIO to the same page will raise #VC right away. > > This approach is recommended by [1] section 4.1.5. It can be tested > with regress/sys/arch/amd64/seves_mmio/. > > For non-SEV-ES guests nothing changes, we forward the fault to > vmd(8) as before. > > [1] https://docs.amd.com/v/u/en-US/56421 > I will need more detail as to what is going on in the diff below, before I can proceed. it looks like you are letting the guest map any gpa to (?) some HPA whose calculation is not obvious or clear. are you trying to force a bad mapping? why not just use PROT_NONE for the mapping and force it to some completely bogus HPA? that way you're sure the guest can't sneak some mappings past you? or is this what you are doing? I can't understand from the diff. is this what the "address reduction" part is? also, I'm not sure we can just "only" use pmap_enter anymore to put mappings in the guest; I know dv was doing work in this area a few months ago WRT uvm_share and its interaction with uvm_fault. do we need to use uvm_fault here? if so, what would the permissions be? (dv?) -ml > ---------------------------------- > diff --git a/sys/arch/amd64/amd64/identcpu.c b/sys/arch/amd64/amd64/identcpu.c > index ac4e845a1aa..86f05f1a7bf 100644 > --- a/sys/arch/amd64/amd64/identcpu.c > +++ b/sys/arch/amd64/amd64/identcpu.c > @@ -67,6 +67,8 @@ int cpuspeed; > > int amd64_has_xcrypt; > int amd64_pos_cbit; /* C bit position for SEV */ > +int amd64_phys_addrsz; /* Physcial address size */ > +int amd64_phys_red; /* Physcial address size reduction */ > int amd64_min_noes_asid; > int has_rdrand; > int has_rdseed; > @@ -676,8 +678,9 @@ identifycpu(struct cpu_info *ci) > /* speculation control features */ > if (ci->ci_vendor == CPUV_AMD) { > if (ci->ci_pnfeatset >= 0x80000008) { > - CPUID(0x80000008, dummy, ci->ci_feature_amdspec_ebx, > - dummy, dummy); > + CPUID(0x80000008, ci->ci_feature_amdspec_eax, > + ci->ci_feature_amdspec_ebx, dummy, dummy); > + amd64_phys_addrsz = ci->ci_feature_amdspec_eax & 0xff; > pcpuid(ci, "80000008", 'b', > CPUID_MEMBER(ci_feature_amdspec_ebx), > CPUID_AMDSPEC_EBX_BITS); > @@ -711,6 +714,7 @@ identifycpu(struct cpu_info *ci) > 'd', CPUID_MEMBER(ci_feature_amdsev_edx), > CPUID_AMDSEV_EDX_BITS); > amd64_pos_cbit = (ci->ci_feature_amdsev_ebx & 0x3f); > + amd64_phys_red = ((ci->ci_feature_amdsev_ebx >> 6) & 0x3f); > amd64_min_noes_asid = ci->ci_feature_amdsev_edx; > if (cpu_sev_guestmode && CPU_IS_PRIMARY(ci)) > printf("\n%s: SEV%s guest mode", ci->ci_dev->dv_xname, > diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c > index abcd177a700..be6c0aea6ca 100644 > --- a/sys/arch/amd64/amd64/vmm_machdep.c > +++ b/sys/arch/amd64/amd64/vmm_machdep.c > @@ -5023,6 +5023,7 @@ int > svm_handle_np_fault(struct vcpu *vcpu) > { > uint64_t gpa; > + paddr_t hpa; > int gpa_memtype, ret = 0; > struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; > struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee; > @@ -5039,14 +5040,25 @@ svm_handle_np_fault(struct vcpu *vcpu) > ret = svm_fault_page(vcpu, gpa); > break; > case VMM_MEM_TYPE_MMIO: > - vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST; > - if (ci->ci_vmm_cap.vcc_svm.svm_decode_assist) { > - vee->vee_insn_len = vmcb->v_n_bytes_fetched; > - memcpy(&vee->vee_insn_bytes, vmcb->v_guest_ins_bytes, > - sizeof(vee->vee_insn_bytes)); > - vee->vee_insn_info |= VEE_BYTES_VALID; > + if (vcpu->vc_seves) { > + vee->vee_fault_type = VEE_FAULT_HANDLED; > + /* Setup invalid mapping to raise #VC in guest */ > + hpa = (1ULL << amd64_phys_red) - 1; > + hpa <<= (amd64_phys_addrsz - amd64_phys_red); > + ret = pmap_enter(vcpu->vc_parent->vm_pmap, > + trunc_page(gpa), hpa, > + PROT_READ | PROT_WRITE | PROT_EXEC, 0); > + } else { > + vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST; > + if (ci->ci_vmm_cap.vcc_svm.svm_decode_assist) { > + vee->vee_insn_len = vmcb->v_n_bytes_fetched; > + memcpy(&vee->vee_insn_bytes, > + vmcb->v_guest_ins_bytes, > + sizeof(vee->vee_insn_bytes)); > + vee->vee_insn_info |= VEE_BYTES_VALID; > + } > + ret = EAGAIN; > } > - ret = EAGAIN; > break; > default: > printf("%s: unknown memory type %d for GPA 0x%llx\n", > diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h > index c7d34e634cf..2c8616dede9 100644 > --- a/sys/arch/amd64/include/cpu.h > +++ b/sys/arch/amd64/include/cpu.h > @@ -170,6 +170,7 @@ struct cpu_info { > u_int32_t ci_feature_sefflags_ebx;/* [I] */ > u_int32_t ci_feature_sefflags_ecx;/* [I] */ > u_int32_t ci_feature_sefflags_edx;/* [I] */ > + u_int32_t ci_feature_amdspec_eax; /* [I] */ > u_int32_t ci_feature_amdspec_ebx; /* [I] */ > u_int32_t ci_feature_amdsev_eax; /* [I] */ > u_int32_t ci_feature_amdsev_ebx; /* [I] */ > @@ -418,6 +419,8 @@ void identifycpu(struct cpu_info *); > int cpu_amd64speed(int *); > extern int cpuspeed; > extern int amd64_pos_cbit; > +extern int amd64_phys_addrsz; > +extern int amd64_phys_red; > extern int amd64_min_noes_asid; > > /* machdep.c */ >