From: hshoexer Subject: Re: vmm(4): return error for VMCALL instead of injecting #UD To: tech@openbsd.org Date: Tue, 17 Mar 2026 14:52:18 +0100 Hi, On Mon, Mar 16, 2026 at 04:51:21PM -0400, Dave Voutila wrote: > ... > I think VMX will give us a valid instruction length we can > retrieve and trust. > > For SVM, I think nRIP is applicable here and should be accurate? I think > we're at a point where we might want to enforce nRIP support if we don't > already. (I think this has come up before.) like the diff below? If we want to go that direction, VMCALL and VMMCALL handling could be refactored in a single function. And there are some more places in the SVM specific code where we could use nRIP, too. @ Miguel: Using the metal-amd64.iso (talos linux?) I can reproduce that issue on both AMD/SVM and Intel/VMX vmm. And skipping the vm call as you suggest resolves that issue (ie. ptp_kvm fails gracefully). However, on AMD I see later on MMIO related exits that vmd can not handle yet. And the vm is terminated. I have not investigated this yet, but I guess for AMD/SVM vmm there is more to be done to get that talos linux running. Do you test on Intel or AMD machines? Take care, HJ. -------------------------------------------------------------------------- commit 29facd187fd38580ffc89ecf51b38b1640d57cbe Author: Hans-Joerg Hoexer Date: Tue Mar 17 11:40:52 2026 +0100 vmm(4): Skip VMCALL/VMMCALL issued at CPL 0; otherwise abort with signal On Intel we have to use #GP(0), on AMD #UD. Adjust regression test. 
diff --git a/regress/sys/arch/amd64/vmcall/vmcall.c b/regress/sys/arch/amd64/vmcall/vmcall.c index 74ede2da21b..eb8b81cb243 100644 --- a/regress/sys/arch/amd64/vmcall/vmcall.c +++ b/regress/sys/arch/amd64/vmcall/vmcall.c @@ -21,8 +21,25 @@ #include #include +/* On Intel CPUs we expect #GP(0) */ static void -handler(int sig, siginfo_t *sip, void *ctx) +sigsegv(int sig, siginfo_t *sip, void *ctx) +{ + printf("signo %d, code %d, errno %d\n", sip->si_signo, sip->si_code, + sip->si_errno); + if (sig != SIGSEGV) + errx(1, "expected SIGSEGV: %d", sig); + if (sip->si_code != SEGV_MAPERR) + errx(1, "expected SEGV_MAPERR: %d", sip->si_code); + if (sip->si_errno != 0) + errx(1, "expected errno 0: %d", sip->si_errno); + + exit(0); +} + +/* On AMD CPUs we expect #UD */ +static void +sigill(int sig, siginfo_t *sip, void *ctx) { printf("signo %d, code %d, errno %d\n", sip->si_signo, sip->si_code, sip->si_errno); @@ -30,10 +47,13 @@ handler(int sig, siginfo_t *sip, void *ctx) errx(1, "expected SIGILL: %d", sig); if (sip->si_code != ILL_PRVOPC) errx(1, "expected ILL_PRVOPC: %d", sip->si_code); + if (sip->si_errno != 0) + errx(1, "expected errno 0: %d", sip->si_errno); exit(0); } + __dead static void usage(void) { @@ -50,8 +70,11 @@ main(int argc, char **argv) usage(); memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = sigsegv; + if (sigaction(SIGSEGV, &sa, NULL) == -1) + err(2, "sigaction"); + sa.sa_sigaction = sigill; if (sigaction(SIGILL, &sa, NULL) == -1) err(2, "sigaction"); diff --git a/sys/arch/amd64/amd64/identcpu.c b/sys/arch/amd64/amd64/identcpu.c index ac4e845a1aa..3c120817a2b 100644 --- a/sys/arch/amd64/amd64/identcpu.c +++ b/sys/arch/amd64/amd64/identcpu.c @@ -1016,6 +1016,9 @@ cpu_check_vmm_cap(struct cpu_info *ci) if (edx & AMD_SVM_DECODE_ASSIST_CAP) ci->ci_vmm_cap.vcc_svm.svm_decode_assist = 1; + + if (edx & AMD_SVM_NRIPS_CAP) + ci->ci_vmm_cap.vcc_svm.svm_nrip_save = 1; } /* diff --git 
a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c index ae2ec3c53b1..10bcc9faf72 100644 --- a/sys/arch/amd64/amd64/vmm_machdep.c +++ b/sys/arch/amd64/amd64/vmm_machdep.c @@ -88,6 +88,7 @@ int vcpu_init_vmx(struct vcpu *); int vcpu_init_svm(struct vcpu *, struct vm_create_params *); int vcpu_run_vmx(struct vcpu *, struct vm_run_params *); int vcpu_run_svm(struct vcpu *, struct vm_run_params *); +int vcpu_update_rip(struct vcpu *, uint64_t); void vcpu_deinit(struct vcpu *); void vcpu_deinit_svm(struct vcpu *); void vcpu_deinit_vmx(struct vcpu *); @@ -4301,8 +4302,14 @@ svm_handle_exit(struct vcpu *vcpu) vcpu->vc_gueststate.vg_rax == HVCALL_FORCED_ABORT) return (EINVAL); DPRINTF("SVM_VMEXIT_VMMCALL at cpl=%d\n", guest_cpl); - ret = vmm_inject_ud(vcpu); - update_rip = 0; + if (guest_cpl > 0) { + ret = vmm_inject_ud(vcpu); + update_rip = 0; + break; + } + vcpu->vc_gueststate.vg_rax = -1; + vcpu_update_rip(vcpu, 3); + update_rip = 1; break; default: DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__, @@ -4741,8 +4748,14 @@ vmx_handle_exit(struct vcpu *vcpu) vcpu->vc_gueststate.vg_rax == HVCALL_FORCED_ABORT) return (EINVAL); DPRINTF("VMX_EXIT_VMCALL at cpl=%d\n", guest_cpl); - ret = vmm_inject_ud(vcpu); - update_rip = 0; + if (guest_cpl > 0) { + ret = vmm_inject_gp(vcpu); + update_rip = 0; + break; + } + vcpu->vc_gueststate.vg_rax = -1; + vcpu_update_rip(vcpu, 3); + update_rip = 1; break; default: #ifdef VMM_DEBUG @@ -6896,6 +6909,40 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp) return (ret); } +/* + * vcpu_update_rip + * + * Updates RIP by size of the current instruction: + * + * - VMX: Add VMCS_INSTRUCTION_LENGTH. + * - SVM: If supported use the saved next RIP; otherwise use provided + * alternative instruction size. 
+ */ +int +vcpu_update_rip(struct vcpu *vcpu, uint64_t altsz) +{ + uint64_t insn_length; + struct vmcb *vmcb; + struct cpu_info *ci = curcpu(); + + if (vmm_softc->mode == VMM_MODE_EPT) { + if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) { + DPRINTF("%s: can't obtain instruction length\n", + __func__); + return (EINVAL); + } + vcpu->vc_gueststate.vg_rip += insn_length; + } else { + if (ci->ci_vmm_cap.vcc_svm.svm_nrip_save) { + vmcb = (struct vmcb *)vcpu->vc_control_va; + vcpu->vc_gueststate.vg_rip = vmcb->v_nrip; + } else + vcpu->vc_gueststate.vg_rip += altsz; + } + + return (0); +} + /* * vmm_alloc_vpid_vcpu * diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h index c0df3670517..1a2c2148ccd 100644 --- a/sys/arch/amd64/include/cpu.h +++ b/sys/arch/amd64/include/cpu.h @@ -86,6 +86,7 @@ struct svm { uint8_t svm_flush_by_asid; uint8_t svm_vmcb_clean; uint8_t svm_decode_assist; + uint8_t svm_nrip_save; }; union vmm_cpu_cap { diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h index cf86f5a14e3..4fd2046ffcc 100644 --- a/sys/arch/amd64/include/specialreg.h +++ b/sys/arch/amd64/include/specialreg.h @@ -1448,6 +1448,7 @@ #define MSR_AMD_VM_HSAVE_PA 0xc0010117 #define CPUID_AMD_SVM_CAP 0x8000000A #define AMD_SVM_NESTED_PAGING_CAP (1 << 0) +#define AMD_SVM_NRIPS_CAP (1 << 3) #define AMD_SVM_VMCB_CLEAN_CAP (1 << 5) #define AMD_SVM_FLUSH_BY_ASID_CAP (1 << 6) #define AMD_SVM_DECODE_ASSIST_CAP (1 << 7)