Index | Thread | Search

From:
hshoexer <hshoexer@yerbouti.franken.de>
Subject:
Re: vmm(4): return error for VMCALL instead of injecting #UD
To:
tech@openbsd.org
Date:
Tue, 17 Mar 2026 14:52:18 +0100

Download raw body.

Thread
Hi,

On Mon, Mar 16, 2026 at 04:51:21PM -0400, Dave Voutila wrote:
> ...
> I think VMX will give us a valid instruction length we can
> retrieve and trust.
> 
> For SVM, I think nRIP is applicable here and should be accurate? I think
> we're at a point where we might want to enforce nRIP support if we don't
> already. (I think this has come up before.)

like the diff below?  If we want to go that direction, VMCALL and
VMMCALL handling could be refactored in a single function.  And
there are some more places in the SVM specific code where we could
use nRIP, too.

@ Miguel:  Using the metal-amd64.iso (talos linux?) I can reproduce
that issue on both AMD/SVM and Intel/VMX vmm.  And skipping the vm
call as you suggest resolves that issue (ie. ptp_kvm fails gracefully).

However, on AMD I see later on MMIO related exits that vmd can not
handle yet.  And the vm is terminated.  I have not investigated
this yet, but I guess for AMD/SVM vmm there is more to be done to
get that talos linux running.

Do you test on Intel or AMD machines?

Take care,
HJ.

--------------------------------------------------------------------------
commit 29facd187fd38580ffc89ecf51b38b1640d57cbe
Author: Hans-Joerg Hoexer <hshoexer@genua.de>
Date:   Tue Mar 17 11:40:52 2026 +0100

    vmm(4):  Skip VMCALL/VMMCALL issued at CPL 0; otherwise abort with signal
    
    On Intel we have to use #GP(0), on AMD #UD.  Adjust regression test.

diff --git a/regress/sys/arch/amd64/vmcall/vmcall.c b/regress/sys/arch/amd64/vmcall/vmcall.c
index 74ede2da21b..eb8b81cb243 100644
--- a/regress/sys/arch/amd64/vmcall/vmcall.c
+++ b/regress/sys/arch/amd64/vmcall/vmcall.c
@@ -21,8 +21,25 @@
 #include <stdlib.h>
 #include <string.h>
 
+/* On Intel CPUs we expect #GP(0) */
 static void
-handler(int sig, siginfo_t *sip, void *ctx)
+sigsegv(int sig, siginfo_t *sip, void *ctx)
+{
+	printf("signo %d, code %d, errno %d\n", sip->si_signo, sip->si_code,
+	    sip->si_errno);
+	if (sig != SIGSEGV)
+		errx(1, "expected SIGSEGV: %d", sig);
+	if (sip->si_code != SEGV_MAPERR)
+		errx(1, "expected SEGV_MAPERR: %d", sip->si_code);
+	if (sip->si_errno != 0)
+		errx(1, "expected errno 0: %d", sip->si_errno);
+
+	exit(0);
+}
+
+/* On AMD CPUs we expect #UD */
+static void
+sigill(int sig, siginfo_t *sip, void *ctx)
 {
 	printf("signo %d, code %d, errno %d\n", sip->si_signo, sip->si_code,
 	    sip->si_errno);
@@ -30,10 +47,13 @@ handler(int sig, siginfo_t *sip, void *ctx)
 		errx(1, "expected SIGILL: %d", sig);
 	if (sip->si_code != ILL_PRVOPC)
 		errx(1, "expected ILL_PRVOPC: %d", sip->si_code);
+	if (sip->si_errno != 0)
+		errx(1, "expected errno 0: %d", sip->si_errno);
 
 	exit(0);
 }
 
+
 __dead static void
 usage(void)
 {
@@ -50,8 +70,11 @@ main(int argc, char **argv)
 		usage();
 
 	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
 	sa.sa_flags = SA_SIGINFO;
+	sa.sa_sigaction = sigsegv;
+	if (sigaction(SIGSEGV, &sa, NULL) == -1)
+		err(2, "sigaction");
+	sa.sa_sigaction = sigill;
 	if (sigaction(SIGILL, &sa, NULL) == -1)
 		err(2, "sigaction");
 
diff --git a/sys/arch/amd64/amd64/identcpu.c b/sys/arch/amd64/amd64/identcpu.c
index ac4e845a1aa..3c120817a2b 100644
--- a/sys/arch/amd64/amd64/identcpu.c
+++ b/sys/arch/amd64/amd64/identcpu.c
@@ -1016,6 +1016,9 @@ cpu_check_vmm_cap(struct cpu_info *ci)
 
 		if (edx & AMD_SVM_DECODE_ASSIST_CAP)
 			ci->ci_vmm_cap.vcc_svm.svm_decode_assist = 1;
+
+		if (edx & AMD_SVM_NRIPS_CAP)
+			ci->ci_vmm_cap.vcc_svm.svm_nrip_save = 1;
 	}
 
 	/*
diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c
index ae2ec3c53b1..10bcc9faf72 100644
--- a/sys/arch/amd64/amd64/vmm_machdep.c
+++ b/sys/arch/amd64/amd64/vmm_machdep.c
@@ -88,6 +88,7 @@ int vcpu_init_vmx(struct vcpu *);
 int vcpu_init_svm(struct vcpu *, struct vm_create_params *);
 int vcpu_run_vmx(struct vcpu *, struct vm_run_params *);
 int vcpu_run_svm(struct vcpu *, struct vm_run_params *);
+int vcpu_update_rip(struct vcpu *, uint64_t);
 void vcpu_deinit(struct vcpu *);
 void vcpu_deinit_svm(struct vcpu *);
 void vcpu_deinit_vmx(struct vcpu *);
@@ -4301,8 +4302,14 @@ svm_handle_exit(struct vcpu *vcpu)
 		    vcpu->vc_gueststate.vg_rax == HVCALL_FORCED_ABORT)
 			return (EINVAL);
 		DPRINTF("SVM_VMEXIT_VMMCALL at cpl=%d\n", guest_cpl);
-		ret = vmm_inject_ud(vcpu);
-		update_rip = 0;
+		if (guest_cpl > 0) {
+			ret = vmm_inject_ud(vcpu);
+			update_rip = 0;
+			break;
+		}
+		vcpu->vc_gueststate.vg_rax = -1;
+		vcpu_update_rip(vcpu, 3);
+		update_rip = 1;
 		break;
 	default:
 		DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
@@ -4741,8 +4748,14 @@ vmx_handle_exit(struct vcpu *vcpu)
 		    vcpu->vc_gueststate.vg_rax == HVCALL_FORCED_ABORT)
 			return (EINVAL);
 		DPRINTF("VMX_EXIT_VMCALL at cpl=%d\n", guest_cpl);
-		ret = vmm_inject_ud(vcpu);
-		update_rip = 0;
+		if (guest_cpl > 0) {
+			ret = vmm_inject_gp(vcpu);
+			update_rip = 0;
+			break;
+		}
+		vcpu->vc_gueststate.vg_rax = -1;
+		vcpu_update_rip(vcpu, 3);
+		update_rip = 1;
 		break;
 	default:
 #ifdef VMM_DEBUG
@@ -6896,6 +6909,40 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
 	return (ret);
 }
 
+/*
+ * vcpu_update_rip
+ *
+ * Updates RIP by size of the current instruction:
+ *
+ * - VMX: Add VMCS_INSTRUCTION_LENGTH.
+ * - SVM: If supported use the saved next RIP; otherwise use provided
+ *   alternative instruction size.
+ */
+int
+vcpu_update_rip(struct vcpu *vcpu, uint64_t altsz)
+{
+	uint64_t insn_length;
+	struct vmcb *vmcb;
+	struct cpu_info *ci = curcpu();
+
+	if (vmm_softc->mode == VMM_MODE_EPT) {
+		if (vmread(VMCS_INSTRUCTION_LENGTH, &insn_length)) {
+			DPRINTF("%s: can't obtain instruction length\n",
+			    __func__);
+			return (EINVAL);
+		}
+		vcpu->vc_gueststate.vg_rip += insn_length;
+	} else {
+		if (ci->ci_vmm_cap.vcc_svm.svm_nrip_save) {
+			vmcb = (struct vmcb *)vcpu->vc_control_va;
+			vcpu->vc_gueststate.vg_rip = vmcb->v_nrip;
+		} else
+			vcpu->vc_gueststate.vg_rip += altsz;
+	}
+
+	return (0);
+}
+
 /*
  * vmm_alloc_vpid_vcpu
  *
diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
index c0df3670517..1a2c2148ccd 100644
--- a/sys/arch/amd64/include/cpu.h
+++ b/sys/arch/amd64/include/cpu.h
@@ -86,6 +86,7 @@ struct svm {
 	uint8_t		svm_flush_by_asid;
 	uint8_t		svm_vmcb_clean;
 	uint8_t		svm_decode_assist;
+	uint8_t		svm_nrip_save;
 };
 
 union vmm_cpu_cap {
diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
index cf86f5a14e3..4fd2046ffcc 100644
--- a/sys/arch/amd64/include/specialreg.h
+++ b/sys/arch/amd64/include/specialreg.h
@@ -1448,6 +1448,7 @@
 #define MSR_AMD_VM_HSAVE_PA		0xc0010117
 #define CPUID_AMD_SVM_CAP		0x8000000A
 #define AMD_SVM_NESTED_PAGING_CAP	(1 << 0)
+#define AMD_SVM_NRIPS_CAP		(1 << 3)
 #define AMD_SVM_VMCB_CLEAN_CAP		(1 << 5)
 #define AMD_SVM_FLUSH_BY_ASID_CAP	(1 << 6)
 #define AMD_SVM_DECODE_ASSIST_CAP	(1 << 7)