From: hshoexer <hshoexer@yerbouti.franken.de>
Subject: Re: vmm(4): return error for VMCALL instead of injecting #UD
To: tech@openbsd.org
Cc: dv@sisu.io, mlarkin@nested.page, miguel@miguel.cc
Date: Tue, 16 Jun 2026 20:07:05 +0200

Hi,

On Mon, Apr 20, 2026 at 02:17:04PM +0200, hshoexer wrote:
> Hi,
> 
> On Tue, Apr 07, 2026 at 02:45:01PM +0200, hshoexer wrote:
> > Hi,
> > 
> > here is reworked version based on the original diff:  I've implemented
> > a function vcpu_get_insnlen() to get the instruction length for
> > both VMX and SVM based virtualization.  It could be used in several
> > places where we retrieve the instruction length.  For now, I only
> > use it for handling vmcall/vmmcall.
> > 
> > ok?
> 
> any comments on this approach?  If the use of nRIP is too much or
> doesn't fit other plans, let me know.  Using a fixed instruction
> size as originally proposed would for now solve the problem at hand.
> 
> fwiw: the biggest chunk of this diff is just the regression test.

any comments on this approach? If it doesn't make sense, just let
me know.

Take care,
HJ.


> Take care,
> HJ.
> 
> > -------------------------------------------------------------------------
> > commit a0d6d904ad05f0d494037a552fb138d26c33acfa
> > Author: Hans-Joerg Hoexer <hshoexer@genua.de>
> > Date:   Tue Mar 17 11:40:52 2026 +0100
> > 
> >     vmm(4):  Skip VMCALL/VMMCALL issued at CPL 0; otherwise abort with signal
> >     
> >     On Intel we have to use #GP(0), on AMD #UD.  Adjust regression test.
> > 
> > diff --git a/regress/sys/arch/amd64/vmcall/vmcall.c b/regress/sys/arch/amd64/vmcall/vmcall.c
> > index 74ede2da21b..eb8b81cb243 100644
> > --- a/regress/sys/arch/amd64/vmcall/vmcall.c
> > +++ b/regress/sys/arch/amd64/vmcall/vmcall.c
> > @@ -21,8 +21,25 @@
> >  #include <stdlib.h>
> >  #include <string.h>
> >  
> > +/* On Intel CPUs we expect #GP(0) */
> >  static void
> > -handler(int sig, siginfo_t *sip, void *ctx)
> > +sigsegv(int sig, siginfo_t *sip, void *ctx)
> > +{
> > +	printf("signo %d, code %d, errno %d\n", sip->si_signo, sip->si_code,
> > +	    sip->si_errno);
> > +	if (sig != SIGSEGV)
> > +		errx(1, "expected SIGSEGV: %d", sig);
> > +	if (sip->si_code != SEGV_MAPERR)
> > +		errx(1, "expected SEGV_MAPERR: %d", sip->si_code);
> > +	if (sip->si_errno != 0)
> > +		errx(1, "expected errno 0: %d", sip->si_errno);
> > +
> > +	exit(0);
> > +}
> > +
> > +/* On AMD CPUs we expect #UD */
> > +static void
> > +sigill(int sig, siginfo_t *sip, void *ctx)
> >  {
> >  	printf("signo %d, code %d, errno %d\n", sip->si_signo, sip->si_code,
> >  	    sip->si_errno);
> > @@ -30,10 +47,13 @@ handler(int sig, siginfo_t *sip, void *ctx)
> >  		errx(1, "expected SIGILL: %d", sig);
> >  	if (sip->si_code != ILL_PRVOPC)
> >  		errx(1, "expected ILL_PRVOPC: %d", sip->si_code);
> > +	if (sip->si_errno != 0)
> > +		errx(1, "expected errno 0: %d", sip->si_errno);
> >  
> >  	exit(0);
> >  }
> >  
> > +
> >  __dead static void
> >  usage(void)
> >  {
> > @@ -50,8 +70,11 @@ main(int argc, char **argv)
> >  		usage();
> >  
> >  	memset(&sa, 0, sizeof(sa));
> > -	sa.sa_sigaction = handler;
> >  	sa.sa_flags = SA_SIGINFO;
> > +	sa.sa_sigaction = sigsegv;
> > +	if (sigaction(SIGSEGV, &sa, NULL) == -1)
> > +		err(2, "sigaction");
> > +	sa.sa_sigaction = sigill;
> >  	if (sigaction(SIGILL, &sa, NULL) == -1)
> >  		err(2, "sigaction");
> >  
> > diff --git a/sys/arch/amd64/amd64/identcpu.c b/sys/arch/amd64/amd64/identcpu.c
> > index 51e8344d573..9b185e8c635 100644
> > --- a/sys/arch/amd64/amd64/identcpu.c
> > +++ b/sys/arch/amd64/amd64/identcpu.c
> > @@ -1048,6 +1048,9 @@ cpu_check_vmm_cap(struct cpu_info *ci)
> >  
> >  		if (edx & AMD_SVM_DECODE_ASSIST_CAP)
> >  			ci->ci_vmm_cap.vcc_svm.svm_decode_assist = 1;
> > +
> > +		if (edx & AMD_SVM_NRIPS_CAP)
> > +			ci->ci_vmm_cap.vcc_svm.svm_nrip_save = 1;
> >  	}
> >  
> >  	/*
> > diff --git a/sys/arch/amd64/amd64/vmm_machdep.c b/sys/arch/amd64/amd64/vmm_machdep.c
> > index ae2ec3c53b1..4346bc581f8 100644
> > --- a/sys/arch/amd64/amd64/vmm_machdep.c
> > +++ b/sys/arch/amd64/amd64/vmm_machdep.c
> > @@ -88,6 +88,7 @@ int vcpu_init_vmx(struct vcpu *);
> >  int vcpu_init_svm(struct vcpu *, struct vm_create_params *);
> >  int vcpu_run_vmx(struct vcpu *, struct vm_run_params *);
> >  int vcpu_run_svm(struct vcpu *, struct vm_run_params *);
> > +int vcpu_get_insnlen(struct vcpu *, uint64_t *);
> >  void vcpu_deinit(struct vcpu *);
> >  void vcpu_deinit_svm(struct vcpu *);
> >  void vcpu_deinit_vmx(struct vcpu *);
> > @@ -4213,7 +4214,7 @@ vmx_get_exit_info(uint64_t *rip, uint64_t *exit_reason)
> >  int
> >  svm_handle_exit(struct vcpu *vcpu)
> >  {
> > -	uint64_t exit_reason, rflags;
> > +	uint64_t exit_reason, rflags, insn_length;
> >  	int update_rip, ret = 0, guest_cpl;
> >  	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
> >  
> > @@ -4301,8 +4302,16 @@ svm_handle_exit(struct vcpu *vcpu)
> >  		    vcpu->vc_gueststate.vg_rax == HVCALL_FORCED_ABORT)
> >  			return (EINVAL);
> >  		DPRINTF("SVM_VMEXIT_VMMCALL at cpl=%d\n", guest_cpl);
> > -		ret = vmm_inject_ud(vcpu);
> > -		update_rip = 0;
> > +		if (guest_cpl > 0) {
> > +			ret = vmm_inject_ud(vcpu);
> > +			update_rip = 0;
> > +			break;
> > +		}
> > +		if (vcpu_get_insnlen(vcpu, &insn_length))
> > +			return (EINVAL);
> > +		vcpu->vc_gueststate.vg_rax = -1;
> > +		vcpu->vc_gueststate.vg_rip += insn_length;
> > +		update_rip = 1;
> >  		break;
> >  	default:
> >  		DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
> > @@ -4653,7 +4662,7 @@ svm_get_iflag(struct vcpu *vcpu, uint64_t rflags)
> >  int
> >  vmx_handle_exit(struct vcpu *vcpu)
> >  {
> > -	uint64_t exit_reason, rflags, istate;
> > +	uint64_t exit_reason, rflags, istate, insn_length;
> >  	int update_rip, ret = 0, guest_cpl;
> >  
> >  	update_rip = 0;
> > @@ -4741,8 +4750,16 @@ vmx_handle_exit(struct vcpu *vcpu)
> >  		    vcpu->vc_gueststate.vg_rax == HVCALL_FORCED_ABORT)
> >  			return (EINVAL);
> >  		DPRINTF("VMX_EXIT_VMCALL at cpl=%d\n", guest_cpl);
> > -		ret = vmm_inject_ud(vcpu);
> > -		update_rip = 0;
> > +		if (guest_cpl > 0) {
> > +			ret = vmm_inject_gp(vcpu);
> > +			update_rip = 0;
> > +			break;
> > +		}
> > +		if (vcpu_get_insnlen(vcpu, &insn_length))
> > +			return (EINVAL);
> > +		vcpu->vc_gueststate.vg_rax = -1;
> > +		vcpu->vc_gueststate.vg_rip += insn_length;
> > +		update_rip = 1;
> >  		break;
> >  	default:
> >  #ifdef VMM_DEBUG
> > @@ -6896,6 +6913,41 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
> >  	return (ret);
> >  }
> >  
> > +/*
> > + * vcpu_get_insnlen
> > + */
> > +int
> > +vcpu_get_insnlen(struct vcpu *vcpu, uint64_t *insnlen)
> > +{
> > +	struct vmcb *vmcb;
> > +	struct cpu_info *ci = curcpu();
> > +
> > +	KASSERT(insnlen);
> > +
> > +	if (vmm_softc->mode == VMM_MODE_EPT) {
> > +		if (vmread(VMCS_INSTRUCTION_LENGTH, insnlen))
> > +			return (EINVAL);
> > +	} else {
> > +		if (!ci->ci_vmm_cap.vcc_svm.svm_nrip_save)
> > +			return (EINVAL);
> > +		vmcb = (struct vmcb *)vcpu->vc_control_va;
> > +		/*
> > +		 * According to section 15.7.1 of APMv2 the nRIP
> > +		 * is saved for instruction intercepts, MSR and IOIO
> > +		 * intercepts and exceptions caused by INT3, INTO and
> > +		 * BOUND.  Otherwise nRIP is set to 0.
> > +		 */
> > +		if (vmcb->v_nrip > 0) {
> > +			if (vmcb->v_nrip <= vcpu->vc_gueststate.vg_rip)
> > +				return (EINVAL);
> > +			*insnlen = vmcb->v_nrip - vcpu->vc_gueststate.vg_rip;
> > +		} else
> > +			*insnlen = 0;
> > +	}
> > +
> > +	return (0);
> > +}
> > +
> >  /*
> >   * vmm_alloc_vpid_vcpu
> >   *
> > diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
> > index 15423f5d5f2..9120ef2bde6 100644
> > --- a/sys/arch/amd64/include/cpu.h
> > +++ b/sys/arch/amd64/include/cpu.h
> > @@ -86,6 +86,7 @@ struct svm {
> >  	uint8_t		svm_flush_by_asid;
> >  	uint8_t		svm_vmcb_clean;
> >  	uint8_t		svm_decode_assist;
> > +	uint8_t		svm_nrip_save;
> >  };
> >  
> >  union vmm_cpu_cap {
> > diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
> > index cf86f5a14e3..4fd2046ffcc 100644
> > --- a/sys/arch/amd64/include/specialreg.h
> > +++ b/sys/arch/amd64/include/specialreg.h
> > @@ -1448,6 +1448,7 @@
> >  #define MSR_AMD_VM_HSAVE_PA		0xc0010117
> >  #define CPUID_AMD_SVM_CAP		0x8000000A
> >  #define AMD_SVM_NESTED_PAGING_CAP	(1 << 0)
> > +#define AMD_SVM_NRIPS_CAP		(1 << 3)
> >  #define AMD_SVM_VMCB_CLEAN_CAP		(1 << 5)
> >  #define AMD_SVM_FLUSH_BY_ASID_CAP	(1 << 6)
> >  #define AMD_SVM_DECODE_ASSIST_CAP	(1 << 7)
> > 
>