Index | Thread | Search

From:
Dave Voutila <dv@sisu.io>
Subject:
add exception injections to vmm/vmd
To:
tech@openbsd.org
Date:
Wed, 03 Apr 2024 09:02:47 -0400

Download raw body.

Thread
  • Dave Voutila:

    add exception injections to vmm/vmd

We need this to land my work on emulating string/rep-based io and mmio
(the long road to smp), since that work needs a way to inject page
faults, etc. during emulation. Currently, there's no way to do that from
userland.

This diff combines interrupt and exception injection into a single object,
which fits the api provided by AMD and Intel. It's a pattern similar to
the one used by other hypervisors such as KVM and bhyve.

No change should be noticeable from a guest perspective as this adds in
the api surface and updates vmd(8) to use it.

A regress change is provided (see end of diff) that demonstrates and
tests exception injection and interrupt injection while in real mode and
using an IVT.

mlarkin@ is afk for a while, so I'm looking for some testers & reviewers so
I can then get my INS/OUTS diff distributed. (If you test, make sure to
copy or symlink vmmvar.h into /usr/include/machine and rebuild vmd.)

notes for reviewers:
 - this cleans up some bastardization of using 0xff as an "unset" irq
   value so the vmm and vmd code should express intent more directly
 - it also smooths out the issues with how we are mixing 0xff and 0xffff
   when dealing with interrupt vectors
 - this doesn't emulate all possible injections, like NMI and some
   nuances of privileged software exceptions like #BP. vmd doesn't use
   them at this point, nor do we need them for ins/outs, mmio, etc.

ok?

diff refs/heads/master refs/heads/vmm-inject-exceptions
commit - 49012907d58cccd872ac31bdbef8f4fe0bec0653
commit + 20fdfab0cff724efaea577fbcf7e6a5efb897714
blob - 55c775c65de87d2bccb0fbb7e41f7ce7c4ea15d4
blob + a83f35bffc75829b000c85118172d3398a14c079
--- sys/arch/amd64/amd64/vmm_machdep.c
+++ sys/arch/amd64/amd64/vmm_machdep.c
@@ -3694,6 +3694,10 @@ vm_run(struct vm_run_params *vrp)
 		}
 	}

+	vcpu->vc_inject.vie_type = vrp->vrp_inject.vie_type;
+	vcpu->vc_inject.vie_vector = vrp->vrp_inject.vie_vector;
+	vcpu->vc_inject.vie_errorcode = vrp->vrp_inject.vie_errorcode;
+
 	WRITE_ONCE(vcpu->vc_curcpu, curcpu());
 	/* Run the VCPU specified in vrp */
 	if (vcpu->vc_virt_mode == VMM_MODE_EPT) {
@@ -3966,8 +3970,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *
 	struct schedstate_percpu *spc;
 	struct vmx_msr_store *msr_store;
 	struct vmx_invvpid_descriptor vid;
-	uint64_t eii, procbased, int_st;
-	uint16_t irq;
+	uint64_t cr0, eii, procbased, int_st;
 	u_long s;

 	rw_assert_wrlock(&vcpu->vc_lock);
@@ -3983,8 +3986,6 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *
 	 * needs to be fixed up depends on what vmd populated in the
 	 * exit data structure.
 	 */
-	irq = vrp->vrp_irq;
-
 	if (vrp->vrp_intr_pending)
 		vcpu->vc_intr = 1;
 	else
@@ -4062,7 +4063,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *

 	/* Handle vmd(8) injected interrupts */
 	/* Is there an interrupt pending injection? */
-	if (irq != 0xFFFF) {
+	if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR) {
 		if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST, &int_st)) {
 			printf("%s: can't get interruptibility state\n",
 			    __func__);
@@ -4071,7 +4072,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *

 		/* Interruptibility state 0x3 covers NMIs and STI */
 		if (!(int_st & 0x3) && vcpu->vc_irqready) {
-			eii = (irq & 0xFF);
+			eii = vcpu->vc_inject.vie_vector;
 			eii |= (1ULL << 31);	/* Valid */
 			eii |= (0ULL << 8);	/* Hardware Interrupt */
 			if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
@@ -4080,7 +4081,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *
 				return (EINVAL);
 			}

-			irq = 0xFFFF;
+			vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
 		}
 	} else if (!vcpu->vc_intr) {
 		/*
@@ -4159,38 +4160,66 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *
 		}

 		/* Inject event if present */
-		if (vcpu->vc_event != 0) {
-			eii = (vcpu->vc_event & 0xFF);
+		if (vcpu->vc_inject.vie_type == VCPU_INJECT_EX) {
+			eii = vcpu->vc_inject.vie_vector;
 			eii |= (1ULL << 31);	/* Valid */

-			/* Set the "Send error code" flag for certain vectors */
-			switch (vcpu->vc_event & 0xFF) {
-				case VMM_EX_DF:
-				case VMM_EX_TS:
-				case VMM_EX_NP:
-				case VMM_EX_SS:
-				case VMM_EX_GP:
-				case VMM_EX_PF:
-				case VMM_EX_AC:
-					eii |= (1ULL << 11);
-			}
+			switch (vcpu->vc_inject.vie_vector) {
+			case VMM_EX_BP:
+			case VMM_EX_OF:
+				/* Software Exceptions */
+				eii |= (4ULL << 8);
+				break;
+			case VMM_EX_DF:
+			case VMM_EX_TS:
+			case VMM_EX_NP:
+			case VMM_EX_SS:
+			case VMM_EX_GP:
+			case VMM_EX_PF:
+			case VMM_EX_AC:
+				/* Hardware Exceptions */
+				eii |= (3ULL << 8);

-			eii |= (3ULL << 8);	/* Hardware Exception */
+				cr0 = 0;
+				if (vmread(VMCS_GUEST_IA32_CR0, &cr0)) {
+					printf("%s: vmread(VMCS_GUEST_IA32_CR0)"
+					    "\n", __func__);
+					ret = EINVAL;
+					break;
+				}
+
+				/* Don't set error codes if in real mode. */
+				if (ret == EINVAL || !(cr0 & CR0_PE))
+					break;
+				eii |= (1ULL << 11);
+
+				/* Enforce a 0 error code for #AC. */
+				if (vcpu->vc_inject.vie_vector == VMM_EX_AC)
+					vcpu->vc_inject.vie_errorcode = 0;
+				/*
+				 * XXX: Intel SDM says if IA32_VMX_BASIC[56] is
+				 * set, error codes can be injected for hw
+				 * exceptions with or without error code,
+				 * regardless of vector. See Vol 3D. A1. Ignore
+				 * this capability for now.
+				 */
+				if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE,
+				    vcpu->vc_inject.vie_errorcode)) {
+					printf("%s: can't write error code to "
+					    "guest\n", __func__);
+					ret = EINVAL;
+				}
+			} /* switch */
+			if (ret == EINVAL)
+				break;
+
 			if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO, eii)) {
 				printf("%s: can't vector event to guest\n",
 				    __func__);
 				ret = EINVAL;
 				break;
 			}
-
-			if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE, 0)) {
-				printf("%s: can't write error code to guest\n",
-				    __func__);
-				ret = EINVAL;
-				break;
-			}
-
-			vcpu->vc_event = 0;
+			vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
 		}

 		if (vcpu->vc_vmx_vpid_enabled) {
@@ -4771,7 +4800,8 @@ vmm_inject_gp(struct vcpu *vcpu)
 {
 	DPRINTF("%s: injecting #GP at guest %%rip 0x%llx\n", __func__,
 	    vcpu->vc_gueststate.vg_rip);
-	vcpu->vc_event = VMM_EX_GP;
+	vcpu->vc_inject.vie_vector = VMM_EX_GP;
+	vcpu->vc_inject.vie_type = VCPU_INJECT_EX;

 	return (0);
 }
@@ -4792,7 +4822,8 @@ vmm_inject_ud(struct vcpu *vcpu)
 {
 	DPRINTF("%s: injecting #UD at guest %%rip 0x%llx\n", __func__,
 	    vcpu->vc_gueststate.vg_rip);
-	vcpu->vc_event = VMM_EX_UD;
+	vcpu->vc_inject.vie_vector = VMM_EX_UD;
+	vcpu->vc_inject.vie_type = VCPU_INJECT_EX;

 	return (0);
 }
@@ -4813,7 +4844,8 @@ vmm_inject_db(struct vcpu *vcpu)
 {
 	DPRINTF("%s: injecting #DB at guest %%rip 0x%llx\n", __func__,
 	    vcpu->vc_gueststate.vg_rip);
-	vcpu->vc_event = VMM_EX_DB;
+	vcpu->vc_inject.vie_vector = VMM_EX_DB;
+	vcpu->vc_inject.vie_type = VCPU_INJECT_EX;

 	return (0);
 }
@@ -6463,11 +6495,8 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *
 	struct cpu_info *ci = NULL;
 	uint64_t exit_reason;
 	struct schedstate_percpu *spc;
-	uint16_t irq;
 	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;

-	irq = vrp->vrp_irq;
-
 	if (vrp->vrp_intr_pending)
 		vcpu->vc_intr = 1;
 	else
@@ -6541,30 +6570,58 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *

 		/* Handle vmd(8) injected interrupts */
 		/* Is there an interrupt pending injection? */
-		if (irq != 0xFFFF && vcpu->vc_irqready) {
-			vmcb->v_eventinj = (irq & 0xFF) | (1U << 31);
-			irq = 0xFFFF;
+		if (vcpu->vc_inject.vie_type == VCPU_INJECT_INTR &&
+		    vcpu->vc_irqready) {
+			vmcb->v_eventinj = vcpu->vc_inject.vie_vector |
+			    (1U << 31);
+			vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
 		}

 		/* Inject event if present */
-		if (vcpu->vc_event != 0) {
-			DPRINTF("%s: inject event %d\n", __func__,
-			    vcpu->vc_event);
-			vmcb->v_eventinj = 0;
+		if (vcpu->vc_inject.vie_type == VCPU_INJECT_EX) {
+			vmcb->v_eventinj = vcpu->vc_inject.vie_vector;
+
 			/* Set the "Event Valid" flag for certain vectors */
-			switch (vcpu->vc_event & 0xFF) {
-				case VMM_EX_DF:
-				case VMM_EX_TS:
-				case VMM_EX_NP:
-				case VMM_EX_SS:
-				case VMM_EX_GP:
-				case VMM_EX_PF:
-				case VMM_EX_AC:
+			switch (vcpu->vc_inject.vie_vector) {
+			case VMM_EX_BP:
+			case VMM_EX_OF:
+			case VMM_EX_DB:
+				/*
+				 * Software exception.
+				 * XXX check nRIP support.
+				 */
+				vmcb->v_eventinj |= (4ULL << 8);
+				break;
+			case VMM_EX_AC:
+				vcpu->vc_inject.vie_errorcode = 0;
+				/* fallthrough */
+			case VMM_EX_DF:
+			case VMM_EX_TS:
+			case VMM_EX_NP:
+			case VMM_EX_SS:
+			case VMM_EX_GP:
+			case VMM_EX_PF:
+				/* Hardware exception. */
+				vmcb->v_eventinj |= (3ULL << 8);
+
+				if (vmcb->v_cr0 & CR0_PE) {
+					/* Error code valid. */
 					vmcb->v_eventinj |= (1ULL << 11);
-			}
-			vmcb->v_eventinj |= (vcpu->vc_event) | (1U << 31);
-			vmcb->v_eventinj |= (3ULL << 8); /* Exception */
-			vcpu->vc_event = 0;
+					vmcb->v_eventinj |= (uint64_t)
+					    vcpu->vc_inject.vie_errorcode << 32;
+				}
+				break;
+			default:
+				printf("%s: unsupported exception vector %u\n",
+				    __func__, vcpu->vc_inject.vie_vector);
+				ret = EINVAL;
+			} /* switch */
+			if (ret == EINVAL)
+				break;
+
+			/* Event is valid. */
+			vmcb->v_eventinj |= (1U << 31);
+			vcpu->vc_inject.vie_type = VCPU_INJECT_NONE;
 		}

 		TRACEPOINT(vmm, guest_enter, vcpu, vrp);
blob - 82aa105d39551e0754722483bd1ae677d0c239c7
blob + aba035e9525f5fb2db9c836b155a6160e116c047
--- sys/arch/amd64/include/vmmvar.h
+++ sys/arch/amd64/include/vmmvar.h
@@ -340,6 +340,7 @@ struct vm_exit_inout {
 	uint32_t		vei_data;	/* data */
 	uint8_t			vei_insn_len;	/* Count of instruction bytes */
 };
+
 /*
  *  vm_exit_eptviolation	: describes an EPT VIOLATION exit
  */
@@ -353,6 +354,19 @@ struct vm_exit_eptviolation {
 };

 /*
+ * struct vcpu_inject_event	: describes an exception or interrupt to inject.
+ */
+struct vcpu_inject_event {
+	uint8_t		vie_vector;	/* Exception or interrupt vector. */
+	uint32_t	vie_errorcode;	/* Optional error code. */
+	uint8_t		vie_type;
+#define VCPU_INJECT_NONE	0
+#define VCPU_INJECT_INTR	1	/* External hardware interrupt. */
+#define VCPU_INJECT_EX		2	/* HW or SW Exception */
+#define VCPU_INJECT_NMI		3	/* Non-maskable Interrupt */
+};
+
+/*
  * struct vcpu_segment_info
  *
  * Describes a segment + selector set, used in constructing the initial vcpu
@@ -465,7 +479,7 @@ struct vm_run_params {
 	uint32_t	vrp_vm_id;
 	uint32_t	vrp_vcpu_id;
 	uint8_t		vrp_continue;		/* Continuing from an exit */
-	uint16_t	vrp_irq;		/* IRQ to inject */
+	struct vcpu_inject_event	vrp_inject;
 	uint8_t		vrp_intr_pending;	/* Additional intrs pending? */

 	/* Input/output parameter to VMM_IOC_RUN */
@@ -873,9 +887,8 @@ struct vcpu {
 	uint64_t vc_h_xcr0;			/* [v] */

 	struct vcpu_gueststate vc_gueststate;	/* [v] */
+	struct vcpu_inject_event vc_inject;	/* [v] */

-	uint8_t vc_event;
-
 	uint32_t vc_pvclock_version;		/* [v] */
 	paddr_t vc_pvclock_system_gpa;		/* [v] */
 	uint32_t vc_pvclock_system_tsc_mul;	/* [v] */
blob - 86d57693474224dc5c4a1294a648e44b26212057
blob + 6913ddb471a2ed2d7fd78a81656859416560f95b
--- usr.sbin/vmd/vm.c
+++ usr.sbin/vmd/vm.c
@@ -1536,7 +1536,6 @@ vcpu_run_loop(void *arg)
 {
 	struct vm_run_params *vrp = (struct vm_run_params *)arg;
 	intptr_t ret = 0;
-	int irq;
 	uint32_t n;

 	vrp->vrp_continue = 0;
@@ -1611,10 +1610,10 @@ vcpu_run_loop(void *arg)
 		}

 		if (vrp->vrp_irqready && i8259_is_pending()) {
-			irq = i8259_ack();
-			vrp->vrp_irq = irq;
+			vrp->vrp_inject.vie_vector = i8259_ack();
+			vrp->vrp_inject.vie_type = VCPU_INJECT_INTR;
 		} else
-			vrp->vrp_irq = 0xFFFF;
+			vrp->vrp_inject.vie_type = VCPU_INJECT_NONE;

 		/* Still more interrupts pending? */
 		vrp->vrp_intr_pending = i8259_is_pending();
blob - 84bd9492a01da2191ee8a6e7ed4a58d00e68d46c
blob + c6ecdae7b2cb3b80a2716092bae484bca846cda2
--- regress/sys/arch/amd64/vmm/vcpu.c
+++ regress/sys/arch/amd64/vmm/vcpu.c
@@ -34,45 +34,54 @@
 #include <unistd.h>

 #define KIB		1024
-#define MIB		(1 << 20)
+#define MIB		(1UL << 20)
+#define GIB		(1024 * MIB)
 #define VMM_NODE	"/dev/vmm"

+#define LOW_MEM		0
+#define UPPER_MEM	1
+
 #define PCKBC_AUX	0x61
+#define PCJR_DISKCTRL	0xF0

 const char 		*VM_NAME = "regress";

+const uint8_t PUSHW_DX[] = { 0x66, 0x52 };		 // pushw %dx
+const uint8_t INS[] = { 0x6C };				 // ins es:[di],dx
+const uint8_t IN_PCJR[] = { 0xE4, 0xF0 };		 // in 0xF0
+
 /* Originally from vmd(8)'s vm.c */
 const struct vcpu_reg_state vcpu_init_flat16 = {
 	.vrs_gprs[VCPU_REGS_RFLAGS] = 0x2,
 	.vrs_gprs[VCPU_REGS_RIP] = 0xFFF0,
-	.vrs_gprs[VCPU_REGS_RSP] = 0x0,
+	.vrs_gprs[VCPU_REGS_RDX] = PCKBC_AUX,	/* Port used by INS */
+	.vrs_gprs[VCPU_REGS_RSP] =  0x800,	/* Set our stack in low mem. */
+	//.vrs_gprs[VCPU_REGS_RBP] =  0x800,	/* Set our stack in low mem. */
 	.vrs_crs[VCPU_REGS_CR0] = 0x60000010,
-	.vrs_crs[VCPU_REGS_CR3] = 0,
-	.vrs_sregs[VCPU_REGS_CS] = { 0xF000, 0xFFFF, 0x809F, 0xF0000},
-	.vrs_sregs[VCPU_REGS_DS] = { 0x0, 0xFFFF, 0x8093, 0x0},
-	.vrs_sregs[VCPU_REGS_ES] = { 0x0, 0xFFFF, 0x8093, 0x0},
-	.vrs_sregs[VCPU_REGS_FS] = { 0x0, 0xFFFF, 0x8093, 0x0},
-	.vrs_sregs[VCPU_REGS_GS] = { 0x0, 0xFFFF, 0x8093, 0x0},
-	.vrs_sregs[VCPU_REGS_SS] = { 0x0, 0xFFFF, 0x8093, 0x0},
-	.vrs_gdtr = { 0x0, 0xFFFF, 0x0, 0x0},
-	.vrs_idtr = { 0x0, 0xFFFF, 0x0, 0x0},
+	.vrs_sregs[VCPU_REGS_CS] = { 0xF000, 0xFFFF, 0x0093, 0xFFFF0000},
+	.vrs_sregs[VCPU_REGS_DS] = { 0x0, 0xFFFF, 0x0093, 0x0},
+	.vrs_sregs[VCPU_REGS_ES] = { 0x0, 0xFFFF, 0x0093, 0x0},
+	.vrs_sregs[VCPU_REGS_FS] = { 0x0, 0xFFFF, 0x0093, 0x0},
+	.vrs_sregs[VCPU_REGS_GS] = { 0x0, 0xFFFF, 0x0093, 0x0},
+	.vrs_sregs[VCPU_REGS_SS] = { 0x0, 0xFFFF, 0x0093, 0x0},
+	.vrs_gdtr = { 0x0, 0xFFFF, 0x0082, 0x0},
+	.vrs_idtr = { 0x0, 0xFFFF, 0x0082, 0x0},
 	.vrs_sregs[VCPU_REGS_LDTR] = { 0x0, 0xFFFF, 0x0082, 0x0},
 	.vrs_sregs[VCPU_REGS_TR] = { 0x0, 0xFFFF, 0x008B, 0x0},
-	.vrs_msrs[VCPU_REGS_EFER] = 0ULL,
-	.vrs_drs[VCPU_REGS_DR0] = 0x0,
-	.vrs_drs[VCPU_REGS_DR1] = 0x0,
-	.vrs_drs[VCPU_REGS_DR2] = 0x0,
-	.vrs_drs[VCPU_REGS_DR3] = 0x0,
 	.vrs_drs[VCPU_REGS_DR6] = 0xFFFF0FF0,
 	.vrs_drs[VCPU_REGS_DR7] = 0x400,
-	.vrs_msrs[VCPU_REGS_STAR] = 0ULL,
-	.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
-	.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
-	.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
-	.vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
-	.vrs_crs[VCPU_REGS_XCR0] = XFEATURE_X87
+	.vrs_crs[VCPU_REGS_XCR0] = XFEATURE_X87,
 };

+struct intr_handler {
+	uint16_t	offset;
+	uint16_t	segment;
+};
+
+const struct intr_handler ivt[256] = {
+	[VMM_EX_GP] = { .segment = 0x0, .offset = 0x0B5D },
+};
+
 int
 main(int argc, char **argv)
 {
@@ -87,7 +96,8 @@ main(int argc, char **argv)

 	struct vm_mem_range		*vmr;
 	int				 fd, ret = 1;
-	size_t				 i, j;
+	size_t				 i;
+	off_t				 off, reset = 0xFFFFFFF0, stack = 0x800;
 	void				*p;

 	fd = open(VMM_NODE, O_RDWR);
@@ -95,7 +105,7 @@ main(int argc, char **argv)
 		err(1, "open %s", VMM_NODE);

 	/*
-	 * 1. Create our VM with 1 vcpu and 2 MiB of memory.
+	 * 1. Create our VM with 1 vcpu and 64 MiB of memory.
 	 */
 	memset(&vcp, 0, sizeof(vcp));
 	strlcpy(vcp.vcp_name, VM_NAME, sizeof(vcp.vcp_name));
@@ -103,34 +113,53 @@ main(int argc, char **argv)

 	/* Split into two ranges, similar to how vmd(8) might do it. */
 	vcp.vcp_nmemranges = 2;
-	vcp.vcp_memranges[0].vmr_gpa = 0x0;
-	vcp.vcp_memranges[0].vmr_size = 640 * KIB;
-	vcp.vcp_memranges[1].vmr_gpa = 640 * KIB;
-	vcp.vcp_memranges[1].vmr_size = (2 * MIB) - (640 * KIB);
+	vcp.vcp_memranges[LOW_MEM].vmr_gpa = 0x0;
+	vcp.vcp_memranges[LOW_MEM].vmr_size = 640 * KIB;
+	vcp.vcp_memranges[UPPER_MEM].vmr_size = (64 * MIB) - (640 * KIB);
+	vcp.vcp_memranges[UPPER_MEM].vmr_gpa = (4 * GIB)
+	    - vcp.vcp_memranges[UPPER_MEM].vmr_size;

-	/* Allocate memory. */
+	/* Allocate and Initialize our guest memory. */
 	for (i = 0; i < vcp.vcp_nmemranges; i++) {
 		vmr = &vcp.vcp_memranges[i];
-		p = mmap(NULL, vmr->vmr_size, PROT_READ | PROT_WRITE,
-		    MAP_PRIVATE | MAP_ANON, -1, 0);
-		if (p == MAP_FAILED)
-			err(1, "mmap");
-
-		/*
-		 * Fill with 2-byte IN instructions that read from what would
-		 * be an ancient XT PC Keyboard status port. These reads will
-		 * trigger vm exits.
-		 */
 		if (vmr->vmr_size % 2 != 0)
 			errx(1, "memory ranges must be multiple of 2");
-		for (j = 0; j < vmr->vmr_size; j += 2) {
-			((uint8_t*)p)[j + 0] = 0xE4;
-			((uint8_t*)p)[j + 1] = PCKBC_AUX;
-		}
+
+		p = mmap(NULL, vmr->vmr_size, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANON, -1, 0);
+		if (p == MAP_FAILED)
+			err(1, "mmap");
+
 		vmr->vmr_va = (vaddr_t)p;
 		printf("created mapped region %zu: { gpa: 0x%08lx, size: %lu,"
 		    " hva: 0x%lx }\n", i, vmr->vmr_gpa, vmr->vmr_size,
 		    vmr->vmr_va);
+
+		/* Fill with int3 instructions. */
+		memset(p, 0xcc, vmr->vmr_size);
+
+		if (i == LOW_MEM) {
+			/* Write our IVT. */
+			memcpy(p, &ivt, sizeof(ivt));
+
+			/*
+			 * Set up a #GP handler that does a read from a
+			 * non-existent PC Jr. Disk Controller.
+			 */
+			p = (uint8_t*)((uint8_t*)p + 0xb5d);
+			memcpy(p, IN_PCJR, sizeof(IN_PCJR));
+		} else {
+			/*
+			 * Write our code to the reset vector:
+			 *   PUSHW %dx        ; inits the stack
+			 *   INS dx, es:[di]  ; read from port in dx
+			 */
+			off = reset - vmr->vmr_gpa;
+			p = (uint8_t*)p + off;
+			memcpy(p, PUSHW_DX, sizeof(PUSHW_DX));
+			p = (uint8_t*)p + sizeof(PUSHW_DX);
+			memcpy(p, INS, sizeof(INS));
+		}
 	}

 	if (ioctl(fd, VMM_IOC_CREATE, &vcp) == -1)
@@ -172,11 +201,15 @@ main(int argc, char **argv)
 		vmr = &vsp.vsp_memranges[i];
 		p = (void*)vmr->vmr_va;

-		for (j = 0; j < vmr->vmr_size; j += 2) {
-			if (((uint8_t*)p)[j + 0] != 0xE4)
-				errx(1, "bad byte");
-			if (((uint8_t*)p)[j + 1] != PCKBC_AUX)
-				errx(1, "bad byte");
+		if (i == LOW_MEM) {
+			/* Check if our IVT is there. */
+			if (memcmp(&ivt, p, sizeof(ivt)) != 0) {
+				warnx("invalid ivt");
+				goto out;
+			}
+		} else {
+			/* Check our code at the reset vector. */
+
 		}
 		printf("checked shared region %zu: { gpa: 0x%08lx, size: %lu,"
 		    " hva: 0x%lx }\n", i, vmr->vmr_gpa, vmr->vmr_size,
@@ -266,7 +299,6 @@ main(int argc, char **argv)
 	vrunp.vrp_exit = exit;
 	vrunp.vrp_vcpu_id = 0;		/* XXX SP */
 	vrunp.vrp_vm_id = vcp.vcp_id;
-	vrunp.vrp_irq = 0x0;
 	vrunp.vrp_irqready = 1;

 	if (ioctl(fd, VMM_IOC_RUN, &vrunp) == -1) {
@@ -283,8 +315,13 @@ main(int argc, char **argv)
 	switch (vrunp.vrp_exit_reason) {
 	case SVM_VMEXIT_IOIO:
 	case VMX_EXIT_IO:
-		printf("vcpu %d on vm %d exited for io assist\n",
-		    vrunp.vrp_vcpu_id, vrunp.vrp_vm_id);
+		printf("vcpu %d on vm %d exited for io assist @ ip = 0x%llx, "
+		    "cs.base = 0x%llx, ss.base = 0x%llx, rsp = 0x%llx\n",
+		    vrunp.vrp_vcpu_id, vrunp.vrp_vm_id,
+		    vrunp.vrp_exit->vrs.vrs_gprs[VCPU_REGS_RIP],
+		    vrunp.vrp_exit->vrs.vrs_sregs[VCPU_REGS_CS].vsi_base,
+		    vrunp.vrp_exit->vrs.vrs_sregs[VCPU_REGS_SS].vsi_base,
+		    vrunp.vrp_exit->vrs.vrs_gprs[VCPU_REGS_RSP]);
 		break;
 	default:
 		warnx("unexpected vm exit reason: 0%04x",
@@ -298,14 +335,73 @@ main(int argc, char **argv)
 		    exit->vei.vei_port);
 		goto out;
 	}
+	if (exit->vei.vei_string != 1) {
+		warnx("expected string instruction (INS)");
+		goto out;
+	} else
+		printf("got expected string instruction\n");

+	/* Advance RIP? */
+	printf("insn_len = %u\n", exit->vei.vei_insn_len);
+	exit->vrs.vrs_gprs[VCPU_REGS_RIP] += exit->vei.vei_insn_len;
+
 	/*
+	 * Inject a #GP and see if we end up at our isr.
+	 */
+	vrunp.vrp_inject.vie_vector = VMM_EX_GP;
+	vrunp.vrp_inject.vie_errorcode = 0x11223344;
+	vrunp.vrp_inject.vie_type = VCPU_INJECT_EX;
+	printf("injecting exception 0x%x\n", vrunp.vrp_inject.vie_vector);
+	if (ioctl(fd, VMM_IOC_RUN, &vrunp) == -1) {
+		warn("VMM_IOC_RUN 2");
+		goto out;
+	}
+
+	switch (vrunp.vrp_exit_reason) {
+	case SVM_VMEXIT_IOIO:
+	case VMX_EXIT_IO:
+		printf("vcpu %d on vm %d exited for io assist @ ip = 0x%llx, "
+		    "cs.base = 0x%llx\n", vrunp.vrp_vcpu_id, vrunp.vrp_vm_id,
+		    vrunp.vrp_exit->vrs.vrs_gprs[VCPU_REGS_RIP],
+		    vrunp.vrp_exit->vrs.vrs_sregs[VCPU_REGS_CS].vsi_base);
+		break;
+	default:
+		warnx("unexpected vm exit reason: 0%04x",
+		    vrunp.vrp_exit_reason);
+		goto out;
+	}
+
+	if (exit->vei.vei_port != PCJR_DISKCTRL) {
+		warnx("expected NMI handler to poke PCJR_DISKCTLR, got 0x%02x",
+		    exit->vei.vei_port);
+		printf("rip = 0x%llx\n", exit->vrs.vrs_gprs[VCPU_REGS_RIP]);
+		goto out;
+	}
+	printf("exception handler called\n");
+
+	/*
 	 * If we made it here, we're close to passing. Any failures during
 	 * cleanup will reset ret back to non-zero.
 	 */
 	ret = 0;

 out:
+	printf("--- RESET VECTOR @ gpa 0x%llx ---\n", reset);
+	for (i=0; i<10; i++) {
+		if (i > 0)
+			printf(" ");
+		printf("%02x", *(uint8_t*)
+		    (vsp.vsp_memranges[UPPER_MEM].vmr_va + off + i));
+	}
+	printf("\n--- STACK @ gpa 0x%llx ---\n", stack);
+	for (i=0; i<16; i++) {
+		if (i > 0)
+			printf(" ");
+		printf("%02x", *(uint8_t*)(vsp.vsp_memranges[LOW_MEM].vmr_va
+			+ stack - i - 1));
+	}
+	printf("\n");
+
 	/*
 	 * 6. Terminate our VM and clean up.
 	 */