Index | Thread | Search

From:
Dave Voutila <dv@sisu.io>
Subject:
Re: Move VMX vpid allocation to fix vpid leak in vmm.
To:
Alexander Bluhm <alexander.bluhm@gmx.net>
Cc:
Theo Buehler <tb@theobuehler.org>, tech <tech@openbsd.org>, Mike Larkin <mlarkin@openbsd.org>, Hans-J?rg H?xer <Hans-Joerg_Hoexer@genua.de>
Date:
Mon, 02 Sep 2024 16:55:47 -0400

Download raw body.

Thread
Alexander Bluhm <alexander.bluhm@gmx.net> writes:

> On Mon, Sep 02, 2024 at 08:25:00AM -0400, Dave Voutila wrote:
>>
>> Theo Buehler <tb@theobuehler.org> writes:
>>
>> > On Sun, Sep 01, 2024 at 09:46:16PM -0400, Dave Voutila wrote:
>> >> I noticed this while Hans-Joerg was working on SEV support for
>> >> vmm(4). Since vpid allocation is in the reset register ioctl handler,
>> >> and we don't free any previous vpid, a program on an Intel host that
>> >> keeps resetting a vcpu via VMM_IOC_RESETCPU will exhaust vpids.
>> >>
>> >> Instead of a check & free dance, this just moves the allocation to the
>> >> vcpu initialization since vmm frees vpids/asids in vcpu de-init. (The
>> >> SEV changes moved the SVM ASID stuff similarly.)
>> >>
>> >> It also removes the DPRINTF (also on the SVM side) since the allocation
>> >> function already has a printf on failure.
>> >>
>> >> Plus, since we're de-coupling allocation from configuration, the VMCS
>> >> reload can be avoided, making this a net-negative diff.
>> >>
>> >> ok?
>> >>
>> >
>> > Not sure if that matters, but previously vmm_init_vpx()'s ENOMEM was
>> > always mapped to EINVAL. Now there's a bit of an asymmetry between
>> > vcpu_init_vmx() and vcpu_init_svm() in that regard.
>> >
>>
>> Good point! EINVAL makes no sense to me in this case as it's resource
>> exhaustion. This diff makes VMX and SVM symmetric, trying to allocate
>> early before we call km_alloc(9).
>>
>> I also found the logic in the vmx vcpu deinit needs adjusting to the new
>> logic: we always allocate a vpid even if we don't need it. In practice,
>> this is fine as it's a 16 bit value and we currently cap total vcpu
>> count in vmm at 512 (rather arbitrarily).
>>
>> Still net-negative ;)
>
> Successfully tested on Intel vmd host.
>
> vcpu_init_vmx() does
>> +     if (vmm_alloc_vpid(&vcpu->vc_vpid))
>> +             return (ENOMEM);
>
> vcpu_init_svm() does
>> +     if (vmm_alloc_vpid(&asid))
>> +             return (ENOMEM);
>> +     vcpu->vc_vpid = asid;
>
> Can you remove the asid variable for consistency?
>

Done.

> And why is vcpu_deinit_vmx() doing
>> +     if (vcpu->vc_vpid)
>>               vmm_free_vpid(vcpu->vc_vpid);
> while vcpu_deinit_svm() calls vmm_free_vpid() unconditionally?
>

Good point. That was a holdover of logic from when we conditionally
allocated vpids on Intel in the vcpu reset handler.

Updated diff below.


diffstat refs/heads/master refs/heads/vpid-leak
 M  sys/arch/amd64/amd64/vmm_machdep.c  |  19+  35-

1 file changed, 19 insertions(+), 35 deletions(-)

diff refs/heads/master refs/heads/vpid-leak
commit - 84b38932e34d2692ffaf821e4542142c343fa76f
commit + 929c982aca50544a7e0770af67bfc71b0f2400d1
blob - c84fbb3273c799116e4628a1097db4eb9f135955
blob + 2fe7d8074968fdf3da08c179a9e8ee261de199f7
--- sys/arch/amd64/amd64/vmm_machdep.c
+++ sys/arch/amd64/amd64/vmm_machdep.c
@@ -2253,7 +2253,7 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg
 	uint32_t pinbased, procbased, procbased2, exit, entry;
 	uint32_t want1, want0;
 	uint64_t ctrlval, cr3, msr_misc_enable;
-	uint16_t ctrl, vpid;
+	uint16_t ctrl;
 	struct vmx_msr_store *msr_store;

 	rw_assert_wrlock(&vcpu->vc_lock);
@@ -2516,30 +2516,12 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg
 	    IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
 		if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
 		    IA32_VMX_ENABLE_VPID, 1)) {
-
-			/* We may sleep during allocation, so reload VMCS. */
-			vcpu->vc_last_pcpu = curcpu();
-			ret = vmm_alloc_vpid(&vpid);
-			if (vcpu_reload_vmcs_vmx(vcpu)) {
-				printf("%s: failed to reload vmcs\n", __func__);
-				ret = EINVAL;
-				goto exit;
-			}
-			if (ret) {
-				DPRINTF("%s: could not allocate VPID\n",
-				    __func__);
-				ret = EINVAL;
-				goto exit;
-			}
-
-			if (vmwrite(VMCS_GUEST_VPID, vpid)) {
+			if (vmwrite(VMCS_GUEST_VPID, vcpu->vc_vpid)) {
 				DPRINTF("%s: error setting guest VPID\n",
 				    __func__);
 				ret = EINVAL;
 				goto exit;
 			}
-
-			vcpu->vc_vpid = vpid;
 		}
 	}

@@ -2832,13 +2814,19 @@ vcpu_init_vmx(struct vcpu *vcpu)
 	uint32_t cr0, cr4;
 	int ret = 0;

+	/* Allocate a VPID early to avoid km_alloc if we're out of VPIDs. */
+	if (vmm_alloc_vpid(&vcpu->vc_vpid))
+		return (ENOMEM);
+
 	/* Allocate VMCS VA */
 	vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
 	    &kd_waitok);
 	vcpu->vc_vmx_vmcs_state = VMCS_CLEARED;

-	if (!vcpu->vc_control_va)
-		return (ENOMEM);
+	if (!vcpu->vc_control_va) {
+		ret = ENOMEM;
+		goto exit;
+	}

 	/* Compute VMCS PA */
 	if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va,
@@ -3091,15 +3079,20 @@ vcpu_reset_regs(struct vcpu *vcpu, struct vcpu_reg_sta
 int
 vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp)
 {
-	uint16_t asid;
 	int ret = 0;

+	/* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */
+	if (vmm_alloc_vpid(&vcpu->vc_vpid))
+		return (ENOMEM);
+
 	/* Allocate VMCB VA */
 	vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero,
 	    &kd_waitok);

-	if (!vcpu->vc_control_va)
-		return (ENOMEM);
+	if (!vcpu->vc_control_va) {
+		ret = ENOMEM;
+		goto exit;
+	}

 	/* Compute VMCB PA */
 	if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va,
@@ -3173,14 +3166,6 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_para
 	    (uint64_t)vcpu->vc_svm_ioio_va,
 	    (uint64_t)vcpu->vc_svm_ioio_pa);

-	/* Guest VCPU ASID */
-	if (vmm_alloc_vpid(&asid)) {
-		DPRINTF("%s: could not allocate asid\n", __func__);
-		ret = EINVAL;
-		goto exit;
-	}
-	vcpu->vc_vpid = asid;
-
 	/* Shall we enable SEV? */
 	vcpu->vc_sev = vcp->vcp_sev;

@@ -3260,8 +3245,7 @@ vcpu_deinit_vmx(struct vcpu *vcpu)
 	}
 #endif

-	if (vcpu->vc_vmx_vpid_enabled)
-		vmm_free_vpid(vcpu->vc_vpid);
+	vmm_free_vpid(vcpu->vc_vpid);
 }

 /*