From: Mike Larkin Subject: Re: Move VMX vpid allocation to fix vpid leak in vmm. To: Dave Voutila Cc: tech Date: Sun, 8 Sep 2024 23:34:49 -0700 On Mon, Sep 02, 2024 at 08:25:00AM -0400, Dave Voutila wrote: > > Theo Buehler writes: > > > On Sun, Sep 01, 2024 at 09:46:16PM -0400, Dave Voutila wrote: > >> I noticed this while Hans-Joerg was working on SEV support for > >> vmm(4). Since vpid allocation is in the reset register ioctl handler, > >> and we don't free any previous vpid, a program on an Intel host that > >> keeps resetting a vcpu via VMM_IOC_RESETCPU will exhaust vpids. > >> > >> Instead of a check & free dance, this just moves the allocation to the > >> vcpu initialization since vmm frees vpids/asids in vcpu de-init. (The > >> SEV changes moved the SVM ASID stuff similarly.) > >> > >> It also removes the DPRINTF (also on the SVM side) since the allocation > >> function already has a printf on failure. > >> > >> Plus, since we're de-coupling allocation from configuration, the VMCS > >> reload can be avoided, making this a net-negative diff. > >> > >> ok? > >> > > > > Not sure if that matters, but previously vmm_init_vpx()'s ENOMEM was > > always mapped to EINVAL. Now there's a bit of an asymmetry between > > vcpu_init_vmx() and vcpu_init_svm() in that regard. > > > > Good point! EINVAL makes no sense to me in this case as it's resource > exhaustion. This diff makes VMX and SVM symmetric, trying to allocate > early before we call km_alloc(9). > > I also found the logic in the vmx vcpu deinit needs adjusting to the new > logic: we always allocate a vpid even if we don't need it. In practice, > this is fine as it's a 16 bit value and we currently cap total vcpu > count in vmm at 512 (rather arbitrarily). > IIRC I did this because the oldest CPUs that supported EPT/RVI only had 512 VPIDs/ASIDs and no way to query this. I think the proper solution would be to use the new technique to query and if it returns -1 or something bogus, we can fallback to 512. Someone remind me when they reach 512 VMs on a vmm(4) host :) -ml > Still net-negative ;) > > > diffstat refs/heads/master refs/heads/vpid-leak > M sys/arch/amd64/amd64/vmm_machdep.c | 20+ 33- > > 1 file changed, 20 insertions(+), 33 deletions(-) > > diff refs/heads/master refs/heads/vpid-leak > commit - 84b38932e34d2692ffaf821e4542142c343fa76f > commit + a1cfee8f6209440282222ee4d850795be1fb95c4 > blob - c84fbb3273c799116e4628a1097db4eb9f135955 > blob + 923456b7aa54dd02a83117f4bd04fb289027d89d > --- sys/arch/amd64/amd64/vmm_machdep.c > +++ sys/arch/amd64/amd64/vmm_machdep.c > @@ -2253,7 +2253,7 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg > uint32_t pinbased, procbased, procbased2, exit, entry; > uint32_t want1, want0; > uint64_t ctrlval, cr3, msr_misc_enable; > - uint16_t ctrl, vpid; > + uint16_t ctrl; > struct vmx_msr_store *msr_store; > > rw_assert_wrlock(&vcpu->vc_lock); > @@ -2516,30 +2516,12 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg > IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) { > if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, > IA32_VMX_ENABLE_VPID, 1)) { > - > - /* We may sleep during allocation, so reload VMCS. */ > - vcpu->vc_last_pcpu = curcpu(); > - ret = vmm_alloc_vpid(&vpid); > - if (vcpu_reload_vmcs_vmx(vcpu)) { > - printf("%s: failed to reload vmcs\n", __func__); > - ret = EINVAL; > - goto exit; > - } > - if (ret) { > - DPRINTF("%s: could not allocate VPID\n", > - __func__); > - ret = EINVAL; > - goto exit; > - } > - > - if (vmwrite(VMCS_GUEST_VPID, vpid)) { > + if (vmwrite(VMCS_GUEST_VPID, vcpu->vc_vpid)) { > DPRINTF("%s: error setting guest VPID\n", > __func__); > ret = EINVAL; > goto exit; > } > - > - vcpu->vc_vpid = vpid; > } > } > > @@ -2832,13 +2814,19 @@ vcpu_init_vmx(struct vcpu *vcpu) > uint32_t cr0, cr4; > int ret = 0; > > + /* Allocate a VPID early to avoid km_alloc if we're out of VPIDs. */ > + if (vmm_alloc_vpid(&vcpu->vc_vpid)) > + return (ENOMEM); > + > /* Allocate VMCS VA */ > vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero, > &kd_waitok); > vcpu->vc_vmx_vmcs_state = VMCS_CLEARED; > > - if (!vcpu->vc_control_va) > - return (ENOMEM); > + if (!vcpu->vc_control_va) { > + ret = ENOMEM; > + goto exit; > + } > > /* Compute VMCS PA */ > if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va, > @@ -3094,12 +3082,19 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_para > uint16_t asid; > int ret = 0; > > + /* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */ > + if (vmm_alloc_vpid(&asid)) > + return (ENOMEM); > + vcpu->vc_vpid = asid; > + > /* Allocate VMCB VA */ > vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero, > &kd_waitok); > > - if (!vcpu->vc_control_va) > - return (ENOMEM); > + if (!vcpu->vc_control_va) { > + ret = ENOMEM; > + goto exit; > + } > > /* Compute VMCB PA */ > if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va, > @@ -3173,14 +3168,6 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_para > (uint64_t)vcpu->vc_svm_ioio_va, > (uint64_t)vcpu->vc_svm_ioio_pa); > > - /* Guest VCPU ASID */ > - if (vmm_alloc_vpid(&asid)) { > - DPRINTF("%s: could not allocate asid\n", __func__); > - ret = EINVAL; > - goto exit; > - } > - vcpu->vc_vpid = asid; > - > /* Shall we enable SEV? */ > vcpu->vc_sev = vcp->vcp_sev; > > @@ -3260,7 +3247,7 @@ vcpu_deinit_vmx(struct vcpu *vcpu) > } > #endif > > - if (vcpu->vc_vmx_vpid_enabled) > + if (vcpu->vc_vpid) > vmm_free_vpid(vcpu->vc_vpid); > }