From: Dave Voutila Subject: Re: Move VMX vpid allocation to fix vpid leak in vmm. To: Alexander Bluhm Cc: Theo Buehler , tech , Mike Larkin , Hans-Jörg Höxer Date: Mon, 02 Sep 2024 16:55:47 -0400 Alexander Bluhm writes: > On Mon, Sep 02, 2024 at 08:25:00AM -0400, Dave Voutila wrote: >> >> Theo Buehler writes: >> >> > On Sun, Sep 01, 2024 at 09:46:16PM -0400, Dave Voutila wrote: >> >> I noticed this while Hans-Joerg was working on SEV support for >> >> vmm(4). Since vpid allocation is in the reset register ioctl handler, >> >> and we don't free any previous vpid, a program on an Intel host that >> >> keeps resetting a vcpu via VMM_IOC_RESETCPU will exhaust vpids. >> >> >> >> Instead of a check & free dance, this just moves the allocation to the >> >> vcpu initialization since vmm frees vpids/asids in vcpu de-init. (The >> >> SEV changes moved the SVM ASID stuff similarly.) >> >> >> >> It also removes the DPRINTF (also on the SVM side) since the allocation >> >> function already has a printf on failure. >> >> >> >> Plus, since we're de-coupling allocation from configuration, the VMCS >> >> reload can be avoided, making this a net-negative diff. >> >> >> >> ok? >> >> >> > >> > Not sure if that matters, but previously vmm_init_vpx()'s ENOMEM was >> > always mapped to EINVAL. Now there's a bit of an asymmetry between >> > vcpu_init_vmx() and vcpu_init_svm() in that regard. >> > >> >> Good point! EINVAL makes no sense to me in this case as it's resource >> exhaustion. This diff makes VMX and SVM symmetric, trying to allocate >> early before we call km_alloc(9). >> >> I also found the logic in the vmx vcpu deinit needs adjusting to the new >> logic: we always allocate a vpid even if we don't need it. In practice, >> this is fine as it's a 16 bit value and we currently cap total vcpu >> count in vmm at 512 (rather arbitrarily). >> >> Still net-negative ;) > > Successfully tested on Intel vmd host. 
> > vcpu_init_vmx() does >> + if (vmm_alloc_vpid(&vcpu->vc_vpid)) >> + return (ENOMEM); > > vcpu_init_svm() does >> + if (vmm_alloc_vpid(&asid)) >> + return (ENOMEM); >> + vcpu->vc_vpid = asid; > > Can you remove the asid variable for consistency? > Done. > And why is vcpu_deinit_vmx() doing >> + if (vcpu->vc_vpid) >> vmm_free_vpid(vcpu->vc_vpid); > while vcpu_deinit_svm() calls vmm_free_vpid() unconditionally? > Good point. That was a holdover of logic from when we conditionally allocated vpids on Intel in the vcpu reset handler. Updated diff below. diffstat refs/heads/master refs/heads/vpid-leak M sys/arch/amd64/amd64/vmm_machdep.c | 19+ 35- 1 file changed, 19 insertions(+), 35 deletions(-) diff refs/heads/master refs/heads/vpid-leak commit - 84b38932e34d2692ffaf821e4542142c343fa76f commit + 929c982aca50544a7e0770af67bfc71b0f2400d1 blob - c84fbb3273c799116e4628a1097db4eb9f135955 blob + 2fe7d8074968fdf3da08c179a9e8ee261de199f7 --- sys/arch/amd64/amd64/vmm_machdep.c +++ sys/arch/amd64/amd64/vmm_machdep.c @@ -2253,7 +2253,7 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg uint32_t pinbased, procbased, procbased2, exit, entry; uint32_t want1, want0; uint64_t ctrlval, cr3, msr_misc_enable; - uint16_t ctrl, vpid; + uint16_t ctrl; struct vmx_msr_store *msr_store; rw_assert_wrlock(&vcpu->vc_lock); @@ -2516,30 +2516,12 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) { if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS, IA32_VMX_ENABLE_VPID, 1)) { - - /* We may sleep during allocation, so reload VMCS. 
*/ - vcpu->vc_last_pcpu = curcpu(); - ret = vmm_alloc_vpid(&vpid); - if (vcpu_reload_vmcs_vmx(vcpu)) { - printf("%s: failed to reload vmcs\n", __func__); - ret = EINVAL; - goto exit; - } - if (ret) { - DPRINTF("%s: could not allocate VPID\n", - __func__); - ret = EINVAL; - goto exit; - } - - if (vmwrite(VMCS_GUEST_VPID, vpid)) { + if (vmwrite(VMCS_GUEST_VPID, vcpu->vc_vpid)) { DPRINTF("%s: error setting guest VPID\n", __func__); ret = EINVAL; goto exit; } - - vcpu->vc_vpid = vpid; } } @@ -2832,13 +2814,19 @@ vcpu_init_vmx(struct vcpu *vcpu) uint32_t cr0, cr4; int ret = 0; + /* Allocate a VPID early to avoid km_alloc if we're out of VPIDs. */ + if (vmm_alloc_vpid(&vcpu->vc_vpid)) + return (ENOMEM); + /* Allocate VMCS VA */ vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero, &kd_waitok); vcpu->vc_vmx_vmcs_state = VMCS_CLEARED; - if (!vcpu->vc_control_va) - return (ENOMEM); + if (!vcpu->vc_control_va) { + ret = ENOMEM; + goto exit; + } /* Compute VMCS PA */ if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va, @@ -3091,15 +3079,20 @@ vcpu_reset_regs(struct vcpu *vcpu, struct vcpu_reg_sta int vcpu_init_svm(struct vcpu *vcpu, struct vm_create_params *vcp) { - uint16_t asid; int ret = 0; + /* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */ + if (vmm_alloc_vpid(&vcpu->vc_vpid)) + return (ENOMEM); + /* Allocate VMCB VA */ vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, &kp_zero, &kd_waitok); - if (!vcpu->vc_control_va) - return (ENOMEM); + if (!vcpu->vc_control_va) { + ret = ENOMEM; + goto exit; + } /* Compute VMCB PA */ if (!pmap_extract(pmap_kernel(), vcpu->vc_control_va, @@ -3173,14 +3166,6 @@ vcpu_init_svm(struct vcpu *vcpu, struct vm_create_para (uint64_t)vcpu->vc_svm_ioio_va, (uint64_t)vcpu->vc_svm_ioio_pa); - /* Guest VCPU ASID */ - if (vmm_alloc_vpid(&asid)) { - DPRINTF("%s: could not allocate asid\n", __func__); - ret = EINVAL; - goto exit; - } - vcpu->vc_vpid = asid; - /* Shall we enable SEV? 
*/ vcpu->vc_sev = vcp->vcp_sev; @@ -3260,8 +3245,7 @@ vcpu_deinit_vmx(struct vcpu *vcpu) } #endif - if (vcpu->vc_vmx_vpid_enabled) - vmm_free_vpid(vcpu->vc_vpid); + vmm_free_vpid(vcpu->vc_vpid); } /*