[EXT] Reworking VMM's nested paging & guest memory (de-vmspace-ification)
Hans-Jörg Höxer <hshoexer@genua.de> writes:
> Hi,
>
> On Mon, Apr 14, 2025 at 08:18:46PM -0400, Dave Voutila wrote:
>> ...
>> OKs welcome, but I'd prefer some test reports before looking to
>> commit. Since we're recently unlocked from release prep, I'd prefer
>> sooner rather than later.
>
> I have tested the diff on my AMD setup with SEV enabled VMs. The diff
> works fine.
>
> cpu0: AMD EPYC 9124 16-Core Processor, 3000.01 MHz, 19-11-01, patch 0a101148
>
> Diff looks reasonable to me. One typo in a comment and one question
> inline below.
>
> Take care,
> HJ.
>
>> Note: this doesn't nuke uvm_share(), but that can come afterwards if
>> this lands and nobody needs/wants it.
>>
>> -dv
>>
>> diff refs/heads/master refs/heads/vmm-aobj
>> commit - ade9dbe6546b6b7741371ee61e71f48f219d977a
>> commit + ea39d23aa67862930c7cbccaaa31182d2d11a86f
>> blob - e3205f48eedda8f4158d9a8d42e6164534921844
>> blob + e58730ec23702850524db522dd82d750589d4c18
>> --- sys/arch/amd64/amd64/vmm_machdep.c
>> +++ sys/arch/amd64/amd64/vmm_machdep.c
>> @@ -115,6 +115,7 @@ int vmm_inject_db(struct vcpu *);
>> void vmx_handle_intr(struct vcpu *);
>> void vmx_handle_misc_enable_msr(struct vcpu *);
>> int vmm_get_guest_memtype(struct vm *, paddr_t);
>> +vaddr_t vmm_translate_gpa(struct vm *, paddr_t);
>> int vmx_get_guest_faulttype(void);
>> int svm_get_guest_faulttype(struct vmcb *);
>> int vmx_get_exit_qualification(uint64_t *);
>> @@ -905,54 +906,19 @@ vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *v
>> int
>> vm_impl_init(struct vm *vm, struct proc *p)
>> {
>> - int i, mode, ret;
>> - vaddr_t mingpa, maxgpa;
>> - struct vm_mem_range *vmr;
>> -
>> /* If not EPT or RVI, nothing to do here */
>> switch (vmm_softc->mode) {
>> case VMM_MODE_EPT:
>> - mode = PMAP_TYPE_EPT;
>> + pmap_convert(vm->vm_pmap, PMAP_TYPE_EPT);
>> break;
>> case VMM_MODE_RVI:
>> - mode = PMAP_TYPE_RVI;
>> + pmap_convert(vm->vm_pmap, PMAP_TYPE_RVI);
>> break;
>> default:
>> printf("%s: invalid vmm mode %d\n", __func__, vmm_softc->mode);
>> return (EINVAL);
>> }
>>
>> - vmr = &vm->vm_memranges[0];
>> - mingpa = vmr->vmr_gpa;
>> - vmr = &vm->vm_memranges[vm->vm_nmemranges - 1];
>> - maxgpa = vmr->vmr_gpa + vmr->vmr_size;
>> -
>> - /*
>> - * uvmspace_alloc (currently) always returns a valid vmspace
>> - */
>> - vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE, FALSE);
>> - vm->vm_map = &vm->vm_vmspace->vm_map;
>> -
>> - /* Map the new map with an anon */
>> - DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map);
>> - for (i = 0; i < vm->vm_nmemranges; i++) {
>> - vmr = &vm->vm_memranges[i];
>> - ret = uvm_share(vm->vm_map, vmr->vmr_gpa,
>> - PROT_READ | PROT_WRITE | PROT_EXEC,
>> - &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size);
>> - if (ret) {
>> - printf("%s: uvm_share failed (%d)\n", __func__, ret);
>> - /* uvmspace_free calls pmap_destroy for us */
>> - KERNEL_LOCK();
>> - uvmspace_free(vm->vm_vmspace);
>> - vm->vm_vmspace = NULL;
>> - KERNEL_UNLOCK();
>> - return (ENOMEM);
>> - }
>> - }
>> -
>> - pmap_convert(vm->vm_map->pmap, mode);
>> -
>> return (0);
>> }
>>
>> @@ -1666,7 +1632,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg
>>
>> /* NPT */
>> vmcb->v_np_enable = SVM_ENABLE_NP;
>> - vmcb->v_n_cr3 = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
>> + vmcb->v_n_cr3 = vcpu->vc_parent->vm_pmap->pm_pdirpa;
>>
>> /* SEV */
>> if (vcpu->vc_sev)
>> @@ -1680,7 +1646,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg
>> /* xcr0 power on default sets bit 0 (x87 state) */
>> vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask;
>>
>> - vcpu->vc_parent->vm_map->pmap->eptp = 0;
>> + vcpu->vc_parent->vm_pmap->eptp = 0;
>>
>> return ret;
>> }
>> @@ -2601,7 +2567,7 @@ vcpu_init_vmx(struct vcpu *vcpu)
>> }
>>
>> /* Configure EPT Pointer */
>> - eptp = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
>> + eptp = vcpu->vc_parent->vm_pmap->pm_pdirpa;
>> msr = rdmsr(IA32_VMX_EPT_VPID_CAP);
>> if (msr & IA32_EPT_VPID_CAP_PAGE_WALK_4) {
>> /* Page walk length 4 supported */
>> @@ -2625,7 +2591,7 @@ vcpu_init_vmx(struct vcpu *vcpu)
>> goto exit;
>> }
>>
>> - vcpu->vc_parent->vm_map->pmap->eptp = eptp;
>> + vcpu->vc_parent->vm_pmap->eptp = eptp;
>>
>> /* Host CR0 */
>> cr0 = rcr0() & ~CR0_TS;
>> @@ -3490,7 +3456,7 @@ vmm_translate_gva(struct vcpu *vcpu, uint64_t va, uint
>>
>> DPRINTF("%s: read pte level %d @ GPA 0x%llx\n", __func__,
>> level, pte_paddr);
>> - if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, pte_paddr,
>> + if (!pmap_extract(vcpu->vc_parent->vm_pmap, pte_paddr,
>> &hpa)) {
>> DPRINTF("%s: cannot extract HPA for GPA 0x%llx\n",
>> __func__, pte_paddr);
>> @@ -3671,11 +3637,11 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *
>>
>> /* We're now using this vcpu's EPT pmap on this cpu. */
>> atomic_swap_ptr(&ci->ci_ept_pmap,
>> - vcpu->vc_parent->vm_map->pmap);
>> + vcpu->vc_parent->vm_pmap);
>>
>> /* Invalidate EPT cache. */
>> vid_ept.vid_reserved = 0;
>> - vid_ept.vid_eptp = vcpu->vc_parent->vm_map->pmap->eptp;
>> + vid_ept.vid_eptp = vcpu->vc_parent->vm_pmap->eptp;
>> if (invept(ci->ci_vmm_cap.vcc_vmx.vmx_invept_mode,
>> &vid_ept)) {
>> printf("%s: invept\n", __func__);
>> @@ -4461,6 +4427,29 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
>> return (VMM_MEM_TYPE_UNKNOWN);
>> }
>>
>> +vaddr_t
>> +vmm_translate_gpa(struct vm *vm, paddr_t gpa)
>> +{
>> + int i = 0;
>> + vaddr_t hva = 0;
>> + struct vm_mem_range *vmr = NULL;
>> +
>> + /*
>> + * Translate GPA -> userland HVA in proc p. Find the memory range
>> + * and use it to translate to the HVA.
>> + */
>> + for (i = 0; i < vm->vm_nmemranges; i++) {
>> + vmr = &vm->vm_memranges[i];
>> + if (gpa >= vmr->vmr_gpa && gpa < vmr->vmr_gpa + vmr->vmr_size) {
>> + hva = vmr->vmr_va + (gpa - vmr->vmr_gpa);
>> + break;
>> + }
>> + }
>> +
>> + return (hva);
>> +}
>> +
>> +
>> /*
>> * vmx_get_exit_qualification
>> *
>> @@ -4542,15 +4531,45 @@ svm_get_guest_faulttype(struct vmcb *vmcb)
>> int
>> svm_fault_page(struct vcpu *vcpu, paddr_t gpa)
>> {
>> - paddr_t pa = trunc_page(gpa);
>> - int ret;
>> + struct proc *p = curproc;
>> + paddr_t hpa, pa = trunc_page(gpa);
>> + vaddr_t hva;
>> + int ret = 1;
>>
>> - ret = uvm_fault_wire(vcpu->vc_parent->vm_map, pa, pa + PAGE_SIZE,
>> - PROT_READ | PROT_WRITE | PROT_EXEC);
>> - if (ret)
>> - printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
>> - __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip);
>> + hva = vmm_translate_gpa(vcpu->vc_parent, pa);
>> + if (hva == 0) {
>> + printf("%s: unable to translate gpa 0x%llx\n", __func__,
>> + (uint64_t)pa);
>> + return (EINVAL);
>> + }
>>
>> + /* If we don't already have a backing page... */
>> + if (!pmap_extract(p->p_vmspace->vm_map.pmap, hva, &hpa)) {
>> + /* ...fault a RW page into the p's address space... */
>> + ret = uvm_fault_wire(&p->p_vmspace->vm_map, hva,
>> + hva + PAGE_SIZE, PROT_READ | PROT_WRITE);
>> + if (ret) {
>> + printf("%s: uvm_fault failed %d hva=0x%llx\n", __func__,
>> + ret, (uint64_t)hva);
>> + return (ret);
>> + }
>> +
>> + /* ...and then get the mapping. */
>> + if (!pmap_extract(p->p_vmspace->vm_map.pmap, hva, &hpa)) {
>> + printf("%s: failed to extract hpa for hva 0x%llx\n",
>> + __func__, (uint64_t)hva);
>> + return (EINVAL);
>> + }
>> + }
>> +
>> + /* Now we insert a RWX mapping into the guest's RVI pmap. */
>> + ret = pmap_enter(vcpu->vc_parent->vm_pmap, pa, hpa,
>> + PROT_READ | PROT_WRITE | PROT_EXEC, 0);
>> + if (ret) {
>> + printf("%s: pmap_enter failed pa=0x%llx, hpa=0x%llx\n",
>> + __func__, (uint64_t)pa, (uint64_t)hpa);
>> + }
>> +
>> return (ret);
>> }
>>
>> @@ -4617,8 +4636,10 @@ svm_handle_np_fault(struct vcpu *vcpu)
>> int
>> vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
>> {
>> + struct proc *p = curproc;
>> int fault_type, ret;
>> - paddr_t pa = trunc_page(gpa);
>> + paddr_t hpa, pa = trunc_page(gpa);
>> + vaddr_t hva;
>>
>> fault_type = vmx_get_guest_faulttype();
>> switch (fault_type) {
>> @@ -4633,19 +4654,45 @@ vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
>> break;
>> }
>>
>> - /* We may sleep during uvm_fault_wire(), so reload VMCS. */
>> - vcpu->vc_last_pcpu = curcpu();
>> - ret = uvm_fault_wire(vcpu->vc_parent->vm_map, pa, pa + PAGE_SIZE,
>> - PROT_READ | PROT_WRITE | PROT_EXEC);
>> - if (vcpu_reload_vmcs_vmx(vcpu)) {
>> - printf("%s: failed to reload vmcs\n", __func__);
>> + hva = vmm_translate_gpa(vcpu->vc_parent, pa);
>> + if (hva == 0) {
>> + printf("%s: unable to translate gpa 0x%llx\n", __func__,
>> + (uint64_t)pa);
>> return (EINVAL);
>> }
>>
>> - if (ret)
>> - printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
>> - __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip);
>> + /* If we don't already have a backing page... */
>> + if (!pmap_extract(p->p_vmspace->vm_map.pmap, hva, &hpa)) {
>> + /* ...fault a RW page into the p's address space... */
>> + vcpu->vc_last_pcpu = curcpu(); /* uvm_fault may sleep. */
>> + ret = uvm_fault_wire(&p->p_vmspace->vm_map, hva,
>> + hva + PAGE_SIZE, PROT_READ | PROT_WRITE);
>> + if (ret) {
>> + printf("%s: uvm_fault failed %d hva=0x%llx\n", __func__,
>> + ret, (uint64_t)hva);
>> + return (ret);
>> + }
>> + if (vcpu_reload_vmcs_vmx(vcpu)) {
>> + printf("%s: failed to reload vmcs\n", __func__);
>> + return (EINVAL);
>> + }
>>
>> + /* ...and then get the mapping. */
>> + if (!pmap_extract(p->p_vmspace->vm_map.pmap, hva, &hpa)) {
>> + printf("%s: failed to extract hpa for hva 0x%llx\n",
>> + __func__, (uint64_t)hva);
>> + return (EINVAL);
>> + }
>> + }
>> +
>> + /* Now we insert a RWX mapping into the guest's EPT pmap. */
>> + ret = pmap_enter(vcpu->vc_parent->vm_pmap, pa, hpa,
>> + PROT_READ | PROT_WRITE | PROT_EXEC, 0);
>> + if (ret) {
>> + printf("%s: pmap_enter failed pa=0x%llx, hpa=0x%llx\n",
>> + __func__, (uint64_t)pa, (uint64_t)hpa);
>> + }
>> +
>> return (ret);
>> }
>>
>> @@ -4930,7 +4977,7 @@ vmx_load_pdptes(struct vcpu *vcpu)
>> return (EINVAL);
>> }
>>
>> - if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, (vaddr_t)cr3,
>> + if (!pmap_extract(vcpu->vc_parent->vm_pmap, (vaddr_t)cr3,
>> (paddr_t *)&cr3_host_phys)) {
>> DPRINTF("%s: nonmapped guest CR3, setting PDPTEs to 0\n",
>> __func__);
>> @@ -6459,7 +6506,7 @@ vmm_update_pvclock(struct vcpu *vcpu)
>>
>> if (vcpu->vc_pvclock_system_gpa & PVCLOCK_SYSTEM_TIME_ENABLE) {
>> pvclock_gpa = vcpu->vc_pvclock_system_gpa & 0xFFFFFFFFFFFFFFF0;
>> - if (!pmap_extract(vm->vm_map->pmap, pvclock_gpa, &pvclock_hpa))
>> + if (!pmap_extract(vm->vm_pmap, pvclock_gpa, &pvclock_hpa))
>> return (EINVAL);
>> pvclock_ti = (void*) PMAP_DIRECT_MAP(pvclock_hpa);
>>
>> blob - 0a86ddbecd3fbd3627336902c7209f6d023bc3f0
>> blob + f5bfc6cce441f23bc97fd94ef63684e59b3161cd
>> --- sys/dev/vmm/vmm.c
>> +++ sys/dev/vmm/vmm.c
>> @@ -25,6 +25,9 @@
>> #include <sys/malloc.h>
>> #include <sys/signalvar.h>
>>
>> +#include <uvm/uvm_extern.h>
>> +#include <uvm/uvm_aobj.h>
>> +
>> #include <machine/vmmvar.h>
>>
>> #include <dev/vmm/vmm.h>
>> @@ -356,6 +359,8 @@ vm_create(struct vm_create_params *vcp, struct proc *p
>> size_t memsize;
>> struct vm *vm;
>> struct vcpu *vcpu;
>> + struct uvm_object *uao;
>> + unsigned int uvmflag = 0;
>>
>> memsize = vm_create_check_mem_ranges(vcp);
>> if (memsize == 0)
>> @@ -378,13 +383,48 @@ vm_create(struct vm_create_params *vcp, struct proc *p
>> /* Instantiate and configure the new vm. */
>> vm = pool_get(&vm_pool, PR_WAITOK | PR_ZERO);
>>
>> + /* Create the VM's identity. */
>> vm->vm_creator_pid = p->p_p->ps_pid;
>> + strncpy(vm->vm_name, vcp->vcp_name, VMM_MAX_NAME_LEN - 1);
>> +
>> + /* Create the pmap for nested paging. */
>> + vm->vm_pmap = pmap_create();
>> +
>> + /* Initialize memory slots. */
>> vm->vm_nmemranges = vcp->vcp_nmemranges;
>> memcpy(vm->vm_memranges, vcp->vcp_memranges,
>> vm->vm_nmemranges * sizeof(vm->vm_memranges[0]));
>> - vm->vm_memory_size = memsize;
>> - strncpy(vm->vm_name, vcp->vcp_name, VMM_MAX_NAME_LEN - 1);
>> + vm->vm_memory_size = memsize; /* Calculated above. */
>> + uvmflag = UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
>> + MAP_INHERIT_SHARE, 0, UVM_FLAG_FIXED);
>> + for (i = 0; i < vm->vm_nmemranges; i++) {
>> + uao = NULL;
>> + if (vm->vm_memranges[i].vmr_type != VM_MEM_MMIO) {
>> + uao = uao_create(vm->vm_memranges[i].vmr_size,
>> + UAO_FLAG_CANFAIL);
>> + if (uao == NULL) {
>> + printf("%s: failed to initialize memory slot\n",
>> + __func__);
>> + vm_teardown(&vm);
>> + return (ENOMEM);
>> + }
>>
>> + /* Map the UVM aobj into the process. */
>> + uao_reference(uao);
>> + ret = uvm_map(&p->p_vmspace->vm_map,
>> + &vm->vm_memranges[i].vmr_va,
>> + vm->vm_memranges[i].vmr_size, uao, 0, 0, uvmflag);
>> + if (ret) {
>> + printf("%s: uvm_map failed: %d\n", __func__,
>> + ret);
>> + vm_teardown(&vm);
>> + return (ENOMEM);
>> + }
>> + }
>> + vm->vm_memory_slot[i] = uao;
>> + }
>> +
>> +
>> if (vm_impl_init(vm, p)) {
>> printf("failed to init arch-specific features for vm %p\n", vm);
>> vm_teardown(&vm);
>> @@ -487,13 +527,13 @@ vm_create_check_mem_ranges(struct vm_create_params *vc
>> * Calling uvm_share() when creating the VM will take care of
>> * further checks.
>> */
>> - if (vmr->vmr_va < VM_MIN_ADDRESS ||
>> +/* if (vmr->vmr_va < VM_MIN_ADDRESS ||
>> vmr->vmr_va >= VM_MAXUSER_ADDRESS ||
>> vmr->vmr_size >= VM_MAXUSER_ADDRESS - vmr->vmr_va) {
>> DPRINTF("guest va not within range or wraps\n");
>> return (0);
>> }
>> -
>> +*/
>
> Why remove this check? Shouldn't these constraints still hold?
>
I'm shifting to a model where the userland process no longer dictates
the virtual address layout of the guest memory in the vmd process. The
vm_create() and vm_share_mem() functions should determine the va ranges
in the vmd process via uvm_map() and then communicate them back to the
calling process.

In other words, the va is turning into an output of the ioctl and not
an input.

I'll be sending out a revised diff now that I've iterated on the uvm
portion with mpi; that should make it clearer.
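
To sketch what I mean (untested, and just reusing the existing
ioctl/struct names -- the revised diff may land differently), the vmd
side ends up looking something like:

	#include <sys/types.h>
	#include <sys/ioctl.h>

	#include <dev/vmm/vmm.h>
	#include <machine/vmmvar.h>

	#include <err.h>
	#include <stdio.h>

	/*
	 * Sketch only: create the VM with the vmr_va fields zeroed and
	 * let vmm(4) fill in the va's it picked when mapping the aobjs
	 * into our address space.
	 */
	static void
	create_vm(int vmm_fd, struct vm_create_params *vcp)
	{
		size_t i;

		/* va is now an output of the ioctl, not an input. */
		for (i = 0; i < vcp->vcp_nmemranges; i++)
			vcp->vcp_memranges[i].vmr_va = 0;

		if (ioctl(vmm_fd, VMM_IOC_CREATE, vcp) == -1)
			err(1, "VMM_IOC_CREATE");

		/* vmm(4) reports where each slot got mapped. */
		for (i = 0; i < vcp->vcp_nmemranges; i++)
			printf("slot %zu: gpa 0x%lx at va 0x%lx\n", i,
			    (u_long)vcp->vcp_memranges[i].vmr_gpa,
			    (u_long)vcp->vcp_memranges[i].vmr_va);
	}
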
>> /*
>> * Make sure that guest physical memory ranges do not overlap
>> * and that they are ascending.
>> @@ -529,10 +569,11 @@ vm_create_check_mem_ranges(struct vm_create_params *vc
>> void
>> vm_teardown(struct vm **target)
>> {
>> - size_t nvcpu = 0;
>> + size_t i, nvcpu = 0;
>> + vaddr_t sva, eva;
>> struct vcpu *vcpu, *tmp;
>> struct vm *vm = *target;
>> - struct vmspace *vm_vmspace;
>> + struct uvm_object *uao;
>>
>> KERNEL_ASSERT_UNLOCKED();
>>
>> @@ -545,17 +586,25 @@ vm_teardown(struct vm **target)
>> nvcpu++;
>> }
>>
>> - vm_impl_deinit(vm);
>> + /* Remove guest mappings from our nested page tables. */
>> + for (i = 0; i < vm->vm_nmemranges; i++) {
>> + sva = vm->vm_memranges[i].vmr_gpa;
>> + eva = sva + vm->vm_memranges[i].vmr_size - 1;
>> + pmap_remove(vm->vm_pmap, sva, eva);
>> + }
>>
>> - /* teardown guest vmspace */
>> - KERNEL_LOCK();
>> - vm_vmspace = vm->vm_vmspace;
>> - if (vm_vmspace != NULL) {
>> - vm->vm_vmspace = NULL;
>> - uvmspace_free(vm_vmspace);
>> + /* Release UVM anon objects backing our guest memory. */
>> + for (i = 0; i < vm->vm_nmemranges; i++) {
>> + uao = vm->vm_memory_slot[i];
>> + vm->vm_memory_slot[i] = NULL;
>> + if (uao != NULL)
>> + uao_detach(uao);
>> }
>> - KERNEL_UNLOCK();
>>
>> + /* At this point, no UVM-managed pages should reference our pmap. */
>> + pmap_destroy(vm->vm_pmap);
>> + vm->vm_pmap = NULL;
>> +
>> pool_put(&vm_pool, vm);
>> *target = NULL;
>> }
>> @@ -612,7 +661,7 @@ vm_get_info(struct vm_info_params *vip)
>>
>> out[i].vir_memory_size = vm->vm_memory_size;
>> out[i].vir_used_size =
>> - pmap_resident_count(vm->vm_map->pmap) * PAGE_SIZE;
>> + pmap_resident_count(vm->vm_pmap) * PAGE_SIZE;
>> out[i].vir_ncpus = vm->vm_vcpu_ct;
>> out[i].vir_id = vm->vm_id;
>> out[i].vir_creator_pid = vm->vm_creator_pid;
>> @@ -804,6 +853,8 @@ vm_share_mem(struct vm_sharemem_params *vsp, struct pr
>> size_t i, n;
>> struct vm *vm;
>> struct vm_mem_range *src, *dst;
>> + struct uvm_object *uao;
>> + unsigned int uvmflags;
>>
>> ret = vm_find(vsp->vsp_vm_id, &vm);
>> if (ret)
>> @@ -840,6 +891,8 @@ vm_share_mem(struct vm_sharemem_params *vsp, struct pr
>> * not need PROC_EXEC as the emulated devices do not need to execute
>> * instructions from guest memory.
>> */
>> + uvmflags = UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
>> + MAP_INHERIT_SHARE, 0, UVM_FLAG_FIXED);
>> for (i = 0; i < n; i++) {
>> src = &vm->vm_memranges[i];
>> dst = &vsp->vsp_memranges[i];
>> @@ -848,14 +901,16 @@ vm_share_mem(struct vm_sharemem_params *vsp, struct pr
>> if (src->vmr_type == VM_MEM_MMIO)
>> continue;
>>
>> - DPRINTF("sharing gpa=0x%lx for pid %d @ va=0x%lx\n",
>> - src->vmr_gpa, p->p_p->ps_pid, dst->vmr_va);
>> - ret = uvm_share(&p->p_vmspace->vm_map, dst->vmr_va,
>> - PROT_READ | PROT_WRITE, vm->vm_map, src->vmr_gpa,
>> - src->vmr_size);
>> + uao = vm->vm_memory_slot[i];
>> + KASSERT(uao != NULL);
>> +
>> + uao_reference(uao);
>> + ret = uvm_map(&p->p_p->ps_vmspace->vm_map, &dst->vmr_va,
>> + src->vmr_size, uao, 0, 0, uvmflags);
>> if (ret) {
>> - printf("%s: uvm_share failed (%d)\n", __func__, ret);
>> - break;
>> + printf("%s: uvm_map failed: %d\n", __func__, ret);
>> + uao_detach(uao);
>> + goto out;
>> }
>> }
>> ret = 0;
>> blob - ca5a152f550c3795714128f1a0d764dd2e593e59
>> blob + 9017362553a0b9dad5ecb34c8ca9da69c66180b9
>> --- sys/dev/vmm/vmm.h
>> +++ sys/dev/vmm/vmm.h
>> @@ -168,14 +168,17 @@ enum {
>> * V vmm_softc's vm_lock
>> */
>> struct vm {
>> - struct vmspace *vm_vmspace; /* [K] */
>> - vm_map_t vm_map; /* [K] */
>> + pmap_t vm_pmap; /* [r] */
>> +
>> uint32_t vm_id; /* [I] */
>> pid_t vm_creator_pid; /* [I] */
>> +
>> size_t vm_nmemranges; /* [I] */
>> size_t vm_memory_size; /* [I] */
>> - char vm_name[VMM_MAX_NAME_LEN];
>> struct vm_mem_range vm_memranges[VMM_MAX_MEM_RANGES];
>> + struct uvm_object *vm_memory_slot[VMM_MAX_MEM_RANGES]; /* [I] */
>> +
>> + char vm_name[VMM_MAX_NAME_LEN];
>> struct refcnt vm_refcnt; /* [a] */
>>
>> struct vcpu_head vm_vcpu_list; /* [v] */
>> blob - e399c0c0439a02117bc53c62cbb59c950b176773
>> blob + ed8cfb028d218f73fd9feeeb02af36030bfd4c7d
>> --- usr.sbin/vmd/vm.c
>> +++ usr.sbin/vmd/vm.c
>> @@ -808,6 +808,15 @@ alloc_guest_mem(struct vmd_vm *vm)
>> vmr->vmr_va = (vaddr_t)p;
>> }
>>
>> + /*
>> + * XXX for now, we munmap(2) because vmm(4) will remap uvm aobjs at
>> + * these addresses. Updating VMM_IOC_CREATE and VMM_IOC_SHAREMEM t0
>
> t0 -> to
>
>> + * do this for us will let us remove the mmap(2)/munmap(2) dance.
>> + */
>> + for (i = 0; i < vcp->vcp_nmemranges; i++) {
>> + vmr = &vcp->vcp_memranges[i];
>> + munmap((void *)vmr->vmr_va, vmr->vmr_size);
>> + }
>> return (ret);
>> }
>>
>> @@ -1448,6 +1457,9 @@ remap_guest_mem(struct vmd_vm *vm, int vmm_fd)
>> /*
>> * munmap(2) now that we have va's and ranges that don't overlap. vmm
>> * will use the va's and sizes to recreate the mappings for us.
>> + *
>> + * XXX can remove this once VMM_IOC_SHAREMEM handles mapping aobj's
>> + * for us.
>> */
>> for (i = 0; i < vsp.vsp_nmemranges; i++) {
>> vmr = &vsp.vsp_memranges[i];
>>