teach EPT pmaps how to use pv_lists
Dave Voutila <dv@sisu.io> writes:
> Looking for some testing (of vmm/vmd on Intel hosts) and review of the
> following diff.
>
Still looking for a tester or two before I push for a review. (Though a
review would be nice as well :)
> In short, the objective is to incrementally bring EPT pmap operations
> more in line with regular amd64 and AMD RVI pmaps. This diff (the
> pv_entry structure involved is sketched after the list):
>
> - updates pmap_enter_ept() to add pv_entry's when inserting a pte in an
> EPT pmap
>
> - updates pmap_do_remove_ept() to support removing pv_entry's when
> removing a pte from an EPT pmap
>
> - updates pmap_page_remove() to support zapping an EPT pte if a given
> UVM vm_page is being removed and has a pv_entry pointing to an EPT pte
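>
> For anyone who doesn't page in pmap internals often: a pv_entry is the
> glue that lets us walk from a managed vm_page back to every (pmap, va)
> mapping of it, which is what pmap_page_remove() needs to do. Roughly
> paraphrasing the existing struct from sys/arch/amd64/include/pmap.h
> (for an EPT pmap, pv_va holds the guest physical address):
>
> 	struct pv_entry {
> 		struct pv_entry	*pv_next;  /* next entry on the page's list */
> 		struct pmap	*pv_pmap;  /* pmap owning this mapping */
> 		vaddr_t		 pv_va;    /* va of the mapping (gpa here) */
> 		struct vm_page	*pv_ptp;   /* vm_page of the page table page */
> 	};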
>
> For anyone reviewing:
>
> - I needed to change the PG_PVLIST bit so it would still be usable in
> an EPT pte, so I just swapped it for PG_AVAIL3, which looks available
> for use in EPT ptes
>
> - Validity checks on EPT ptes are not currently performed. For
> correctness, one cannot just reuse the PG_V bit, as EPT ptes are
> special and support true X-only permissions (in addition to breaking
> R out from W); a possible check is sketched after this list. That
> should be a different diff IMO.
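>
> To make the second point concrete: an EPT pte has no PG_V bit, and
> (ignoring mode-based execute control) the hardware treats an entry as
> present iff any of the R/W/X bits is set. So a future check could look
> something like this untested sketch (pmap_ept_pte_valid() is a made-up
> name, not part of this diff):
>
> 	/* Hypothetical helper for the XXX spots in the diff below. */
> 	static inline int
> 	pmap_ept_pte_valid(pt_entry_t pte)
> 	{
> 		return (pte & (EPT_R | EPT_W | EPT_X)) != 0;
> 	}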
>
> I'm not a big fan of throwing more junk directly into
> pmap_page_remove(), but given how it's designed, it feels like the
> sanest place to put the logic for now.
>
>
> diffstat refs/heads/master refs/heads/ept-pvlist-3
> M sys/arch/amd64/amd64/pmap.c | 90+ 14-
> M sys/arch/amd64/include/pmap.h | 2+ 2-
>
> 2 files changed, 92 insertions(+), 16 deletions(-)
>
> diff refs/heads/master refs/heads/ept-pvlist-3
> commit - 3c9ccb50785e1c693c25fd18c467a46499c73493
> commit + 1fd874b9a12aadcaf92afdc5c9caa94bb5970c27
> blob - 35af9d10cdc6cb79f6288b964bc350a46bfce8d6
> blob + 4e843b19983e2a3c277c8cc9aea8165746c80aea
> --- sys/arch/amd64/amd64/pmap.c
> +++ sys/arch/amd64/amd64/pmap.c
> @@ -336,8 +336,8 @@ struct pv_entry *pmap_remove_pv(struct vm_page *, stru
> void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
> #if NVMM > 0
> void pmap_remove_ept(struct pmap *, vaddr_t, vaddr_t);
> -void pmap_do_remove_ept(struct pmap *, vaddr_t);
> -int pmap_enter_ept(struct pmap *, vaddr_t, paddr_t, vm_prot_t);
> +void pmap_do_remove_ept(struct pmap *, vaddr_t, struct pv_entry **);
> +int pmap_enter_ept(struct pmap *, vaddr_t, paddr_t, vm_prot_t, int);
> void pmap_shootept(struct pmap *, int);
> #endif /* NVMM > 0 */
> int pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
> @@ -1970,6 +1970,39 @@ pmap_page_remove(struct vm_page *pg)
> pm = pve->pv_pmap;
> mtx_leave(&pg->mdpage.pv_mtx);
>
> +#if NVMM > 0
> + if (pm->pm_type == PMAP_TYPE_EPT) {
> + mtx_enter(&pm->pm_mtx);
> +
> + /*
> + * We dropped the pvlist lock above, so the list may have
> + * changed; recheck that pve is still the head and ours
> + * (see the note in the non-EPT path below).
> + */
> + mtx_enter(&pg->mdpage.pv_mtx);
> + if ((pve = pg->mdpage.pv_list) == NULL ||
> + pve->pv_pmap != pm) {
> + mtx_leave(&pg->mdpage.pv_mtx);
> + mtx_leave(&pm->pm_mtx);
> + pmap_destroy(pm);
> + mtx_enter(&pg->mdpage.pv_mtx);
> + continue; /* pv_list changed underneath us; restart. */
> + }
> +
> + /* Detach the pv_entry from the pv_list. */
> + pg->mdpage.pv_list = pve->pv_next;
> + mtx_leave(&pg->mdpage.pv_mtx);
> +
> + /* Zap it. */
> + pmap_do_remove_ept(pm, pve->pv_va, NULL);
> + pmap_shootept(pm, 1);
> +
> + /* Clean up before next iteration. */
> + mtx_leave(&pm->pm_mtx);
> + pmap_destroy(pm);
> + pool_put(&pmap_pv_pool, pve);
> + mtx_enter(&pg->mdpage.pv_mtx);
> + continue;
> + }
> +#endif /* NVMM > 0 */
> +
> /* XXX use direct map? */
> scr3 = pmap_map_ptes(pm); /* locks pmap */
> shootself = (scr3 == 0);
> @@ -2460,29 +2493,36 @@ void
> pmap_remove_ept(struct pmap *pmap, vaddr_t sgpa, vaddr_t egpa)
> {
> vaddr_t v;
> + struct pv_entry *pve, *free_pvs = NULL;
>
> mtx_enter(&pmap->pm_mtx);
>
> DPRINTF("%s: sgpa=0x%llx egpa=0x%llx\n", __func__, (uint64_t)sgpa,
> (uint64_t)egpa);
> for (v = sgpa; v < egpa + PAGE_SIZE; v += PAGE_SIZE)
> - pmap_do_remove_ept(pmap, v);
> + pmap_do_remove_ept(pmap, v, &free_pvs);
>
> pmap_shootept(pmap, 1);
>
> mtx_leave(&pmap->pm_mtx);
> -
> pmap_tlb_shootwait();
> +
> + while ((pve = free_pvs) != NULL) {
> + free_pvs = pve->pv_next;
> + pool_put(&pmap_pv_pool, pve);
> + }
> }
>
> void
> -pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa)
> +pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa, struct pv_entry **free_pvs)
> {
> uint64_t l4idx, l3idx, l2idx, l1idx;
> - struct vm_page *pg3, *pg2, *pg1;
> + struct vm_page *pg, *pg3, *pg2, *pg1;
> + struct pv_entry *pve;
> paddr_t npa3, npa2, npa1;
> pd_entry_t *pd4, *pd3, *pd2, *pd1;
> pd_entry_t *pptes;
> + pt_entry_t opte;
>
> MUTEX_ASSERT_LOCKED(&pmap->pm_mtx);
>
> @@ -2518,10 +2558,23 @@ pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa)
> pd1 = (pd_entry_t *)PMAP_DIRECT_MAP(npa1);
> pg1 = PHYS_TO_VM_PAGE(npa1);
>
> - if (pd1[l1idx] == 0)
> + opte = pmap_pte_set(&pd1[l1idx], 0);
> + if (opte == 0) /* XXX check using an EPT valid bit. */
> return;
>
> - pd1[l1idx] = 0;
> + if (opte & PG_PVLIST) {
> + pg = PHYS_TO_VM_PAGE(opte & pg_frame);
> + if (pg == NULL)
> + panic("%s: PG_PVLIST mapping with unmanaged page: gpa "
> + "0x%lx, opte 0x%llx, hpa 0x%llx", __func__, gpa,
> + opte, opte & pg_frame);
> + pve = pmap_remove_pv(pg, pmap, gpa);
> + if (pve != NULL && free_pvs != NULL) {
> + pve->pv_next = *free_pvs;
> + *free_pvs = pve;
> + }
> + }
> +
> pg1->wire_count--;
> pmap->pm_stats.resident_count--;
>
> @@ -2556,11 +2609,14 @@ pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa)
> }
>
> int
> -pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t hpa, vm_prot_t prot)
> +pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t hpa, vm_prot_t prot,
> + int flags)
> {
> uint64_t l4idx, l3idx, l2idx, l1idx;
> - pd_entry_t *pd, npte;
> - struct vm_page *ptp, *pptp;
> + pd_entry_t *pd;
> + pt_entry_t opte, npte;
> + struct vm_page *pg, *ptp, *pptp;
> + struct pv_entry *pve;
> paddr_t npa;
> struct uvm_object *obj;
> int ret = 0;
> @@ -2573,6 +2629,13 @@ pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t
> l2idx = (gpa & L2_MASK) >> L2_SHIFT; /* PDE idx */
> l1idx = (gpa & L1_MASK) >> L1_SHIFT; /* PTE idx */
>
> + pve = pool_get(&pmap_pv_pool, PR_NOWAIT);
> + if (pve == NULL) {
> + if (flags & PMAP_CANFAIL)
> + return ENOMEM;
> + panic("%s: no pv entries available", __func__);
> + }
> +
> mtx_enter(&pmap->pm_mtx);
>
> /* Start at PML4 / top level */
> @@ -2720,11 +2783,21 @@ pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t
> if (prot & PROT_EXEC)
> npte |= EPT_X;
>
> - if (pd[l1idx] == 0) {
> + opte = pd[l1idx]; /* XXX check an EPT valid bit */
> + if (opte == 0) {
> ptp->wire_count++;
> pmap->pm_stats.resident_count++;
> +
> + pg = PHYS_TO_VM_PAGE(hpa);
> + if (pg != NULL) {
> + pmap_enter_pv(pg, pve, pmap, gpa, ptp);
> + npte |= PG_PVLIST;
> + pve = NULL;
> + } else
> + panic("%s: cannot find vm page", __func__);
> } else {
> - /* XXX flush ept */
> + pmap_shootept(pmap, 1);
> + pmap_tlb_shootwait();
> }
>
> pd[l1idx] = npte;
> @@ -2732,6 +2805,9 @@ pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t
> unlock:
> mtx_leave(&pmap->pm_mtx);
>
> + if (pve != NULL)
> + pool_put(&pmap_pv_pool, pve);
> +
> return ret;
> }
> #endif /* NVMM > 0 */
> @@ -2758,7 +2834,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa,
>
> #if NVMM > 0
> if (pmap_is_ept(pmap))
> - return pmap_enter_ept(pmap, va, pa, prot);
> + return pmap_enter_ept(pmap, va, pa, prot, flags);
> #endif /* NVMM > 0 */
>
> KASSERT(!(wc && nocache));
> blob - cc358eab1e93376f4d0b511d0c0bcd06aa476b40
> blob + 6eeab997123e6d10a9dddde3f55bf25950f43978
> --- sys/arch/amd64/include/pmap.h
> +++ sys/arch/amd64/include/pmap.h
> @@ -245,8 +245,8 @@
> */
>
> #define PG_W PG_AVAIL1 /* "wired" mapping */
> -#define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */
> -/* PG_AVAIL3 not used */
> +/* PG_AVAIL2 not used */
> +#define PG_PVLIST PG_AVAIL3 /* mapping has entry on pvlist */
>
> /*
> * PCID assignments.
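
One note for testers: since pmap_enter_ept() now honors PMAP_CANFAIL, a
caller can get ENOMEM back when the pv pool is exhausted instead of the
kernel panicking. Handling it would look something like this untested
sketch (the retry policy is made up; uvm_fault() already does the
equivalent for regular pmaps):

	int error;

	/* PMAP_CANFAIL turns pv_entry allocation failure into ENOMEM. */
	error = pmap_enter(pmap, gpa, hpa, prot, flags | PMAP_CANFAIL);
	if (error == ENOMEM)
		uvm_wait("eptpve");	/* let the pagedaemon run, then retry */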