teach EPT pmaps how to use pv_lists
Dave Voutila <dv@sisu.io> writes:
Updated diff below with changes suggested by mlarkin@. Testers and oks
needed :D
> Looking for some testing (of vmm/vmd on Intel hosts) and review of the
> following diff.
>
> In short, the objective is to incrementally bring EPT pmap operations
> more in line with regular amd64 and AMD RVI pmaps. This diff:
>
> - updates pmap_enter_ept() to add pv_entry's when inserting a pte in an
> EPT pmap
>
> - updates pmap_do_remove_ept() to support removing pv_entry's when
> removing a pte from an EPT pmap
>
> - updates pmap_page_remove() to support zapping an EPT pte if a given
> UVM vm_page is being removed and has a pv_entry pointing to an EPT pte
> (a simplified sketch of the pv_entry linkage follows below)
>
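> For context, a simplified sketch of the pv_entry bookkeeping these
> changes extend to EPT pmaps. Field and function names follow the
> amd64 pmap, but this is illustrative only, not the kernel source:
>
>         /*
>          * Each managed vm_page keeps a singly-linked list of
>          * pv_entry's, one per mapping of that page.
>          */
>         struct pv_entry {
>                 struct pv_entry *pv_next;  /* next mapping of page */
>                 struct pmap     *pv_pmap;  /* owning pmap */
>                 vaddr_t          pv_va;    /* va (the GPA, for EPT) */
>                 struct vm_page  *pv_ptp;   /* ptp for mapping, or NULL */
>         };
>
>         /* Prepend onto a page's pv_list; caller holds pv_mtx. */
>         static void
>         pv_link(struct vm_page *pg, struct pv_entry *pve,
>             struct pmap *pm, vaddr_t va, struct vm_page *ptp)
>         {
>                 pve->pv_pmap = pm;
>                 pve->pv_va = va;
>                 pve->pv_ptp = ptp;
>                 pve->pv_next = pg->mdpage.pv_list;
>                 pg->mdpage.pv_list = pve;
>         }
>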
> For anyone reviewing:
>
> - I needed to change the PG_PVLIST bit so it would still be usable in
> an EPT pte; PG_AVAIL3 looks available for use in EPT ptes, so I
> swapped to that
>
> - Validity checks on EPT ptes are not currently performed. For
> correctness, one cannot just reuse the PG_V bit, as EPT ptes are
> special and support true X-only permissions (in addition to breaking
> R out from W); see the sketch below. That should be a different diff
> IMO.
>
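> To illustrate the validity point: a regular pte is present iff PG_V
> is set, but an EPT pte is present if any of its read/write/execute
> bits is set, so an execute-only mapping has no PG_V to test. A
> hypothetical helper (not part of this diff) could look like:
>
>         /*
>          * Hypothetical, not in this diff: an EPT pte is present if
>          * any of EPT_R/EPT_W/EPT_X is set, which covers true X-only
>          * mappings that PG_V cannot express.
>          */
>         static inline int
>         ept_pte_valid(pt_entry_t pte)
>         {
>                 return (pte & (EPT_R | EPT_W | EPT_X)) != 0;
>         }
>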
> I'm not a big fan of throwing more junk directly into
> pmap_page_remove(), but given how it's designed it feels like the most
> sane place to put the logic for now.
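>
> For reviewers unfamiliar with pmap_page_remove(): the EPT branch
> repeats the function's existing lock dance. The pmap mutex orders
> before the page's pv_mtx, so the code drops pv_mtx, references and
> locks the pmap, retakes pv_mtx, and re-checks the list head in case
> another thread won the race. Roughly (a sketch condensed from the
> diff below, not the actual source):
>
>         mtx_enter(&pg->mdpage.pv_mtx);
>         while ((pve = pg->mdpage.pv_list) != NULL) {
>                 pm = pve->pv_pmap;
>                 pmap_reference(pm);        /* keep pm alive unlocked */
>                 mtx_leave(&pg->mdpage.pv_mtx);
>
>                 mtx_enter(&pm->pm_mtx);    /* pmap lock first... */
>                 mtx_enter(&pg->mdpage.pv_mtx);  /* ...then pv_mtx */
>                 if ((pve = pg->mdpage.pv_list) == NULL ||
>                     pve->pv_pmap != pm) {
>                         /* raced; drop locks, retry from the head */
>                         mtx_leave(&pg->mdpage.pv_mtx);
>                         mtx_leave(&pm->pm_mtx);
>                         pmap_destroy(pm);
>                         mtx_enter(&pg->mdpage.pv_mtx);
>                         continue;
>                 }
>
>                 /* Unlink the pv_entry, then zap the pte. */
>                 pg->mdpage.pv_list = pve->pv_next;
>                 mtx_leave(&pg->mdpage.pv_mtx);
>                 pmap_do_remove_ept(pm, pve->pv_va, NULL);
>                 pmap_shootept(pm, 1);
>
>                 mtx_leave(&pm->pm_mtx);
>                 pmap_destroy(pm);
>                 pool_put(&pmap_pv_pool, pve);
>                 mtx_enter(&pg->mdpage.pv_mtx);
>         }
>         mtx_leave(&pg->mdpage.pv_mtx);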
>
>
diffstat refs/heads/master refs/heads/ept-pvlist-3
M sys/arch/amd64/amd64/pmap.c | 91+ 14-
M sys/arch/amd64/include/pmap.h | 2+ 2-
2 files changed, 93 insertions(+), 16 deletions(-)
diff refs/heads/master refs/heads/ept-pvlist-3
commit - e2c061ecb9e807859d3525b9e868ae5d227b3878
commit + 4791f1f2f408c10c66cde3d074c062c2185f2a2a
blob - bfd667f99bf2dec1a468ca3d8d4f95bf34401026
blob + 01b8f6c2915c26427a9a1e8449fbaee452e5b17b
--- sys/arch/amd64/amd64/pmap.c
+++ sys/arch/amd64/amd64/pmap.c
@@ -336,8 +336,8 @@ struct pv_entry *pmap_remove_pv(struct vm_page *, stru
void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
#if NVMM > 0
void pmap_remove_ept(struct pmap *, vaddr_t, vaddr_t);
-void pmap_do_remove_ept(struct pmap *, vaddr_t);
-int pmap_enter_ept(struct pmap *, vaddr_t, paddr_t, vm_prot_t);
+void pmap_do_remove_ept(struct pmap *, vaddr_t, struct pv_entry **);
+int pmap_enter_ept(struct pmap *, vaddr_t, paddr_t, vm_prot_t, int);
void pmap_shootept(struct pmap *, int);
#endif /* NVMM > 0 */
int pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
@@ -1970,6 +1970,40 @@ pmap_page_remove(struct vm_page *pg)
pm = pve->pv_pmap;
mtx_leave(&pg->mdpage.pv_mtx);
+#if NVMM > 0
+ if (pm->pm_type == PMAP_TYPE_EPT) {
+ mtx_enter(&pm->pm_mtx);
+
+ /*
+ * See note below about dropping pvlist lock.
+ */
+ mtx_enter(&pg->mdpage.pv_mtx);
+ if ((pve = pg->mdpage.pv_list) == NULL ||
+ pve->pv_pmap != pm) {
+ mtx_leave(&pg->mdpage.pv_mtx);
+ mtx_leave(&pm->pm_mtx);
+ pmap_destroy(pm);
+ mtx_enter(&pg->mdpage.pv_mtx);
+ continue;
+ }
+
+ /* Detach the pv_entry from the pv_list. */
+ pg->mdpage.pv_list = pve->pv_next;
+ mtx_leave(&pg->mdpage.pv_mtx);
+
+ /* Zap it. */
+ pmap_do_remove_ept(pm, pve->pv_va, NULL);
+ pmap_shootept(pm, 1);
+
+ /* Clean up before next iteration. */
+ mtx_leave(&pm->pm_mtx);
+ pmap_destroy(pm);
+ pool_put(&pmap_pv_pool, pve);
+ mtx_enter(&pg->mdpage.pv_mtx);
+ continue;
+ }
+#endif /* NVMM > 0 */
+
/* XXX use direct map? */
scr3 = pmap_map_ptes(pm); /* locks pmap */
shootself = (scr3 == 0);
@@ -2439,29 +2473,36 @@ void
pmap_remove_ept(struct pmap *pmap, vaddr_t sgpa, vaddr_t egpa)
{
vaddr_t v;
+ struct pv_entry *pve, *free_pvs = NULL;
mtx_enter(&pmap->pm_mtx);
DPRINTF("%s: sgpa=0x%llx egpa=0x%llx\n", __func__, (uint64_t)sgpa,
(uint64_t)egpa);
for (v = sgpa; v < egpa + PAGE_SIZE; v += PAGE_SIZE)
- pmap_do_remove_ept(pmap, v);
+ pmap_do_remove_ept(pmap, v, &free_pvs);
pmap_shootept(pmap, 1);
mtx_leave(&pmap->pm_mtx);
-
pmap_tlb_shootwait();
+
+ while ((pve = free_pvs) != NULL) {
+ free_pvs = pve->pv_next;
+ pool_put(&pmap_pv_pool, pve);
+ }
}
void
-pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa)
+pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa, struct pv_entry **free_pvs)
{
uint64_t l4idx, l3idx, l2idx, l1idx;
- struct vm_page *pg3, *pg2, *pg1;
+ struct vm_page *pg, *pg3, *pg2, *pg1;
+ struct pv_entry *pve;
paddr_t npa3, npa2, npa1;
pd_entry_t *pd4, *pd3, *pd2, *pd1;
pd_entry_t *pptes;
+ pt_entry_t opte;
MUTEX_ASSERT_LOCKED(&pmap->pm_mtx);
@@ -2497,10 +2538,23 @@ pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa)
pd1 = (pd_entry_t *)PMAP_DIRECT_MAP(npa1);
pg1 = PHYS_TO_VM_PAGE(npa1);
- if (pd1[l1idx] == 0)
+ opte = pmap_pte_set(&pd1[l1idx], 0);
+ if (opte == 0) /* XXX check using an EPT valid bit. */
return;
- pd1[l1idx] = 0;
+ if (opte & PG_PVLIST) {
+ pg = PHYS_TO_VM_PAGE(opte & pg_frame);
+ if (pg == NULL)
+ panic("%s: PG_PVLIST mapping with unmanaged page: gpa "
+ "0x%lx, opte 0x%llx, hpa 0x%llx", __func__, gpa,
+ opte, opte & pg_frame);
+ pve = pmap_remove_pv(pg, pmap, gpa);
+ if (pve != NULL && free_pvs != NULL) {
+ pve->pv_next = *free_pvs;
+ *free_pvs = pve;
+ }
+ }
+
pg1->wire_count--;
pmap->pm_stats.resident_count--;
@@ -2535,11 +2589,14 @@ pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa)
}
int
-pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t hpa, vm_prot_t prot)
+pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t hpa, vm_prot_t prot,
+ int flags)
{
uint64_t l4idx, l3idx, l2idx, l1idx;
- pd_entry_t *pd, npte;
- struct vm_page *ptp, *pptp;
+ pd_entry_t *pd;
+ pt_entry_t opte, npte;
+ struct vm_page *pg, *ptp, *pptp;
+ struct pv_entry *pve;
paddr_t npa;
struct uvm_object *obj;
int ret = 0;
@@ -2552,6 +2609,13 @@ pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t
l2idx = (gpa & L2_MASK) >> L2_SHIFT; /* PDE idx */
l1idx = (gpa & L1_MASK) >> L1_SHIFT; /* PTE idx */
+ pve = pool_get(&pmap_pv_pool, PR_NOWAIT);
+ if (pve == NULL) {
+ if (flags & PMAP_CANFAIL)
+ return ENOMEM;
+ panic("%s: no pv entries available", __func__);
+ }
+
mtx_enter(&pmap->pm_mtx);
/* Start at PML4 / top level */
@@ -2699,11 +2763,21 @@ pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t
if (prot & PROT_EXEC)
npte |= EPT_X;
- if (pd[l1idx] == 0) {
+ opte = pd[l1idx]; /* XXX check an EPT valid bit */
+ if (opte == 0) {
ptp->wire_count++;
pmap->pm_stats.resident_count++;
+
+ pg = PHYS_TO_VM_PAGE(hpa);
+ if (pg != NULL) {
+ pmap_enter_pv(pg, pve, pmap, gpa, ptp);
+ npte |= PG_PVLIST;
+ pve = NULL;
+ } else
+ panic("%s: cannot find vm page", __func__);
} else {
- /* XXX flush ept */
+ pmap_shootept(pmap, 1);
+ pmap_tlb_shootwait();
}
pd[l1idx] = npte;
@@ -2711,6 +2785,9 @@ pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t
unlock:
mtx_leave(&pmap->pm_mtx);
+ if (pve != NULL)
+ pool_put(&pmap_pv_pool, pve);
+
return ret;
}
#endif /* NVMM > 0 */
@@ -2737,7 +2814,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa,
#if NVMM > 0
if (pmap_is_ept(pmap))
- return pmap_enter_ept(pmap, va, pa, prot);
+ return pmap_enter_ept(pmap, va, pa, prot, flags);
#endif /* NVMM > 0 */
KASSERT(!(wc && nocache));
blob - 1f1bb8159498669fb74d00e8fc91f7a5691a90e4
blob + 22646ec8dbc70cd471fc6f0a46dfca39ef8ca418
--- sys/arch/amd64/include/pmap.h
+++ sys/arch/amd64/include/pmap.h
@@ -245,8 +245,8 @@
*/
#define PG_W PG_AVAIL1 /* "wired" mapping */
-#define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */
-/* PG_AVAIL3 not used */
+/* PG_AVAIL2 not used */
+#define PG_PVLIST PG_AVAIL3 /* mapping has entry on pvlist */
/*
* PCID assignments.
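
One trailing note: with the flags argument plumbed through pmap_enter(),
an EPT mapping can now honor PMAP_CANFAIL and return ENOMEM instead of
panicking when no pv_entry is available. A hypothetical caller (the
function name here is illustrative, not part of this diff):

        int
        ept_map_guest_page(struct pmap *pm, vaddr_t gpa, paddr_t hpa)
        {
                int error;

                /* PMAP_CANFAIL turns pv_entry exhaustion into ENOMEM. */
                error = pmap_enter(pm, gpa, hpa, PROT_READ | PROT_WRITE,
                    PMAP_CANFAIL);
                if (error)
                        return error;  /* caller may retry after reclaim */
                return 0;
        }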