Download raw body.
teach EPT pmaps how to use pv_lists
Looking for some testing (of vmm/vmd on Intel hosts) and review of the
following diff.
In short, the objective is to incrementally bring EPT pmap operations
more in line with regular amd64 and AMD RVI pmaps. This diff:
- updates pmap_enter_ept() to add pv_entry's when inserting a pte in an
EPT pmap
- updates pmap_do_remove_ept() to support removing pv_entry's when
removing a pte from an EPT pmap
- updates pmap_page_remove() to support zapping an EPT pte when the UVM
vm_page being removed has a pv_entry pointing to an EPT pte
For anyone reviewing:
- I needed to change the PG_PVLIST bit so it would still be valid in an
EPT pte, so I just swapped for PG_AVAIL3 as it looks available for use
in EPT ptes
- Validity checks on EPT ptes are not currently performed. For
correctness, one cannot just use the same PG_V bit as EPT ptes are
special and support true X-only permissions (in addition to breaking
out R from W). That should be a different diff IMO.
I'm not a big fan of throwing more junk directly into
pmap_page_remove(), but given how it's designed it feels like the most
sane place to put the logic for now.
diffstat refs/heads/master refs/heads/ept-pvlist-3
M sys/arch/amd64/amd64/pmap.c | 90+ 14-
M sys/arch/amd64/include/pmap.h | 2+ 2-
2 files changed, 92 insertions(+), 16 deletions(-)
diff refs/heads/master refs/heads/ept-pvlist-3
commit - 3c9ccb50785e1c693c25fd18c467a46499c73493
commit + 1fd874b9a12aadcaf92afdc5c9caa94bb5970c27
blob - 35af9d10cdc6cb79f6288b964bc350a46bfce8d6
blob + 4e843b19983e2a3c277c8cc9aea8165746c80aea
--- sys/arch/amd64/amd64/pmap.c
+++ sys/arch/amd64/amd64/pmap.c
@@ -336,8 +336,8 @@ struct pv_entry *pmap_remove_pv(struct vm_page *, stru
void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int);
#if NVMM > 0
void pmap_remove_ept(struct pmap *, vaddr_t, vaddr_t);
-void pmap_do_remove_ept(struct pmap *, vaddr_t);
-int pmap_enter_ept(struct pmap *, vaddr_t, paddr_t, vm_prot_t);
+void pmap_do_remove_ept(struct pmap *, vaddr_t, struct pv_entry **);
+int pmap_enter_ept(struct pmap *, vaddr_t, paddr_t, vm_prot_t, int);
void pmap_shootept(struct pmap *, int);
#endif /* NVMM > 0 */
int pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
@@ -1970,6 +1970,39 @@ pmap_page_remove(struct vm_page *pg)
pm = pve->pv_pmap;
mtx_leave(&pg->mdpage.pv_mtx);
+#if NVMM > 0
+	if (pm->pm_type == PMAP_TYPE_EPT) {
+		mtx_enter(&pm->pm_mtx);
+
+		/*
+		 * See note below about dropping pvlist lock.
+		 */
+		mtx_enter(&pg->mdpage.pv_mtx);
+		if ((pve = pg->mdpage.pv_list) == NULL ||
+		    pve->pv_pmap != pm) {
+			/*
+			 * The pvlist changed while we had it unlocked:
+			 * drop our pmap reference and restart the loop.
+			 * Falling through here would dereference a NULL
+			 * or stale pve and operate on pm with pm_mtx
+			 * released.
+			 */
+			mtx_leave(&pg->mdpage.pv_mtx);
+			mtx_leave(&pm->pm_mtx);
+			pmap_destroy(pm);
+			mtx_enter(&pg->mdpage.pv_mtx);
+			continue;
+		}
+
+		/* Detach the pv_entry from the pv_list. */
+		pg->mdpage.pv_list = pve->pv_next;
+		mtx_leave(&pg->mdpage.pv_mtx);
+
+		/* Zap it. */
+		pmap_do_remove_ept(pm, pve->pv_va, NULL);
+		pmap_shootept(pm, 1);
+
+		/* Clean up before next iteration. */
+		mtx_leave(&pm->pm_mtx);
+		pmap_destroy(pm);
+		pool_put(&pmap_pv_pool, pve);
+		mtx_enter(&pg->mdpage.pv_mtx);
+		continue;
+	}
+#endif /* NVMM > 0 */
+
/* XXX use direct map? */
scr3 = pmap_map_ptes(pm); /* locks pmap */
shootself = (scr3 == 0);
@@ -2460,29 +2493,36 @@ void
pmap_remove_ept(struct pmap *pmap, vaddr_t sgpa, vaddr_t egpa)
{
vaddr_t v;
+ struct pv_entry *pve, *free_pvs = NULL;
mtx_enter(&pmap->pm_mtx);
DPRINTF("%s: sgpa=0x%llx egpa=0x%llx\n", __func__, (uint64_t)sgpa,
(uint64_t)egpa);
for (v = sgpa; v < egpa + PAGE_SIZE; v += PAGE_SIZE)
- pmap_do_remove_ept(pmap, v);
+ pmap_do_remove_ept(pmap, v, &free_pvs);
pmap_shootept(pmap, 1);
mtx_leave(&pmap->pm_mtx);
-
pmap_tlb_shootwait();
+
+ while ((pve = free_pvs) != NULL) {
+ free_pvs = pve->pv_next;
+ pool_put(&pmap_pv_pool, pve);
+ }
}
void
-pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa)
+pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa, struct pv_entry **free_pvs)
{
uint64_t l4idx, l3idx, l2idx, l1idx;
- struct vm_page *pg3, *pg2, *pg1;
+ struct vm_page *pg, *pg3, *pg2, *pg1;
+ struct pv_entry *pve;
paddr_t npa3, npa2, npa1;
pd_entry_t *pd4, *pd3, *pd2, *pd1;
pd_entry_t *pptes;
+ pt_entry_t opte;
MUTEX_ASSERT_LOCKED(&pmap->pm_mtx);
@@ -2518,10 +2558,23 @@ pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa)
pd1 = (pd_entry_t *)PMAP_DIRECT_MAP(npa1);
pg1 = PHYS_TO_VM_PAGE(npa1);
- if (pd1[l1idx] == 0)
+ opte = pmap_pte_set(&pd1[l1idx], 0);
+ if (opte == 0) // XXX check using an EPT valid bit.
return;
- pd1[l1idx] = 0;
+ if (opte & PG_PVLIST) {
+ pg = PHYS_TO_VM_PAGE(opte & pg_frame);
+ if (pg == NULL)
+ panic("%s: PG_PVLIST mapping with unmanaged page: gpa "
+ "0x%lx, opte 0x%llx, hpa 0x%llx", __func__, gpa,
+ opte, opte & pg_frame);
+ pve = pmap_remove_pv(pg, pmap, gpa);
+ if (pve != NULL && free_pvs != NULL) {
+ pve->pv_next = *free_pvs;
+ *free_pvs = pve;
+ }
+ }
+
pg1->wire_count--;
pmap->pm_stats.resident_count--;
@@ -2556,11 +2609,14 @@ pmap_do_remove_ept(struct pmap *pmap, paddr_t gpa)
}
int
-pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t hpa, vm_prot_t prot)
+pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t hpa, vm_prot_t prot,
+ int flags)
{
uint64_t l4idx, l3idx, l2idx, l1idx;
- pd_entry_t *pd, npte;
- struct vm_page *ptp, *pptp;
+ pd_entry_t *pd;
+ pt_entry_t opte, npte;
+ struct vm_page *pg, *ptp, *pptp;
+ struct pv_entry *pve;
paddr_t npa;
struct uvm_object *obj;
int ret = 0;
@@ -2573,6 +2629,13 @@ pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t
l2idx = (gpa & L2_MASK) >> L2_SHIFT; /* PDE idx */
l1idx = (gpa & L1_MASK) >> L1_SHIFT; /* PTE idx */
+ pve = pool_get(&pmap_pv_pool, PR_NOWAIT);
+ if (pve == NULL) {
+ if (flags & PMAP_CANFAIL)
+ return ENOMEM;
+ panic("%s: no pv entries available", __func__);
+ }
+
mtx_enter(&pmap->pm_mtx);
/* Start at PML4 / top level */
@@ -2720,11 +2783,21 @@ pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t
if (prot & PROT_EXEC)
npte |= EPT_X;
- if (pd[l1idx] == 0) {
+ opte = pd[l1idx]; // XXX check an EPT valid bit
+ if (opte == 0) {
ptp->wire_count++;
pmap->pm_stats.resident_count++;
+
+ pg = PHYS_TO_VM_PAGE(hpa);
+ if (pg != NULL) {
+ pmap_enter_pv(pg, pve, pmap, gpa, ptp);
+ npte |= PG_PVLIST;
+ pve = NULL;
+ } else
+ panic("%s: cannot find vm page", __func__);
} else {
- /* XXX flush ept */
+ pmap_shootept(pmap, 1);
+ pmap_tlb_shootwait();
}
pd[l1idx] = npte;
@@ -2732,6 +2805,9 @@ pmap_enter_ept(struct pmap *pmap, paddr_t gpa, paddr_t
unlock:
mtx_leave(&pmap->pm_mtx);
+ if (pve != NULL)
+ pool_put(&pmap_pv_pool, pve);
+
return ret;
}
#endif /* NVMM > 0 */
@@ -2758,7 +2834,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa,
#if NVMM > 0
if (pmap_is_ept(pmap))
- return pmap_enter_ept(pmap, va, pa, prot);
+ return pmap_enter_ept(pmap, va, pa, prot, flags);
#endif /* NVMM > 0 */
KASSERT(!(wc && nocache));
blob - cc358eab1e93376f4d0b511d0c0bcd06aa476b40
blob + 6eeab997123e6d10a9dddde3f55bf25950f43978
--- sys/arch/amd64/include/pmap.h
+++ sys/arch/amd64/include/pmap.h
@@ -245,8 +245,8 @@
*/
#define PG_W PG_AVAIL1 /* "wired" mapping */
-#define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */
-/* PG_AVAIL3 not used */
+/* PG_AVAIL2 not used */
+#define PG_PVLIST PG_AVAIL3 /* mapping has entry on pvlist */
/*
* PCID assignments.
teach EPT pmaps how to use pv_lists