Push `pageqlock' dances
The diff below pushes the `pageqlock' dances inside uvm_page{de,}activate()
and uvm_pagewire() to avoid expensive locking as much as possible.
It also prepares the code for handling LRU updates in batches.
This builds on top of my previous changes for `wired_count', which is
a per-page attribute protected (for managed pages) by the associated
owner lock. Checking this field inside the functions mentioned above
allows us to grab the `pageqlock' only when necessary.
I also added a check for already-deactivated pages to avoid expensive
pmap_page_protect() operations.
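
In other words, the resulting flow in uvm_pagedeactivate() looks roughly
like this (condensed from the diff below; the queue shuffling and the
PG_CLEAN re-sync are trimmed):

void
uvm_pagedeactivate(struct vm_page *pg)
{
	KASSERT(uvm_page_owner_locked_p(pg, FALSE));

	/* Wired pages are not on any queue; nothing to do. */
	if (pg->wire_count > 0) {
		KASSERT((pg->pg_flags & (PQ_INACTIVE|PQ_ACTIVE)) == 0);
		return;
	}

	uvm_lock_pageq();
	/* Already deactivated?  Skip the pmap_page_protect() below. */
	if (pg->pg_flags & PQ_INACTIVE) {
		uvm_unlock_pageq();
		return;
	}

	/* Make sure next access to this page will fault. */
	pmap_page_protect(pg, PROT_NONE);
	/* ... move the page onto the inactive queue ... */
	uvm_unlock_pageq();

	/* ... clear the reference bit and update PG_CLEAN ... */
}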
Releasing the `pageqlock' in the page daemon and uvn_flush() is done to
avoid lock recursion. My mid-term goal is to reduce these scopes. So
even if this is not pretty, it is going in the right direction.
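
For example, uvmpd_scan_active() now drops the lock around the call:

	/* page queues are locked at this point */
	uvm_unlock_pageq();
	uvm_pagedeactivate(p);
	uvm_lock_pageq();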
ok?
Index: uvm/uvm_amap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_amap.c,v
diff -u -p -r1.97 uvm_amap.c
--- uvm/uvm_amap.c 25 May 2025 01:52:00 -0000 1.97
+++ uvm/uvm_amap.c 12 Nov 2025 11:28:56 -0000
@@ -805,9 +805,7 @@ ReStart:
*/
atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(npg, NULL);
- uvm_lock_pageq();
uvm_pageactivate(npg);
- uvm_unlock_pageq();
}
}
amap_unlock(amap);
Index: uvm/uvm_anon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_anon.c,v
diff -u -p -r1.64 uvm_anon.c
--- uvm/uvm_anon.c 27 Apr 2025 08:37:47 -0000 1.64
+++ uvm/uvm_anon.c 12 Nov 2025 11:28:56 -0000
@@ -208,9 +208,7 @@ uvm_anon_pagein(struct vm_amap *amap, st
/*
* Deactivate the page (to put it on a page queue).
*/
- uvm_lock_pageq();
uvm_pagedeactivate(pg);
- uvm_unlock_pageq();
rw_exit(anon->an_lock);
return FALSE;
Index: uvm/uvm_aobj.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
diff -u -p -r1.119 uvm_aobj.c
--- uvm/uvm_aobj.c 10 Nov 2025 10:53:53 -0000 1.119
+++ uvm/uvm_aobj.c 12 Nov 2025 11:28:56 -0000
@@ -919,18 +919,10 @@ uao_flush(struct uvm_object *uobj, voff_
* XXX in the future.
*/
case PGO_CLEANIT|PGO_FREE:
- /* FALLTHROUGH */
case PGO_CLEANIT|PGO_DEACTIVATE:
- /* FALLTHROUGH */
case PGO_DEACTIVATE:
deactivate_it:
- if (pg->wire_count != 0)
- continue;
-
- uvm_lock_pageq();
uvm_pagedeactivate(pg);
- uvm_unlock_pageq();
-
continue;
case PGO_FREE:
/*
@@ -1407,9 +1399,7 @@ uao_pagein_page(struct uvm_aobj *aobj, i
/*
* deactivate the page (to put it on a page queue).
*/
- uvm_lock_pageq();
uvm_pagedeactivate(pg);
- uvm_unlock_pageq();
return FALSE;
}
Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
diff -u -p -r1.171 uvm_fault.c
--- uvm/uvm_fault.c 11 Sep 2025 17:04:35 -0000 1.171
+++ uvm/uvm_fault.c 12 Nov 2025 12:04:58 -0000
@@ -182,11 +182,7 @@ uvmfault_anonflush(struct vm_anon **anon
KASSERT(rw_lock_held(anons[lcv]->an_lock));
pg = anons[lcv]->an_page;
if (pg && (pg->pg_flags & PG_BUSY) == 0) {
- uvm_lock_pageq();
- if (pg->wire_count == 0) {
- uvm_pagedeactivate(pg);
- }
- uvm_unlock_pageq();
+ uvm_pagedeactivate(pg);
}
}
}
@@ -398,9 +394,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
* We have successfully read the page, activate it.
*/
pmap_clear_modify(pg);
- uvm_lock_pageq();
uvm_pageactivate(pg);
- uvm_unlock_pageq();
atomic_clearbits_int(&pg->pg_flags,
PG_WANTED|PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
@@ -979,9 +973,7 @@ uvm_fault_upper_lookup(struct uvm_faulti
*/
if (pg && (pg->pg_flags & (PG_RELEASED|PG_BUSY)) == 0 &&
!pmap_extract(ufi->orig_map->pmap, currva, &pa)) {
- uvm_lock_pageq();
uvm_pageactivate(pg); /* reactivate */
- uvm_unlock_pageq();
counters_inc(uvmexp_counters, flt_namap);
/* No fault-ahead when wired. */
@@ -1163,13 +1155,11 @@ retry:
/*
* ... update the page queues.
*/
- uvm_lock_pageq();
if (flt->wired) {
uvm_pagewire(pg);
} else {
uvm_pageactivate(pg);
}
- uvm_unlock_pageq();
if (flt->wired) {
/*
@@ -1254,11 +1244,7 @@ uvm_fault_lower_lookup(
* are neither busy nor released, so we don't need to check
* for this. we can just directly enter the pages.
*/
- if (pages[lcv]->wire_count == 0) {
- uvm_lock_pageq();
- uvm_pageactivate(pages[lcv]);
- uvm_unlock_pageq();
- }
+ uvm_pageactivate(pages[lcv]);
counters_inc(uvmexp_counters, flt_nomap);
/* No fault-ahead when wired. */
@@ -1389,9 +1375,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
/* update rusage counters */
curproc->p_ru.ru_minflt++;
if (uobjpage != PGO_DONTCARE) {
- uvm_lock_pageq();
uvm_pageactivate(uobjpage);
- uvm_unlock_pageq();
}
} else {
error = uvm_fault_lower_io(ufi, flt, &uobj, &uobjpage);
@@ -1546,7 +1530,6 @@ uvm_fault_lower(struct uvm_faultinfo *uf
return ERESTART;
}
- uvm_lock_pageq();
if (flt->wired) {
uvm_pagewire(pg);
if (pg->pg_flags & PQ_AOBJ) {
@@ -1568,7 +1551,6 @@ uvm_fault_lower(struct uvm_faultinfo *uf
} else {
uvm_pageactivate(pg);
}
- uvm_unlock_pageq();
if (dropswap)
uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
@@ -1675,9 +1657,7 @@ uvm_fault_lower_io(
/* release the page now, still holding object lock */
if (pg != PGO_DONTCARE) {
- uvm_lock_pageq();
uvm_pageactivate(pg);
- uvm_unlock_pageq();
if (pg->pg_flags & PG_WANTED)
wakeup(pg);
@@ -1807,9 +1787,7 @@ uvm_fault_unwire_locked(vm_map_t map, va
pg = PHYS_TO_VM_PAGE(pa);
if (pg) {
- uvm_lock_pageq();
uvm_pageunwire(pg);
- uvm_unlock_pageq();
}
}
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
diff -u -p -r1.348 uvm_map.c
--- uvm/uvm_map.c 9 Nov 2025 15:53:47 -0000 1.348
+++ uvm/uvm_map.c 12 Nov 2025 11:28:56 -0000
@@ -4438,16 +4438,8 @@ uvm_map_clean(struct vm_map *map, vaddr_
case PGO_CLEANIT|PGO_DEACTIVATE:
case PGO_DEACTIVATE:
deactivate_it:
- /* skip the page if it's wired */
- if (pg->wire_count != 0)
- break;
-
- uvm_lock_pageq();
-
KASSERT(pg->uanon == anon);
uvm_pagedeactivate(pg);
-
- uvm_unlock_pageq();
break;
case PGO_FREE:
/*
Index: uvm/uvm_object.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_object.c,v
diff -u -p -r1.27 uvm_object.c
--- uvm/uvm_object.c 10 Mar 2025 14:13:58 -0000 1.27
+++ uvm/uvm_object.c 12 Nov 2025 11:28:56 -0000
@@ -161,13 +161,11 @@ uvm_obj_wire(struct uvm_object *uobj, vo
}
/* Wire the pages */
- uvm_lock_pageq();
for (i = 0; i < npages; i++) {
uvm_pagewire(pgs[i]);
if (pageq != NULL)
TAILQ_INSERT_TAIL(pageq, pgs[i], pageq);
}
- uvm_unlock_pageq();
/* Unbusy the pages */
uvm_page_unbusy(pgs, npages);
@@ -198,7 +196,6 @@ uvm_obj_unwire(struct uvm_object *uobj,
off_t offset;
rw_enter(uobj->vmobjlock, RW_WRITE | RW_DUPOK);
- uvm_lock_pageq();
for (offset = start; offset < end; offset += PAGE_SIZE) {
pg = uvm_pagelookup(uobj, offset);
@@ -207,7 +204,6 @@ uvm_obj_unwire(struct uvm_object *uobj,
uvm_pageunwire(pg);
}
- uvm_unlock_pageq();
rw_exit(uobj->vmobjlock);
}
#endif /* !SMALL_KERNEL */
Index: uvm/uvm_page.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_page.c,v
diff -u -p -r1.183 uvm_page.c
--- uvm/uvm_page.c 27 Apr 2025 08:37:47 -0000 1.183
+++ uvm/uvm_page.c 12 Nov 2025 14:10:16 -0000
@@ -1221,17 +1221,16 @@ uvm_pagelookup(struct uvm_object *obj, v
/*
* uvm_pagewire: wire the page, thus removing it from the daemon's grasp
- *
- * => caller must lock page queues
*/
void
uvm_pagewire(struct vm_page *pg)
{
KASSERT(uvm_page_owner_locked_p(pg, TRUE));
- MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
if (pg->wire_count == 0) {
+ uvm_lock_pageq();
uvm_pagedequeue(pg);
+ uvm_unlock_pageq();
atomic_inc_int(&uvmexp.wired);
}
pg->wire_count++;
@@ -1241,13 +1240,11 @@ uvm_pagewire(struct vm_page *pg)
* uvm_pageunwire: unwire the page.
*
* => activate if wire count goes to zero.
- * => caller must lock page queues
*/
void
uvm_pageunwire(struct vm_page *pg)
{
KASSERT(uvm_page_owner_locked_p(pg, TRUE));
- MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
pg->wire_count--;
if (pg->wire_count == 0) {
@@ -1257,18 +1254,27 @@ uvm_pageunwire(struct vm_page *pg)
}
/*
- * uvm_pagedeactivate: deactivate page.
+ * uvm_pagedeactivate: deactivate page (unless wired)
*
- * => caller must lock page queues
- * => caller must check to make sure page is not wired
- * => object that page belongs to must be locked (so we can adjust pg->flags)
+ * => object that page belongs to must be locked
*/
void
uvm_pagedeactivate(struct vm_page *pg)
{
KASSERT(uvm_page_owner_locked_p(pg, FALSE));
- MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
+ if (pg->wire_count > 0) {
+ KASSERT((pg->pg_flags & (PQ_INACTIVE|PQ_ACTIVE)) == 0);
+ return;
+ }
+
+ uvm_lock_pageq();
+ if (pg->pg_flags & PQ_INACTIVE) {
+ uvm_unlock_pageq();
+ return;
+ }
+
+ /* Make sure next access to this page will fault. */
pmap_page_protect(pg, PROT_NONE);
if (pg->pg_flags & PQ_ACTIVE) {
@@ -1276,42 +1282,40 @@ uvm_pagedeactivate(struct vm_page *pg)
atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
uvmexp.active--;
}
- if ((pg->pg_flags & PQ_INACTIVE) == 0) {
- KASSERT(pg->wire_count == 0);
- TAILQ_INSERT_TAIL(&uvm.page_inactive, pg, pageq);
- atomic_setbits_int(&pg->pg_flags, PQ_INACTIVE);
- uvmexp.inactive++;
- pmap_clear_reference(pg);
- /*
- * update the "clean" bit. this isn't 100%
- * accurate, and doesn't have to be. we'll
- * re-sync it after we zap all mappings when
- * scanning the inactive list.
- */
- if ((pg->pg_flags & PG_CLEAN) != 0 &&
- pmap_is_modified(pg))
- atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
- }
+ TAILQ_INSERT_TAIL(&uvm.page_inactive, pg, pageq);
+ atomic_setbits_int(&pg->pg_flags, PQ_INACTIVE);
+ uvmexp.inactive++;
+ uvm_unlock_pageq();
+
+ pmap_clear_reference(pg);
+ /*
+ * update the "clean" bit. this isn't 100% accurate, and
+ * doesn't have to be. we'll re-sync it after we zap all
+ * mappings when scanning the inactive list.
+ */
+ if ((pg->pg_flags & PG_CLEAN) != 0 && pmap_is_modified(pg))
+ atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
}
/*
- * uvm_pageactivate: activate page
- *
- * => caller must lock page queues
+ * uvm_pageactivate: activate page (unless wired)
*/
void
uvm_pageactivate(struct vm_page *pg)
{
KASSERT(uvm_page_owner_locked_p(pg, FALSE));
- MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
-
- uvm_pagedequeue(pg);
- if (pg->wire_count == 0) {
- TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
- atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE);
- uvmexp.active++;
+ if (pg->wire_count > 0) {
+ KASSERT((pg->pg_flags & (PQ_INACTIVE|PQ_ACTIVE)) == 0);
+ return;
}
+
+ uvm_lock_pageq();
+ uvm_pagedequeue(pg);
+ TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
+ atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE);
+ uvmexp.active++;
+ uvm_unlock_pageq();
}
/*
@@ -1320,6 +1324,9 @@ uvm_pageactivate(struct vm_page *pg)
void
uvm_pagedequeue(struct vm_page *pg)
{
+ KASSERT(uvm_page_owner_locked_p(pg, FALSE));
+ MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
+
if (pg->pg_flags & PQ_ACTIVE) {
TAILQ_REMOVE(&uvm.page_active, pg, pageq);
atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -r1.138 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c 5 Oct 2025 14:13:22 -0000 1.138
+++ uvm/uvm_pdaemon.c 12 Nov 2025 11:28:56 -0000
@@ -592,8 +592,10 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
* and skip to next page.
*/
if (pmap_is_referenced(p)) {
+ uvm_unlock_pageq();
uvm_pageactivate(p);
rw_exit(slock);
+ uvm_lock_pageq();
uvmexp.pdreact++;
continue;
}
@@ -664,8 +666,10 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
*/
if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
dirtyreacts++;
+ uvm_unlock_pageq();
uvm_pageactivate(p);
rw_exit(slock);
+ uvm_lock_pageq();
continue;
}
@@ -867,13 +871,9 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
uvm_anfree(anon); /* kills anon */
pmap_page_protect(p, PROT_NONE);
anon = NULL;
- uvm_lock_pageq();
- /* dequeue first to prevent lock recursion */
- uvm_pagedequeue(p);
/* free released page */
uvm_pagefree(p);
} else { /* page was not released during I/O */
- uvm_lock_pageq();
if (result != VM_PAGER_OK) {
/* pageout was a failure... */
if (result != VM_PAGER_AGAIN)
@@ -888,13 +888,12 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
}
}
rw_exit(slock);
- } else {
- /*
- * lock page queues here just so they're always locked
- * at the end of the loop.
- */
- uvm_lock_pageq();
}
+ /*
+ * lock page queues here just so they're always locked
+ * at the end of the loop.
+ */
+ uvm_lock_pageq();
}
TAILQ_REMOVE(pglst, &iter, pageq);
@@ -1004,8 +1003,9 @@ uvmpd_scan_active(struct uvm_pmalloc *pm
* inactive pages.
*/
if (inactive_shortage > 0) {
- /* no need to check wire_count as pg is "active" */
+ uvm_unlock_pageq();
uvm_pagedeactivate(p);
+ uvm_lock_pageq();
uvmexp.pddeact++;
inactive_shortage--;
}
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
diff -u -p -r1.148 uvm_vnode.c
--- uvm/uvm_vnode.c 10 Nov 2025 15:53:06 -0000 1.148
+++ uvm/uvm_vnode.c 12 Nov 2025 11:28:56 -0000
@@ -663,9 +663,9 @@ uvn_flush(struct uvm_object *uobj, voff_
/* if we don't need a clean, deactivate/free pages then cont. */
if (!needs_clean) {
if (flags & PGO_DEACTIVATE) {
- if (pp->wire_count == 0) {
- uvm_pagedeactivate(pp);
- }
+ uvm_unlock_pageq();
+ uvm_pagedeactivate(pp);
+ uvm_lock_pageq();
} else if (flags & PGO_FREE) {
if (pp->pg_flags & PG_BUSY) {
uvm_unlock_pageq();
@@ -788,9 +788,9 @@ ReTry:
/* dispose of page */
if (flags & PGO_DEACTIVATE) {
- if (ptmp->wire_count == 0) {
- uvm_pagedeactivate(ptmp);
- }
+ uvm_unlock_pageq();
+ uvm_pagedeactivate(ptmp);
+ uvm_lock_pageq();
} else if (flags & PGO_FREE &&
result != VM_PAGER_PEND) {
if (result != VM_PAGER_OK) {