From: Martin Pieuchot
Subject: Push `pageqlock' dances
To: tech@openbsd.org
Date: Wed, 12 Nov 2025 14:17:45 +0000

The diff below pushes the `pageqlock' dances inside uvm_page{de,}activate()
and uvm_pagewire() to avoid this expensive locking as much as possible.
It also prepares the code for dealing with LRU updates in batches.

This builds on top of my previous changes for `wired_count', which is a
per-page attribute protected (for managed pages) by the associated owner
lock.  Checking this field inside the above-mentioned functions allows us
to grab the `pageqlock' only when necessary.

I also added a check for already-deactivated pages to avoid expensive
pmap_page_protect() operations.

Releasing the `pageqlock' in the page daemon and in uvn_flush() is done
to avoid lock recursion.  My mid-term goal is to reduce these scopes, so
even if this is not pretty, it is going in the right direction.
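To make the caller-side effect concrete, here is the recurring pattern in
condensed form (taken from the uvmfault_anonflush() hunk below).  Call
sites collapse from the old dance:

	uvm_lock_pageq();
	if (pg->wire_count == 0) {
		uvm_pagedeactivate(pg);
	}
	uvm_unlock_pageq();

down to a plain:

	uvm_pagedeactivate(pg);

since the wire_count check now happens under the owner lock inside the
function itself, which grabs the `pageqlock' only when the page actually
has to change queue.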
ok?

Index: uvm/uvm_amap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_amap.c,v
diff -u -p -r1.97 uvm_amap.c
--- uvm/uvm_amap.c	25 May 2025 01:52:00 -0000	1.97
+++ uvm/uvm_amap.c	12 Nov 2025 11:28:56 -0000
@@ -805,9 +805,7 @@ ReStart:
 			 */
 			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
 			UVM_PAGE_OWN(npg, NULL);
-			uvm_lock_pageq();
 			uvm_pageactivate(npg);
-			uvm_unlock_pageq();
 		}
 	}
 	amap_unlock(amap);
Index: uvm/uvm_anon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_anon.c,v
diff -u -p -r1.64 uvm_anon.c
--- uvm/uvm_anon.c	27 Apr 2025 08:37:47 -0000	1.64
+++ uvm/uvm_anon.c	12 Nov 2025 11:28:56 -0000
@@ -208,9 +208,7 @@ uvm_anon_pagein(struct vm_amap *amap, st
 	/*
 	 * Deactivate the page (to put it on a page queue).
 	 */
-	uvm_lock_pageq();
 	uvm_pagedeactivate(pg);
-	uvm_unlock_pageq();
 
 	rw_exit(anon->an_lock);
 	return FALSE;
Index: uvm/uvm_aobj.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
diff -u -p -r1.119 uvm_aobj.c
--- uvm/uvm_aobj.c	10 Nov 2025 10:53:53 -0000	1.119
+++ uvm/uvm_aobj.c	12 Nov 2025 11:28:56 -0000
@@ -919,18 +919,10 @@ uao_flush(struct uvm_object *uobj, voff_
 		 * XXX in the future.
 		 */
 		case PGO_CLEANIT|PGO_FREE:
-			/* FALLTHROUGH */
 		case PGO_CLEANIT|PGO_DEACTIVATE:
-			/* FALLTHROUGH */
 		case PGO_DEACTIVATE:
 deactivate_it:
-			if (pg->wire_count != 0)
-				continue;
-
-			uvm_lock_pageq();
 			uvm_pagedeactivate(pg);
-			uvm_unlock_pageq();
-
 			continue;
 		case PGO_FREE:
 			/*
@@ -1407,9 +1399,7 @@ uao_pagein_page(struct uvm_aobj *aobj, i
 	/*
 	 * deactivate the page (to put it on a page queue).
 	 */
-	uvm_lock_pageq();
 	uvm_pagedeactivate(pg);
-	uvm_unlock_pageq();
 
 	return FALSE;
 }
Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
diff -u -p -r1.171 uvm_fault.c
--- uvm/uvm_fault.c	11 Sep 2025 17:04:35 -0000	1.171
+++ uvm/uvm_fault.c	12 Nov 2025 12:04:58 -0000
@@ -182,11 +182,7 @@ uvmfault_anonflush(struct vm_anon **anon
 		KASSERT(rw_lock_held(anons[lcv]->an_lock));
 		pg = anons[lcv]->an_page;
 		if (pg && (pg->pg_flags & PG_BUSY) == 0) {
-			uvm_lock_pageq();
-			if (pg->wire_count == 0) {
-				uvm_pagedeactivate(pg);
-			}
-			uvm_unlock_pageq();
+			uvm_pagedeactivate(pg);
 		}
 	}
 }
@@ -398,9 +394,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
 		 * We have successfully read the page, activate it.
 		 */
 		pmap_clear_modify(pg);
-		uvm_lock_pageq();
 		uvm_pageactivate(pg);
-		uvm_unlock_pageq();
 		atomic_clearbits_int(&pg->pg_flags,
 		    PG_WANTED|PG_BUSY|PG_FAKE);
 		UVM_PAGE_OWN(pg, NULL);
@@ -979,9 +973,7 @@ uvm_fault_upper_lookup(struct uvm_faulti
 		 */
 		if (pg && (pg->pg_flags & (PG_RELEASED|PG_BUSY)) == 0 &&
 		    !pmap_extract(ufi->orig_map->pmap, currva, &pa)) {
-			uvm_lock_pageq();
 			uvm_pageactivate(pg);	/* reactivate */
-			uvm_unlock_pageq();
 			counters_inc(uvmexp_counters, flt_namap);
 
 			/* No fault-ahead when wired. */
@@ -1163,13 +1155,11 @@ retry:
 	/*
 	 * ... update the page queues.
 	 */
-	uvm_lock_pageq();
 	if (flt->wired) {
 		uvm_pagewire(pg);
 	} else {
 		uvm_pageactivate(pg);
 	}
-	uvm_unlock_pageq();
 
 	if (flt->wired) {
 		/*
@@ -1254,11 +1244,7 @@ uvm_fault_lower_lookup(
 		 * are neither busy nor released, so we don't need to check
 		 * for this.  we can just directly enter the pages.
 		 */
-		if (pages[lcv]->wire_count == 0) {
-			uvm_lock_pageq();
-			uvm_pageactivate(pages[lcv]);
-			uvm_unlock_pageq();
-		}
+		uvm_pageactivate(pages[lcv]);
 		counters_inc(uvmexp_counters, flt_nomap);
 
 		/* No fault-ahead when wired. */
@@ -1389,9 +1375,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 		/* update rusage counters */
 		curproc->p_ru.ru_minflt++;
 		if (uobjpage != PGO_DONTCARE) {
-			uvm_lock_pageq();
 			uvm_pageactivate(uobjpage);
-			uvm_unlock_pageq();
 		}
 	} else {
 		error = uvm_fault_lower_io(ufi, flt, &uobj, &uobjpage);
@@ -1546,7 +1530,6 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 		return ERESTART;
 	}
 
-	uvm_lock_pageq();
 	if (flt->wired) {
 		uvm_pagewire(pg);
 		if (pg->pg_flags & PQ_AOBJ) {
@@ -1568,7 +1551,6 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 	} else {
 		uvm_pageactivate(pg);
 	}
-	uvm_unlock_pageq();
 
 	if (dropswap)
 		uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
@@ -1675,9 +1657,7 @@ uvm_fault_lower_io(
 	/* release the page now, still holding object lock */
 	if (pg != PGO_DONTCARE) {
-		uvm_lock_pageq();
 		uvm_pageactivate(pg);
-		uvm_unlock_pageq();
 
 		if (pg->pg_flags & PG_WANTED)
 			wakeup(pg);
@@ -1807,9 +1787,7 @@ uvm_fault_unwire_locked(vm_map_t map, va
 		pg = PHYS_TO_VM_PAGE(pa);
 		if (pg) {
-			uvm_lock_pageq();
 			uvm_pageunwire(pg);
-			uvm_unlock_pageq();
 		}
 	}
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
diff -u -p -r1.348 uvm_map.c
--- uvm/uvm_map.c	9 Nov 2025 15:53:47 -0000	1.348
+++ uvm/uvm_map.c	12 Nov 2025 11:28:56 -0000
@@ -4438,16 +4438,8 @@ uvm_map_clean(struct vm_map *map, vaddr_
 			case PGO_CLEANIT|PGO_DEACTIVATE:
 			case PGO_DEACTIVATE:
 deactivate_it:
-				/* skip the page if it's wired */
-				if (pg->wire_count != 0)
-					break;
-
-				uvm_lock_pageq();
-				KASSERT(pg->uanon == anon);
 				uvm_pagedeactivate(pg);
-
-				uvm_unlock_pageq();
 				break;
 			case PGO_FREE:
 				/*
Index: uvm/uvm_object.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_object.c,v
diff -u -p -r1.27 uvm_object.c
--- uvm/uvm_object.c	10 Mar 2025 14:13:58 -0000	1.27
+++ uvm/uvm_object.c	12 Nov 2025 11:28:56 -0000
@@ -161,13 +161,11 @@ uvm_obj_wire(struct uvm_object *uobj, vo
 		}
 
 		/* Wire the pages */
-		uvm_lock_pageq();
 		for (i = 0; i < npages; i++) {
 			uvm_pagewire(pgs[i]);
 			if (pageq != NULL)
 				TAILQ_INSERT_TAIL(pageq, pgs[i], pageq);
 		}
-		uvm_unlock_pageq();
 
 		/* Unbusy the pages */
 		uvm_page_unbusy(pgs, npages);
@@ -198,7 +196,6 @@ uvm_obj_unwire(struct uvm_object *uobj,
 	off_t offset;
 
 	rw_enter(uobj->vmobjlock, RW_WRITE | RW_DUPOK);
-	uvm_lock_pageq();
 	for (offset = start; offset < end; offset += PAGE_SIZE) {
 		pg = uvm_pagelookup(uobj, offset);
 
@@ -207,7 +204,6 @@ uvm_obj_unwire(struct uvm_object *uobj,
 		uvm_pageunwire(pg);
 	}
-	uvm_unlock_pageq();
 	rw_exit(uobj->vmobjlock);
 }
 #endif /* !SMALL_KERNEL */
Index: uvm/uvm_page.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_page.c,v
diff -u -p -r1.183 uvm_page.c
--- uvm/uvm_page.c	27 Apr 2025 08:37:47 -0000	1.183
+++ uvm/uvm_page.c	12 Nov 2025 14:10:16 -0000
@@ -1221,17 +1221,16 @@ uvm_pagelookup(struct uvm_object *obj, v
 /*
  * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
- *
- * => caller must lock page queues
  */
 void
 uvm_pagewire(struct vm_page *pg)
 {
 	KASSERT(uvm_page_owner_locked_p(pg, TRUE));
-	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
 
 	if (pg->wire_count == 0) {
+		uvm_lock_pageq();
 		uvm_pagedequeue(pg);
+		uvm_unlock_pageq();
 		atomic_inc_int(&uvmexp.wired);
 	}
 	pg->wire_count++;
@@ -1241,13 +1240,11 @@ uvm_pagewire(struct vm_page *pg)
  * uvm_pageunwire: unwire the page.
  *
  * => activate if wire count goes to zero.
- * => caller must lock page queues
  */
 void
 uvm_pageunwire(struct vm_page *pg)
 {
 	KASSERT(uvm_page_owner_locked_p(pg, TRUE));
-	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
 
 	pg->wire_count--;
 	if (pg->wire_count == 0) {
@@ -1257,18 +1254,27 @@ uvm_pageunwire(struct vm_page *pg)
 }
 
 /*
- * uvm_pagedeactivate: deactivate page.
+ * uvm_pagedeactivate: deactivate page (unless wired)
  *
- * => caller must lock page queues
- * => caller must check to make sure page is not wired
- * => object that page belongs to must be locked (so we can adjust pg->flags)
+ * => object that page belongs to must be locked
  */
 void
 uvm_pagedeactivate(struct vm_page *pg)
 {
 	KASSERT(uvm_page_owner_locked_p(pg, FALSE));
-	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
 
+	if (pg->wire_count > 0) {
+		KASSERT((pg->pg_flags & (PQ_INACTIVE|PQ_ACTIVE)) == 0);
+		return;
+	}
+
+	uvm_lock_pageq();
+	if (pg->pg_flags & PQ_INACTIVE) {
+		uvm_unlock_pageq();
+		return;
+	}
+
+	/* Make sure next access to this page will fault. */
 	pmap_page_protect(pg, PROT_NONE);
 
 	if (pg->pg_flags & PQ_ACTIVE) {
@@ -1276,42 +1282,40 @@ uvm_pagedeactivate(struct vm_page *pg)
 		atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
 		uvmexp.active--;
 	}
-	if ((pg->pg_flags & PQ_INACTIVE) == 0) {
-		KASSERT(pg->wire_count == 0);
-		TAILQ_INSERT_TAIL(&uvm.page_inactive, pg, pageq);
-		atomic_setbits_int(&pg->pg_flags, PQ_INACTIVE);
-		uvmexp.inactive++;
-		pmap_clear_reference(pg);
-		/*
-		 * update the "clean" bit. this isn't 100%
-		 * accurate, and doesn't have to be. we'll
-		 * re-sync it after we zap all mappings when
-		 * scanning the inactive list.
-		 */
-		if ((pg->pg_flags & PG_CLEAN) != 0 &&
-		    pmap_is_modified(pg))
-			atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
-	}
+	TAILQ_INSERT_TAIL(&uvm.page_inactive, pg, pageq);
+	atomic_setbits_int(&pg->pg_flags, PQ_INACTIVE);
+	uvmexp.inactive++;
+	uvm_unlock_pageq();
+
+	pmap_clear_reference(pg);
+	/*
+	 * update the "clean" bit. this isn't 100% accurate, and
+	 * doesn't have to be. we'll re-sync it after we zap all
+	 * mappings when scanning the inactive list.
+	 */
+	if ((pg->pg_flags & PG_CLEAN) != 0 && pmap_is_modified(pg))
+		atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
 }
 
 /*
- * uvm_pageactivate: activate page
- *
- * => caller must lock page queues
+ * uvm_pageactivate: activate page (unless wired)
  */
 void
 uvm_pageactivate(struct vm_page *pg)
 {
 	KASSERT(uvm_page_owner_locked_p(pg, FALSE));
-	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
-
-	uvm_pagedequeue(pg);
-	if (pg->wire_count == 0) {
-		TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
-		atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE);
-		uvmexp.active++;
+	if (pg->wire_count > 0) {
+		KASSERT((pg->pg_flags & (PQ_INACTIVE|PQ_ACTIVE)) == 0);
+		return;
 	}
+
+	uvm_lock_pageq();
+	uvm_pagedequeue(pg);
+	TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
+	atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE);
+	uvmexp.active++;
+	uvm_unlock_pageq();
 }
 
 /*
@@ -1320,6 +1324,9 @@ uvm_pageactivate(struct vm_page *pg)
 void
 uvm_pagedequeue(struct vm_page *pg)
 {
+	KASSERT(uvm_page_owner_locked_p(pg, FALSE));
+	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
+
 	if (pg->pg_flags & PQ_ACTIVE) {
 		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
 		atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -r1.138 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c	5 Oct 2025 14:13:22 -0000	1.138
+++ uvm/uvm_pdaemon.c	12 Nov 2025 11:28:56 -0000
@@ -592,8 +592,10 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 			 * and skip to next page.
 			 */
 			if (pmap_is_referenced(p)) {
+				uvm_unlock_pageq();
 				uvm_pageactivate(p);
 				rw_exit(slock);
+				uvm_lock_pageq();
 				uvmexp.pdreact++;
 				continue;
 			}
@@ -664,8 +666,10 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 			 */
 			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
 				dirtyreacts++;
+				uvm_unlock_pageq();
 				uvm_pageactivate(p);
 				rw_exit(slock);
+				uvm_lock_pageq();
 				continue;
 			}
@@ -867,13 +871,9 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 				uvm_anfree(anon);	/* kills anon */
 				pmap_page_protect(p, PROT_NONE);
 				anon = NULL;
-				uvm_lock_pageq();
-				/* dequeue first to prevent lock recursion */
-				uvm_pagedequeue(p);
 				/* free released page */
 				uvm_pagefree(p);
 			} else {	/* page was not released during I/O */
-				uvm_lock_pageq();
 				if (result != VM_PAGER_OK) {
 					/* pageout was a failure... */
 					if (result != VM_PAGER_AGAIN)
@@ -888,13 +888,12 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 				}
 			}
 			rw_exit(slock);
-		} else {
-			/*
-			 * lock page queues here just so they're always locked
-			 * at the end of the loop.
-			 */
-			uvm_lock_pageq();
 		}
+		/*
+		 * lock page queues here just so they're always locked
+		 * at the end of the loop.
+		 */
+		uvm_lock_pageq();
 	}
 	TAILQ_REMOVE(pglst, &iter, pageq);
@@ -1004,8 +1003,9 @@ uvmpd_scan_active(struct uvm_pmalloc *pm
 		 * inactive pages.
 		 */
 		if (inactive_shortage > 0) {
-			/* no need to check wire_count as pg is "active" */
+			uvm_unlock_pageq();
 			uvm_pagedeactivate(p);
+			uvm_lock_pageq();
 			uvmexp.pddeact++;
 			inactive_shortage--;
 		}
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
diff -u -p -r1.148 uvm_vnode.c
--- uvm/uvm_vnode.c	10 Nov 2025 15:53:06 -0000	1.148
+++ uvm/uvm_vnode.c	12 Nov 2025 11:28:56 -0000
@@ -663,9 +663,9 @@ uvn_flush(struct uvm_object *uobj, voff_
 		/* if we don't need a clean, deactivate/free pages then cont.
 		 */
 		if (!needs_clean) {
 			if (flags & PGO_DEACTIVATE) {
-				if (pp->wire_count == 0) {
-					uvm_pagedeactivate(pp);
-				}
+				uvm_unlock_pageq();
+				uvm_pagedeactivate(pp);
+				uvm_lock_pageq();
 			} else if (flags & PGO_FREE) {
 				if (pp->pg_flags & PG_BUSY) {
 					uvm_unlock_pageq();
@@ -788,9 +788,9 @@ ReTry:
 		/* dispose of page */
 		if (flags & PGO_DEACTIVATE) {
-			if (ptmp->wire_count == 0) {
-				uvm_pagedeactivate(ptmp);
-			}
+			uvm_unlock_pageq();
+			uvm_pagedeactivate(ptmp);
+			uvm_lock_pageq();
 		} else if (flags & PGO_FREE && result != VM_PAGER_PEND) {
 			if (result != VM_PAGER_OK) {