From: Martin Pieuchot <mpi@grenadille.net>
Subject: Push `pageqlock' dances
To: tech@openbsd.org
Date: Wed, 12 Nov 2025 14:17:45 +0000

The diff below pushes the `pageqlock' dances inside
uvm_page{de,}activate() and uvm_pagewire() so that the expensive lock
is only grabbed when the page queues really have to be touched.  It
also prepares the code for handling LRU updates in batches.
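
The `dance' in question is the caller-side pattern removed all over
the tree by the diff:

	uvm_lock_pageq();
	uvm_pagedeactivate(pg);		/* or activate/wire */
	uvm_unlock_pageq();

which becomes a plain uvm_pagedeactivate(pg).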

This builds on top of my previous changes for `wire_count', a
per-page attribute protected (for managed pages) by the associated
owner lock.  Checking this field inside the functions mentioned above
allows us to grab the `pageqlock' only when necessary.
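
For example, uvm_pagewire() (from the uvm_page.c hunk below) now only
takes the queue lock for the 0 -> 1 transition, relying on the owner
lock to read `wire_count' safely:

	if (pg->wire_count == 0) {
		uvm_lock_pageq();
		uvm_pagedequeue(pg);
		uvm_unlock_pageq();
		atomic_inc_int(&uvmexp.wired);
	}
	pg->wire_count++;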

I also added a check for already-deactivated pages to avoid
redundant, expensive pmap_page_protect() operations.
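
That is the early return at the top of the new uvm_pagedeactivate():

	uvm_lock_pageq();
	if (pg->pg_flags & PQ_INACTIVE) {
		uvm_unlock_pageq();
		return;
	}

	/* Make sure next access to this page will fault. */
	pmap_page_protect(pg, PROT_NONE);

Pages already sitting on the inactive list keep their mappings
instead of being zapped a second time.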

Releasing the `pageqlock' around these calls in the page daemon and
uvn_flush() avoids lock recursion, since both already hold it while
walking the queues.  My mid-term goal is to reduce these locking
scopes.  So even if this is not pretty, it is going in the right
direction.
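
Concretely, in uvmpd_scan_inactive() the lock is dropped around the
call and re-taken afterwards:

	if (pmap_is_referenced(p)) {
		uvm_unlock_pageq();
		uvm_pageactivate(p);
		rw_exit(slock);
		uvm_lock_pageq();
		uvmexp.pdreact++;
		continue;
	}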

ok?

Index: uvm/uvm_amap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_amap.c,v
diff -u -p -r1.97 uvm_amap.c
--- uvm/uvm_amap.c	25 May 2025 01:52:00 -0000	1.97
+++ uvm/uvm_amap.c	12 Nov 2025 11:28:56 -0000
@@ -805,9 +805,7 @@ ReStart:
 			 */
 			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
 			UVM_PAGE_OWN(npg, NULL);
-			uvm_lock_pageq();
 			uvm_pageactivate(npg);
-			uvm_unlock_pageq();
 		}
 	}
 	amap_unlock(amap);
Index: uvm/uvm_anon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_anon.c,v
diff -u -p -r1.64 uvm_anon.c
--- uvm/uvm_anon.c	27 Apr 2025 08:37:47 -0000	1.64
+++ uvm/uvm_anon.c	12 Nov 2025 11:28:56 -0000
@@ -208,9 +208,7 @@ uvm_anon_pagein(struct vm_amap *amap, st
 	/*
 	 * Deactivate the page (to put it on a page queue).
 	 */
-	uvm_lock_pageq();
 	uvm_pagedeactivate(pg);
-	uvm_unlock_pageq();
 	rw_exit(anon->an_lock);
 
 	return FALSE;
Index: uvm/uvm_aobj.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
diff -u -p -r1.119 uvm_aobj.c
--- uvm/uvm_aobj.c	10 Nov 2025 10:53:53 -0000	1.119
+++ uvm/uvm_aobj.c	12 Nov 2025 11:28:56 -0000
@@ -919,18 +919,10 @@ uao_flush(struct uvm_object *uobj, voff_
 		 * XXX in the future.
 		 */
 		case PGO_CLEANIT|PGO_FREE:
-			/* FALLTHROUGH */
 		case PGO_CLEANIT|PGO_DEACTIVATE:
-			/* FALLTHROUGH */
 		case PGO_DEACTIVATE:
  deactivate_it:
-			if (pg->wire_count != 0)
-				continue;
-
-			uvm_lock_pageq();
 			uvm_pagedeactivate(pg);
-			uvm_unlock_pageq();
-
 			continue;
 		case PGO_FREE:
 			/*
@@ -1407,9 +1399,7 @@ uao_pagein_page(struct uvm_aobj *aobj, i
 	/*
 	 * deactivate the page (to put it on a page queue).
 	 */
-	uvm_lock_pageq();
 	uvm_pagedeactivate(pg);
-	uvm_unlock_pageq();
 
 	return FALSE;
 }
Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
diff -u -p -r1.171 uvm_fault.c
--- uvm/uvm_fault.c	11 Sep 2025 17:04:35 -0000	1.171
+++ uvm/uvm_fault.c	12 Nov 2025 12:04:58 -0000
@@ -182,11 +182,7 @@ uvmfault_anonflush(struct vm_anon **anon
 		KASSERT(rw_lock_held(anons[lcv]->an_lock));
 		pg = anons[lcv]->an_page;
 		if (pg && (pg->pg_flags & PG_BUSY) == 0) {
-			uvm_lock_pageq();
-			if (pg->wire_count == 0) {
-				uvm_pagedeactivate(pg);
-			}
-			uvm_unlock_pageq();
+			uvm_pagedeactivate(pg);
 		}
 	}
 }
@@ -398,9 +394,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
 			 * We have successfully read the page, activate it.
 			 */
 			pmap_clear_modify(pg);
-			uvm_lock_pageq();
 			uvm_pageactivate(pg);
-			uvm_unlock_pageq();
 			atomic_clearbits_int(&pg->pg_flags,
 			    PG_WANTED|PG_BUSY|PG_FAKE);
 			UVM_PAGE_OWN(pg, NULL);
@@ -979,9 +973,7 @@ uvm_fault_upper_lookup(struct uvm_faulti
 		 */
 		if (pg && (pg->pg_flags & (PG_RELEASED|PG_BUSY)) == 0 &&
 		    !pmap_extract(ufi->orig_map->pmap, currva, &pa)) {
-			uvm_lock_pageq();
 			uvm_pageactivate(pg);	/* reactivate */
-			uvm_unlock_pageq();
 			counters_inc(uvmexp_counters, flt_namap);
 
 			/* No fault-ahead when wired. */
@@ -1163,13 +1155,11 @@ retry:
 	/*
 	 * ... update the page queues.
 	 */
-	uvm_lock_pageq();
 	if (flt->wired) {
 		uvm_pagewire(pg);
 	} else {
 		uvm_pageactivate(pg);
 	}
-	uvm_unlock_pageq();
 
 	if (flt->wired) {
 		/*
@@ -1254,11 +1244,7 @@ uvm_fault_lower_lookup(
 		 * are neither busy nor released, so we don't need to check
 		 * for this.  we can just directly enter the pages.
 		 */
-		if (pages[lcv]->wire_count == 0) {
-			uvm_lock_pageq();
-			uvm_pageactivate(pages[lcv]);
-			uvm_unlock_pageq();
-		}
+		uvm_pageactivate(pages[lcv]);
 		counters_inc(uvmexp_counters, flt_nomap);
 
 		/* No fault-ahead when wired. */
@@ -1389,9 +1375,7 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 		/* update rusage counters */
 		curproc->p_ru.ru_minflt++;
 		if (uobjpage != PGO_DONTCARE) {
-			uvm_lock_pageq();
 			uvm_pageactivate(uobjpage);
-			uvm_unlock_pageq();
 		}
 	} else {
 		error = uvm_fault_lower_io(ufi, flt, &uobj, &uobjpage);
@@ -1546,7 +1530,6 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 		return ERESTART;
 	}
 
-	uvm_lock_pageq();
 	if (flt->wired) {
 		uvm_pagewire(pg);
 		if (pg->pg_flags & PQ_AOBJ) {
@@ -1568,7 +1551,6 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 	} else {
 		uvm_pageactivate(pg);
 	}
-	uvm_unlock_pageq();
 
 	if (dropswap)
 		uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
@@ -1675,9 +1657,7 @@ uvm_fault_lower_io(
 
 	/* release the page now, still holding object lock */
 	if (pg != PGO_DONTCARE) {
-		uvm_lock_pageq();
 		uvm_pageactivate(pg);
-		uvm_unlock_pageq();
 
 		if (pg->pg_flags & PG_WANTED)
 			wakeup(pg);
@@ -1807,9 +1787,7 @@ uvm_fault_unwire_locked(vm_map_t map, va
 
 		pg = PHYS_TO_VM_PAGE(pa);
 		if (pg) {
-			uvm_lock_pageq();
 			uvm_pageunwire(pg);
-			uvm_unlock_pageq();
 		}
 	}
 
Index: uvm/uvm_map.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
diff -u -p -r1.348 uvm_map.c
--- uvm/uvm_map.c	9 Nov 2025 15:53:47 -0000	1.348
+++ uvm/uvm_map.c	12 Nov 2025 11:28:56 -0000
@@ -4438,16 +4438,8 @@ uvm_map_clean(struct vm_map *map, vaddr_
 			case PGO_CLEANIT|PGO_DEACTIVATE:
 			case PGO_DEACTIVATE:
 deactivate_it:
-				/* skip the page if it's wired */
-				if (pg->wire_count != 0)
-					break;
-
-				uvm_lock_pageq();
-
 				KASSERT(pg->uanon == anon);
 				uvm_pagedeactivate(pg);
-
-				uvm_unlock_pageq();
 				break;
 			case PGO_FREE:
 				/*
Index: uvm/uvm_object.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_object.c,v
diff -u -p -r1.27 uvm_object.c
--- uvm/uvm_object.c	10 Mar 2025 14:13:58 -0000	1.27
+++ uvm/uvm_object.c	12 Nov 2025 11:28:56 -0000
@@ -161,13 +161,11 @@ uvm_obj_wire(struct uvm_object *uobj, vo
 		}
 
 		/* Wire the pages */
-		uvm_lock_pageq();
 		for (i = 0; i < npages; i++) {
 			uvm_pagewire(pgs[i]);
 			if (pageq != NULL)
 				TAILQ_INSERT_TAIL(pageq, pgs[i], pageq);
 		}
-		uvm_unlock_pageq();
 
 		/* Unbusy the pages */
 		uvm_page_unbusy(pgs, npages);
@@ -198,7 +196,6 @@ uvm_obj_unwire(struct uvm_object *uobj, 
 	off_t offset;
 
 	rw_enter(uobj->vmobjlock, RW_WRITE | RW_DUPOK);
-	uvm_lock_pageq();
 	for (offset = start; offset < end; offset += PAGE_SIZE) {
 		pg = uvm_pagelookup(uobj, offset);
 
@@ -207,7 +204,6 @@ uvm_obj_unwire(struct uvm_object *uobj, 
 
 		uvm_pageunwire(pg);
 	}
-	uvm_unlock_pageq();
 	rw_exit(uobj->vmobjlock);
 }
 #endif /* !SMALL_KERNEL */
Index: uvm/uvm_page.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_page.c,v
diff -u -p -r1.183 uvm_page.c
--- uvm/uvm_page.c	27 Apr 2025 08:37:47 -0000	1.183
+++ uvm/uvm_page.c	12 Nov 2025 14:10:16 -0000
@@ -1221,17 +1221,16 @@ uvm_pagelookup(struct uvm_object *obj, v
 
 /*
  * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
- *
- * => caller must lock page queues
  */
 void
 uvm_pagewire(struct vm_page *pg)
 {
 	KASSERT(uvm_page_owner_locked_p(pg, TRUE));
-	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
 
 	if (pg->wire_count == 0) {
+		uvm_lock_pageq();
 		uvm_pagedequeue(pg);
+		uvm_unlock_pageq();
 		atomic_inc_int(&uvmexp.wired);
 	}
 	pg->wire_count++;
@@ -1241,13 +1240,11 @@ uvm_pagewire(struct vm_page *pg)
  * uvm_pageunwire: unwire the page.
  *
  * => activate if wire count goes to zero.
- * => caller must lock page queues
  */
 void
 uvm_pageunwire(struct vm_page *pg)
 {
 	KASSERT(uvm_page_owner_locked_p(pg, TRUE));
-	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
 
 	pg->wire_count--;
 	if (pg->wire_count == 0) {
@@ -1257,18 +1254,27 @@ uvm_pageunwire(struct vm_page *pg)
 }
 
 /*
- * uvm_pagedeactivate: deactivate page.
+ * uvm_pagedeactivate: deactivate page (unless wired)
  *
- * => caller must lock page queues
- * => caller must check to make sure page is not wired
- * => object that page belongs to must be locked (so we can adjust pg->flags)
+ * => object that page belongs to must be locked
  */
 void
 uvm_pagedeactivate(struct vm_page *pg)
 {
 	KASSERT(uvm_page_owner_locked_p(pg, FALSE));
-	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
 
+	if (pg->wire_count > 0) {
+		KASSERT((pg->pg_flags & (PQ_INACTIVE|PQ_ACTIVE)) == 0);
+		return;
+	}
+
+	uvm_lock_pageq();
+	if (pg->pg_flags & PQ_INACTIVE) {
+		uvm_unlock_pageq();
+		return;
+	}
+
+	/* Make sure next access to this page will fault. */
 	pmap_page_protect(pg, PROT_NONE);
 
 	if (pg->pg_flags & PQ_ACTIVE) {
@@ -1276,42 +1282,40 @@ uvm_pagedeactivate(struct vm_page *pg)
 		atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
 		uvmexp.active--;
 	}
-	if ((pg->pg_flags & PQ_INACTIVE) == 0) {
-		KASSERT(pg->wire_count == 0);
-		TAILQ_INSERT_TAIL(&uvm.page_inactive, pg, pageq);
-		atomic_setbits_int(&pg->pg_flags, PQ_INACTIVE);
-		uvmexp.inactive++;
-		pmap_clear_reference(pg);
-		/*
-		 * update the "clean" bit.  this isn't 100%
-		 * accurate, and doesn't have to be.  we'll
-		 * re-sync it after we zap all mappings when
-		 * scanning the inactive list.
-		 */
-		if ((pg->pg_flags & PG_CLEAN) != 0 &&
-		    pmap_is_modified(pg))
-			atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
-	}
+	TAILQ_INSERT_TAIL(&uvm.page_inactive, pg, pageq);
+	atomic_setbits_int(&pg->pg_flags, PQ_INACTIVE);
+	uvmexp.inactive++;
+	uvm_unlock_pageq();
+
+	pmap_clear_reference(pg);
+	/*
+	 * update the "clean" bit.  this isn't 100% accurate, and
+	 * doesn't have to be.  we'll re-sync it after we zap all
+	 * mappings when scanning the inactive list.
+	 */
+	if ((pg->pg_flags & PG_CLEAN) != 0 && pmap_is_modified(pg))
+		atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
 }
 
 /*
- * uvm_pageactivate: activate page
- *
- * => caller must lock page queues
+ * uvm_pageactivate: activate page (unless wired)
  */
 void
 uvm_pageactivate(struct vm_page *pg)
 {
 	KASSERT(uvm_page_owner_locked_p(pg, FALSE));
-	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
-
-	uvm_pagedequeue(pg);
-	if (pg->wire_count == 0) {
-		TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
-		atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE);
-		uvmexp.active++;
 
+	if (pg->wire_count > 0) {
+		KASSERT((pg->pg_flags & (PQ_INACTIVE|PQ_ACTIVE)) == 0);
+		return;
 	}
+
+	uvm_lock_pageq();
+	uvm_pagedequeue(pg);
+	TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
+	atomic_setbits_int(&pg->pg_flags, PQ_ACTIVE);
+	uvmexp.active++;
+	uvm_unlock_pageq();
 }
 
 /*
@@ -1320,6 +1324,9 @@ uvm_pageactivate(struct vm_page *pg)
 void
 uvm_pagedequeue(struct vm_page *pg)
 {
+	KASSERT(uvm_page_owner_locked_p(pg, FALSE));
+	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
+
 	if (pg->pg_flags & PQ_ACTIVE) {
 		TAILQ_REMOVE(&uvm.page_active, pg, pageq);
 		atomic_clearbits_int(&pg->pg_flags, PQ_ACTIVE);
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -r1.138 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c	5 Oct 2025 14:13:22 -0000	1.138
+++ uvm/uvm_pdaemon.c	12 Nov 2025 11:28:56 -0000
@@ -592,8 +592,10 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 			 * and skip to next page.
 			 */
 			if (pmap_is_referenced(p)) {
+				uvm_unlock_pageq();
 				uvm_pageactivate(p);
 				rw_exit(slock);
+				uvm_lock_pageq();
 				uvmexp.pdreact++;
 				continue;
 			}
@@ -664,8 +666,10 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 			 */
 			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
 				dirtyreacts++;
+				uvm_unlock_pageq();
 				uvm_pageactivate(p);
 				rw_exit(slock);
+				uvm_lock_pageq();
 				continue;
 			}
 
@@ -867,13 +871,9 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 				uvm_anfree(anon);	/* kills anon */
 				pmap_page_protect(p, PROT_NONE);
 				anon = NULL;
-				uvm_lock_pageq();
-				/* dequeue first to prevent lock recursion */
-				uvm_pagedequeue(p);
 				/* free released page */
 				uvm_pagefree(p);
 			} else {	/* page was not released during I/O */
-				uvm_lock_pageq();
 				if (result != VM_PAGER_OK) {
 					/* pageout was a failure... */
 					if (result != VM_PAGER_AGAIN)
@@ -888,13 +888,12 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 				}
 			}
 			rw_exit(slock);
-		} else {
-			/*
-			 * lock page queues here just so they're always locked
-			 * at the end of the loop.
-			 */
-			uvm_lock_pageq();
 		}
+		/*
+		 * lock page queues here just so they're always locked
+		 * at the end of the loop.
+		 */
+		uvm_lock_pageq();
 	}
 	TAILQ_REMOVE(pglst, &iter, pageq);
 
@@ -1004,8 +1003,9 @@ uvmpd_scan_active(struct uvm_pmalloc *pm
 		 * inactive pages.
 		 */
 		if (inactive_shortage > 0) {
-			/* no need to check wire_count as pg is "active" */
+			uvm_unlock_pageq();
 			uvm_pagedeactivate(p);
+			uvm_lock_pageq();
 			uvmexp.pddeact++;
 			inactive_shortage--;
 		}
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
diff -u -p -r1.148 uvm_vnode.c
--- uvm/uvm_vnode.c	10 Nov 2025 15:53:06 -0000	1.148
+++ uvm/uvm_vnode.c	12 Nov 2025 11:28:56 -0000
@@ -663,9 +663,9 @@ uvn_flush(struct uvm_object *uobj, voff_
 		/* if we don't need a clean, deactivate/free pages then cont. */
 		if (!needs_clean) {
 			if (flags & PGO_DEACTIVATE) {
-				if (pp->wire_count == 0) {
-					uvm_pagedeactivate(pp);
-				}
+				uvm_unlock_pageq();
+				uvm_pagedeactivate(pp);
+				uvm_lock_pageq();
 			} else if (flags & PGO_FREE) {
 				if (pp->pg_flags & PG_BUSY) {
 					uvm_unlock_pageq();
@@ -788,9 +788,9 @@ ReTry:
 
 			/* dispose of page */
 			if (flags & PGO_DEACTIVATE) {
-				if (ptmp->wire_count == 0) {
-					uvm_pagedeactivate(ptmp);
-				}
+				uvm_unlock_pageq();
+				uvm_pagedeactivate(ptmp);
+				uvm_lock_pageq();
 			} else if (flags & PGO_FREE &&
 			    result != VM_PAGER_PEND) {
 				if (result != VM_PAGER_OK) {