From: Martin Pieuchot <mpi@grenadille.net>
Subject: Re: uvmpd_iterator
To: tech@openbsd.org
Date: Tue, 4 Mar 2025 14:26:11 +0100

On 26/02/25(Wed) 15:09, Martin Pieuchot wrote:
> Currently the pdaemon does funny dances when iterating the inactive list
> in order to grab the next page on the list.  Diff below simplifies those
> dances by inserting a marker (iterator) on the list.  This is necessary
> to reduce the scope of the uvm_pageq_lock.
> 
> ok?

Anyone?
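
The trick, in isolation: keep a dummy marker entry linked into the
queue itself, so that the scan position survives pages being freed or
requeued while the lock is dropped, and advance the marker on every
iteration.  Here is a minimal stand-alone sketch of the pattern; the
names (struct item, ITF_MARKER, iter_next) are made up for the example
and are not UVM's:

	/* Illustrative sketch of marker-based list iteration. */
	#include <sys/queue.h>
	#include <stddef.h>

	struct item {
		TAILQ_ENTRY(item)	entry;
		int			flags;
	};
	#define ITF_MARKER	0x01	/* entry is a marker, not data */

	TAILQ_HEAD(itemlist, item);

	/*
	 * Return the element following the marker `iter', skipping
	 * markers left by other walkers, and re-insert `iter' after
	 * the returned element so the position is remembered for the
	 * next call.
	 */
	struct item *
	iter_next(struct itemlist *list, struct item *iter)
	{
		struct item *next;

		next = TAILQ_NEXT(iter, entry);
		while (next != NULL && (next->flags & ITF_MARKER))
			next = TAILQ_NEXT(next, entry);

		if (next != NULL) {
			TAILQ_REMOVE(list, iter, entry);
			TAILQ_INSERT_AFTER(list, next, iter, entry);
		}

		return next;
	}

A walk then inserts the marker after the first element, steps through
iter_next() and unlinks the marker when done:

	struct itemlist list = TAILQ_HEAD_INITIALIZER(list);
	struct item iter = { .flags = ITF_MARKER };
	struct item *p = TAILQ_FIRST(&list);

	if (p != NULL) {
		TAILQ_INSERT_AFTER(&list, p, &iter, entry);
		for (; p != NULL; p = iter_next(&list, &iter)) {
			/* work on p; the lock may be dropped here */
		}
		TAILQ_REMOVE(&list, &iter, entry);
	}

Only the marker records the position, so when the page we were looking
at is freed behind our back only that page gets unlinked; the marker
stays put and the walk resumes from where it left off.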

Index: uvm/uvm_page.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_page.h,v
diff -u -p -r1.72 uvm_page.h
--- uvm/uvm_page.h	19 Feb 2025 11:07:47 -0000	1.72
+++ uvm/uvm_page.h	26 Feb 2025 14:00:08 -0000
@@ -147,13 +147,12 @@ struct vm_page {
 #define PG_RDONLY	0x00000080	/* page must be mapped read-only */
 #define PG_ZERO		0x00000100	/* page is pre-zero'd */
 #define PG_DEV		0x00000200	/* page is in device space, lay off */
-
-#define PG_PAGER1	0x00001000	/* pager-specific flag */
 #define PG_MASK		0x0000ffff
 
 #define PQ_FREE		0x00010000	/* page is on free list */
 #define PQ_INACTIVE	0x00020000	/* page is in inactive list */
 #define PQ_ACTIVE	0x00040000	/* page is in active list */
+#define PQ_ITER		0x00080000	/* page is an iterator marker */
 #define PQ_ANON		0x00100000	/* page is part of an anon, rather
 					   than an uvm_object */
 #define PQ_AOBJ		0x00200000	/* page is part of an anonymous
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -r1.134 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c	25 Jan 2025 08:55:52 -0000	1.134
+++ uvm/uvm_pdaemon.c	26 Feb 2025 14:02:39 -0000
@@ -453,6 +453,29 @@ uvmpd_match_constraint(struct vm_page *p
 	return 0;
 }
 
+struct vm_page *
+uvmpd_iterator(struct pglist *pglst, struct vm_page *p, struct vm_page *iter)
+{
+	struct vm_page *nextpg = NULL;
+
+	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
+
+	/* p is null to signal final swap i/o. */
+	if (p == NULL)
+		return NULL;
+
+	nextpg = TAILQ_NEXT(iter, pageq);
+	while (nextpg != NULL && (nextpg->pg_flags & PQ_ITER))
+		nextpg = TAILQ_NEXT(nextpg, pageq);
+
+	if (nextpg) {
+		TAILQ_REMOVE(pglst, iter, pageq);
+		TAILQ_INSERT_AFTER(pglst, nextpg, iter, pageq);
+	}
+
+	return nextpg;
+}
+
 /*
  * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
  *
@@ -467,7 +490,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 {
 	struct pglist *pglst = &uvm.page_inactive;
 	int result, freed = 0;
-	struct vm_page *p, *nextpg;
+	struct vm_page *p, iter = { .pg_flags = PQ_ITER };
 	struct uvm_object *uobj;
 	struct vm_page *pps[SWCLUSTPAGES], **ppsp;
 	int npages;
@@ -501,7 +524,12 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 			break;
 	}
 
-	for (; p != NULL || swslot != 0; p = nextpg) {
+	if (p == NULL)
+		return 0;
+
+	/* Insert iterator. */
+	TAILQ_INSERT_AFTER(pglst, p, &iter, pageq);
+	for (; p != NULL || swslot != 0; p = uvmpd_iterator(pglst, p, &iter)) {
 		/*
 		 * note that p can be NULL iff we have traversed the whole
 		 * list and need to do one final swap-backed clustered pageout.
@@ -522,7 +550,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 
 				/* set p to null to signal final swap i/o */
 				p = NULL;
-				nextpg = NULL;
 			}
 		}
 		if (p) {	/* if (we have a new page to consider) */
@@ -530,7 +557,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 			 * we are below target and have a new page to consider.
 			 */
 			uvmexp.pdscans++;
-			nextpg = TAILQ_NEXT(p, pageq);
 
 			/*
 			 * If we are not short on memory and only interested
@@ -772,26 +798,11 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 		 * async I/O is in progress and the async I/O done routine
 		 * will clean up after us.   in this case we move on to the
 		 * next page.
-		 *
-		 * there is a very remote chance that the pending async i/o can
-		 * finish _before_ we get here.   if that happens, our page "p"
-		 * may no longer be on the inactive queue.   so we verify this
-		 * when determining the next page (starting over at the head if
-		 * we've lost our inactive page).
 		 */
-
 		if (result == VM_PAGER_PEND) {
 			atomic_add_int(&uvmexp.paging, npages);
 			uvm_lock_pageq();
 			uvmexp.pdpending++;
-			if (p) {
-				if (p->pg_flags & PQ_INACTIVE)
-					nextpg = TAILQ_NEXT(p, pageq);
-				else
-					nextpg = TAILQ_FIRST(pglst);
-			} else {
-				nextpg = NULL;
-			}
 			continue;
 		}
 
@@ -852,12 +863,10 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 				pmap_page_protect(p, PROT_NONE);
 				anon = NULL;
 				uvm_lock_pageq();
-				nextpg = TAILQ_NEXT(p, pageq);
 				/* free released page */
 				uvm_pagefree(p);
 			} else {	/* page was not released during I/O */
 				uvm_lock_pageq();
-				nextpg = TAILQ_NEXT(p, pageq);
 				if (result != VM_PAGER_OK) {
 					/* pageout was a failure... */
 					if (result != VM_PAGER_AGAIN)
@@ -871,33 +880,16 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 					    PG_CLEAN);
 				}
 			}
-
-			/*
-			 * drop object lock (if there is an object left).   do
-			 * a safety check of nextpg to make sure it is on the
-			 * inactive queue (it should be since PG_BUSY pages on
-			 * the inactive queue can't be re-queued [note: not
-			 * true for active queue]).
-			 */
 			rw_exit(slock);
-
-			if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
-				nextpg = TAILQ_FIRST(pglst);	/* reload! */
-			}
 		} else {
 			/*
-			 * if p is null in this loop, make sure it stays null
-			 * in the next loop.
-			 */
-			nextpg = NULL;
-
-			/*
 			 * lock page queues here just so they're always locked
 			 * at the end of the loop.
 			 */
 			uvm_lock_pageq();
 		}
 	}
+	TAILQ_REMOVE(pglst, &iter, pageq);
 
 	return freed;
 }