Index | Thread | Search

From:
Martin Pieuchot <mpi@openbsd.org>
Subject:
uvm_pageout() cleanup
To:
tech@openbsd.org
Date:
Mon, 18 Mar 2024 17:51:06 +0100

Download raw body.

Thread
It isn't clear to me how concurrent accesses to uvmexp.* variables
should be protected and/or dealt with.  The diff below is a step towards
documenting and cleaning some of this mess.  It does the following:

- Stop calling uvmpd_tune() inside uvm_pageout().  We currently do not
  support adding RAM. `uvmexp.npages' is immutable after boot.  This
  makes it easier to reason about the globals it modifies.

- Cleanup uvmpd_tune() and document that `uvmexp.freemin' and
  `uvmexp.freetarg' are immutable.

- Try to read `uvmexp.free' as much as possible under `uvm_fpageq_lock'.

- Reduce the scope of the page queue lock (`uvm_lock_pageq').  It
  serializes accesses to `uvmexp.active' and `uvmexp.inactive'.

- Document which locks/mechanisms are protecting some fields in struct uvmexp.

I'm still unhappy about the accesses to `uvmexp.inactive' and
`uvmexp.inactarg' inside uvm_pmr_getpages() but I don't want to grab
another global mutex there as it is a highly contended code path.  So
how to proceed is open to discussion.  However I still think the diff
below is a step forward.

ok?

Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -r1.109 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c	27 Oct 2023 19:18:53 -0000	1.109
+++ uvm/uvm_pdaemon.c	17 Mar 2024 17:14:54 -0000
@@ -165,33 +165,27 @@ uvm_wait(const char *wmsg)
 
 /*
  * uvmpd_tune: tune paging parameters
- *
- * => called whenever memory is added to (or removed from?) the system
- * => caller must call with page queues locked
  */
-
 void
 uvmpd_tune(void)
 {
+	int val;
 
-	uvmexp.freemin = uvmexp.npages / 30;
+	val = uvmexp.npages / 30;
 
-	/* between 16k and 512k */
 	/* XXX:  what are these values good for? */
-	uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
-#if 0
-	uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT);
-#endif
+	val = max(val, (16*1024) >> PAGE_SHIFT);
 
 	/* Make sure there's always a user page free. */
-	if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
-		uvmexp.freemin = uvmexp.reserve_kernel + 1;
-
-	uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
-	if (uvmexp.freetarg <= uvmexp.freemin)
-		uvmexp.freetarg = uvmexp.freemin + 1;
-
-	/* uvmexp.inactarg: computed in main daemon loop */
+	if (val < uvmexp.reserve_kernel + 1)
+		val = uvmexp.reserve_kernel + 1;
+	uvmexp.freemin = val;
+
+	/* Calculate free target. */
+	val = (uvmexp.freemin * 4) / 3;
+	if (val <= uvmexp.freemin)
+		val = uvmexp.freemin + 1;
+	uvmexp.freetarg = val;
 
 	uvmexp.wiredmax = uvmexp.npages / 3;
 }
@@ -211,15 +205,12 @@ uvm_pageout(void *arg)
 {
 	struct uvm_constraint_range constraint;
 	struct uvm_pmalloc *pma;
-	int npages = 0;
+	int free;
 
 	/* ensure correct priority and set paging parameters... */
 	uvm.pagedaemon_proc = curproc;
 	(void) spl0();
-	uvm_lock_pageq();
-	npages = uvmexp.npages;
 	uvmpd_tune();
-	uvm_unlock_pageq();
 
 	for (;;) {
 		long size;
@@ -245,44 +236,38 @@ uvm_pageout(void *arg)
 			} else
 				constraint = no_constraint;
 		}
-
+		free = uvmexp.free - BUFPAGES_DEFICIT;
 		uvm_unlock_fpageq();
 
 		/*
 		 * now lock page queues and recompute inactive count
 		 */
 		uvm_lock_pageq();
-		if (npages != uvmexp.npages) {	/* check for new pages? */
-			npages = uvmexp.npages;
-			uvmpd_tune();
-		}
-
 		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
 		if (uvmexp.inactarg <= uvmexp.freetarg) {
 			uvmexp.inactarg = uvmexp.freetarg + 1;
 		}
+		uvm_unlock_pageq();
 
 		/* Reclaim pages from the buffer cache if possible. */
 		size = 0;
 		if (pma != NULL)
 			size += pma->pm_size >> PAGE_SHIFT;
-		if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg)
-			size += uvmexp.freetarg - (uvmexp.free -
-			    BUFPAGES_DEFICIT);
+		if (free < uvmexp.freetarg)
+			size += uvmexp.freetarg - free;
 		if (size == 0)
 			size = 16; /* XXX */
-		uvm_unlock_pageq();
+
 		(void) bufbackoff(&constraint, size * 2);
 #if NDRM > 0
 		drmbackoff(size * 2);
 #endif
-		uvm_lock_pageq();
-
 		/*
 		 * scan if needed
 		 */
-		if (pma != NULL ||
-		    ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
+		uvm_lock_pageq();
+		free = uvmexp.free - BUFPAGES_DEFICIT;
+		if (pma != NULL || (free < uvmexp.freetarg) ||
 		    ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
 			uvmpd_scan(pma, &constraint);
 		}
Index: uvm/uvmexp.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvmexp.h,v
diff -u -p -r1.11 uvmexp.h
--- uvm/uvmexp.h	27 Oct 2023 19:18:53 -0000	1.11
+++ uvm/uvmexp.h	17 Mar 2024 17:18:47 -0000
@@ -45,7 +45,9 @@
  *	I	immutable after creation
  *	K	kernel lock
  *	F	uvm_lock_fpageq
+ *	L	uvm_lock_pageq
  *	S	uvm_swap_data_lock
+ *	p	copy of per-CPU counters, used only by userland.
  */
 struct uvmexp {
 	/* vm_page constants */
@@ -56,8 +58,8 @@ struct uvmexp {
 	/* vm_page counters */
 	int npages;     /* [I] number of pages we manage */
 	int free;       /* [F] number of free pages */
-	int active;     /* number of active pages */
-	int inactive;   /* number of pages that we free'd but may want back */
+	int active;     /* [L] # of active pages */
+	int inactive;   /* [L] # of pages that we free'd but may want back */
 	int paging;	/* number of pages in the process of being paged out */
 	int wired;      /* number of wired pages */
 
@@ -69,10 +71,10 @@ struct uvmexp {
 	int vtextpages;		/* XXX # of pages used by vtext vnodes */
 
 	/* pageout params */
-	int freemin;    /* min number of free pages */
-	int freetarg;   /* target number of free pages */
+	int freemin;    /* [I] min number of free pages */
+	int freetarg;   /* [I] target number of free pages */
 	int inactarg;   /* target number of inactive pages */
-	int wiredmax;   /* max number of wired pages */
+	int wiredmax;   /* [I] max number of wired pages */
 	int anonmin;	/* min threshold for anon pages */
 	int vtextmin;	/* min threshold for vtext pages */
 	int vnodemin;	/* min threshold for vnode pages */
@@ -91,16 +93,16 @@ struct uvmexp {
 	int unused06;	/* formerly nfreeanon */
 
 	/* stat counters */
-	int faults;		/* page fault count */
+	int faults;		/* [p] page fault count */
 	int traps;		/* trap count */
 	int intrs;		/* interrupt count */
 	int swtch;		/* context switch count */
 	int softs;		/* software interrupt count */
 	int syscalls;		/* system calls */
-	int pageins;		/* pagein operation count */
+	int pageins;		/* [p] pagein operation count */
 				/* pageouts are in pdpageouts below */
-	int unused07;		/* formerly obsolete_swapins */
-	int unused08;		/* formerly obsolete_swapouts */
+	int unused07;           /* formerly obsolete_swapins */
+	int unused08;           /* formerly obsolete_swapouts */
 	int pgswapin;		/* pages swapped in */
 	int pgswapout;		/* pages swapped out */
 	int forks;  		/* forks */
@@ -113,28 +115,28 @@ struct uvmexp {
 	int unused09;		/* formerly zeroaborts */
 
 	/* fault subcounters */
-	int fltnoram;	/* number of times fault was out of ram */
-	int fltnoanon;	/* number of times fault was out of anons */
-	int fltnoamap;	/* number of times fault was out of amap chunks */
-	int fltpgwait;	/* number of times fault had to wait on a page */
-	int fltpgrele;	/* number of times fault found a released page */
-	int fltrelck;	/* number of times fault relock called */
-	int fltrelckok;	/* number of times fault relock is a success */
-	int fltanget;	/* number of times fault gets anon page */
-	int fltanretry;	/* number of times fault retrys an anon get */
-	int fltamcopy;	/* number of times fault clears "needs copy" */
-	int fltnamap;	/* number of times fault maps a neighbor anon page */
-	int fltnomap;	/* number of times fault maps a neighbor obj page */
-	int fltlget;	/* number of times fault does a locked pgo_get */
-	int fltget;	/* number of times fault does an unlocked get */
-	int flt_anon;	/* number of times fault anon (case 1a) */
-	int flt_acow;	/* number of times fault anon cow (case 1b) */
-	int flt_obj;	/* number of times fault is on object page (2a) */
-	int flt_prcopy;	/* number of times fault promotes with copy (2b) */
-	int flt_przero;	/* number of times fault promotes with zerofill (2b) */
+	int fltnoram;	/* [p] # of times fault was out of ram */
+	int fltnoanon;	/* [p] # of times fault was out of anons */
+	int fltnoamap;	/* [p] # of times fault was out of amap chunks */
+	int fltpgwait;	/* [p] # of times fault had to wait on a page */
+	int fltpgrele;	/* [p] # of times fault found a released page */
+	int fltrelck;	/* [p] # of times fault relock called */
+	int fltrelckok;	/* [p] # of times fault relock is a success */
+	int fltanget;	/* [p] # of times fault gets anon page */
+	int fltanretry;	/* [p] # of times fault retrys an anon get */
+	int fltamcopy;	/* [p] # of times fault clears "needs copy" */
+	int fltnamap;	/* [p] # of times fault maps a neighbor anon page */
+	int fltnomap;	/* [p] # of times fault maps a neighbor obj page */
+	int fltlget;	/* [p] # of times fault does a locked pgo_get */
+	int fltget;	/* [p] # of times fault does an unlocked get */
+	int flt_anon;	/* [p] # of times fault anon (case 1a) */
+	int flt_acow;	/* [p] # of times fault anon cow (case 1b) */
+	int flt_obj;	/* [p] # of times fault is on object page (2a) */
+	int flt_prcopy;	/* [p] # of times fault promotes with copy (2b) */
+	int flt_przero;	/* [p] # of times fault promotes with zerofill (2b) */
 
 	/* daemon counters */
-	int pdwoke;	/* number of times daemon woke up */
+	int pdwoke;	/* [F] # of times daemon woke up */
 	int pdrevs;	/* number of times daemon rev'd clock hand */
 	int pdswout;	/* number of times daemon called for swapout */
 	int pdfreed;	/* number of pages daemon freed since boot */