Download raw body.
uvm_pageout() cleanup
It isn't clear to me how concurrent accesses to uvmexp.* variables
should be protected and/or dealt with. The diff below is a step towards
documenting and cleaning some of this mess. It does the following:
- Stop calling uvmpd_tune() inside uvm_pageout(). We currently do not
support adding RAM. `uvmexp.npages' is immutable after boot. This
helps thinking about the globals it modifies.
- Cleanup uvmpd_tune() and document that `uvmexp.freemin' and
`uvmexp.freetarg' are immutable.
- Try to read `uvmexp.free' as much as possible under `uvm_fpageq_lock'.
- Reduce the scope of the `uvm_pageq_lock' lock. It serializes accesses
to `uvmexp.active' and `uvmexp.inactive'.
- Document which locks/mechanisms are protecting some fields in struct uvmexp.
I'm still unhappy about the accesses to `uvmexp.inactive' and
`uvmexp.inactarg' inside uvm_pmr_getpages() but I don't want to grab
another global mutex there as it is a highly contended code path. So
how to proceed is open to discussion. However I still think the diff
below is a step forward.
ok?
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -r1.109 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c 27 Oct 2023 19:18:53 -0000 1.109
+++ uvm/uvm_pdaemon.c 17 Mar 2024 17:14:54 -0000
@@ -165,33 +165,27 @@ uvm_wait(const char *wmsg)
/*
* uvmpd_tune: tune paging parameters
- *
- * => called whenever memory is added to (or removed from?) the system
- * => caller must call with page queues locked
*/
-
void
uvmpd_tune(void)
{
+ int val;
- uvmexp.freemin = uvmexp.npages / 30;
+ val = uvmexp.npages / 30;
- /* between 16k and 512k */
/* XXX: what are these values good for? */
- uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
-#if 0
- uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT);
-#endif
+ val = max(val, (16*1024) >> PAGE_SHIFT);
/* Make sure there's always a user page free. */
- if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
- uvmexp.freemin = uvmexp.reserve_kernel + 1;
-
- uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
- if (uvmexp.freetarg <= uvmexp.freemin)
- uvmexp.freetarg = uvmexp.freemin + 1;
-
- /* uvmexp.inactarg: computed in main daemon loop */
+ if (val < uvmexp.reserve_kernel + 1)
+ val = uvmexp.reserve_kernel + 1;
+ uvmexp.freemin = val;
+
+ /* Calculate free target. */
+ val = (uvmexp.freemin * 4) / 3;
+ if (val <= uvmexp.freemin)
+ val = uvmexp.freemin + 1;
+ uvmexp.freetarg = val;
uvmexp.wiredmax = uvmexp.npages / 3;
}
@@ -211,15 +205,12 @@ uvm_pageout(void *arg)
{
struct uvm_constraint_range constraint;
struct uvm_pmalloc *pma;
- int npages = 0;
+ int free;
/* ensure correct priority and set paging parameters... */
uvm.pagedaemon_proc = curproc;
(void) spl0();
- uvm_lock_pageq();
- npages = uvmexp.npages;
uvmpd_tune();
- uvm_unlock_pageq();
for (;;) {
long size;
@@ -245,44 +236,38 @@ uvm_pageout(void *arg)
} else
constraint = no_constraint;
}
-
+ free = uvmexp.free - BUFPAGES_DEFICIT;
uvm_unlock_fpageq();
/*
* now lock page queues and recompute inactive count
*/
uvm_lock_pageq();
- if (npages != uvmexp.npages) { /* check for new pages? */
- npages = uvmexp.npages;
- uvmpd_tune();
- }
-
uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
if (uvmexp.inactarg <= uvmexp.freetarg) {
uvmexp.inactarg = uvmexp.freetarg + 1;
}
+ uvm_unlock_pageq();
/* Reclaim pages from the buffer cache if possible. */
size = 0;
if (pma != NULL)
size += pma->pm_size >> PAGE_SHIFT;
- if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg)
- size += uvmexp.freetarg - (uvmexp.free -
- BUFPAGES_DEFICIT);
+ if (free < uvmexp.freetarg)
+ size += uvmexp.freetarg - free;
if (size == 0)
size = 16; /* XXX */
- uvm_unlock_pageq();
+
(void) bufbackoff(&constraint, size * 2);
#if NDRM > 0
drmbackoff(size * 2);
#endif
- uvm_lock_pageq();
-
/*
* scan if needed
*/
- if (pma != NULL ||
- ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
+ uvm_lock_pageq();
+ free = uvmexp.free - BUFPAGES_DEFICIT;
+ if (pma != NULL || (free < uvmexp.freetarg) ||
((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
uvmpd_scan(pma, &constraint);
}
Index: uvm/uvmexp.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvmexp.h,v
diff -u -p -r1.11 uvmexp.h
--- uvm/uvmexp.h 27 Oct 2023 19:18:53 -0000 1.11
+++ uvm/uvmexp.h 17 Mar 2024 17:18:47 -0000
@@ -45,7 +45,9 @@
* I immutable after creation
* K kernel lock
* F uvm_lock_fpageq
+ * L uvm_lock_pageq
* S uvm_swap_data_lock
+ * p copy of per-CPU counters, used only by userland.
*/
struct uvmexp {
/* vm_page constants */
@@ -56,8 +58,8 @@ struct uvmexp {
/* vm_page counters */
int npages; /* [I] number of pages we manage */
int free; /* [F] number of free pages */
- int active; /* number of active pages */
- int inactive; /* number of pages that we free'd but may want back */
+ int active; /* [L] # of active pages */
+ int inactive; /* [L] # of pages that we free'd but may want back */
int paging; /* number of pages in the process of being paged out */
int wired; /* number of wired pages */
@@ -69,10 +71,10 @@ struct uvmexp {
int vtextpages; /* XXX # of pages used by vtext vnodes */
/* pageout params */
- int freemin; /* min number of free pages */
- int freetarg; /* target number of free pages */
+ int freemin; /* [I] min number of free pages */
+ int freetarg; /* [I] target number of free pages */
int inactarg; /* target number of inactive pages */
- int wiredmax; /* max number of wired pages */
+ int wiredmax; /* [I] max number of wired pages */
int anonmin; /* min threshold for anon pages */
int vtextmin; /* min threshold for vtext pages */
int vnodemin; /* min threshold for vnode pages */
@@ -91,16 +93,16 @@ struct uvmexp {
int unused06; /* formerly nfreeanon */
/* stat counters */
- int faults; /* page fault count */
+ int faults; /* [p] page fault count */
int traps; /* trap count */
int intrs; /* interrupt count */
int swtch; /* context switch count */
int softs; /* software interrupt count */
int syscalls; /* system calls */
- int pageins; /* pagein operation count */
+ int pageins; /* [p] pagein operation count */
/* pageouts are in pdpageouts below */
- int unused07; /* formerly obsolete_swapins */
- int unused08; /* formerly obsolete_swapouts */
+ int unused07; /* formerly obsolete_swapins */
+ int unused08; /* formerly obsolete_swapouts */
int pgswapin; /* pages swapped in */
int pgswapout; /* pages swapped out */
int forks; /* forks */
@@ -113,28 +115,28 @@ struct uvmexp {
int unused09; /* formerly zeroaborts */
/* fault subcounters */
- int fltnoram; /* number of times fault was out of ram */
- int fltnoanon; /* number of times fault was out of anons */
- int fltnoamap; /* number of times fault was out of amap chunks */
- int fltpgwait; /* number of times fault had to wait on a page */
- int fltpgrele; /* number of times fault found a released page */
- int fltrelck; /* number of times fault relock called */
- int fltrelckok; /* number of times fault relock is a success */
- int fltanget; /* number of times fault gets anon page */
- int fltanretry; /* number of times fault retrys an anon get */
- int fltamcopy; /* number of times fault clears "needs copy" */
- int fltnamap; /* number of times fault maps a neighbor anon page */
- int fltnomap; /* number of times fault maps a neighbor obj page */
- int fltlget; /* number of times fault does a locked pgo_get */
- int fltget; /* number of times fault does an unlocked get */
- int flt_anon; /* number of times fault anon (case 1a) */
- int flt_acow; /* number of times fault anon cow (case 1b) */
- int flt_obj; /* number of times fault is on object page (2a) */
- int flt_prcopy; /* number of times fault promotes with copy (2b) */
- int flt_przero; /* number of times fault promotes with zerofill (2b) */
+ int fltnoram; /* [p] # of times fault was out of ram */
+ int fltnoanon; /* [p] # of times fault was out of anons */
+ int fltnoamap; /* [p] # of times fault was out of amap chunks */
+ int fltpgwait; /* [p] # of times fault had to wait on a page */
+ int fltpgrele; /* [p] # of times fault found a released page */
+ int fltrelck; /* [p] # of times fault relock called */
+ int fltrelckok; /* [p] # of times fault relock is a success */
+ int fltanget; /* [p] # of times fault gets anon page */
+ int fltanretry; /* [p] # of times fault retrys an anon get */
+ int fltamcopy; /* [p] # of times fault clears "needs copy" */
+ int fltnamap; /* [p] # of times fault maps a neighbor anon page */
+ int fltnomap; /* [p] # of times fault maps a neighbor obj page */
+ int fltlget; /* [p] # of times fault does a locked pgo_get */
+ int fltget; /* [p] # of times fault does an unlocked get */
+ int flt_anon; /* [p] # of times fault anon (case 1a) */
+ int flt_acow; /* [p] # of times fault anon cow (case 1b) */
+ int flt_obj; /* [p] # of times fault is on object page (2a) */
+ int flt_prcopy; /* [p] # of times fault promotes with copy (2b) */
+ int flt_przero; /* [p] # of times fault promotes with zerofill (2b) */
/* daemon counters */
- int pdwoke; /* number of times daemon woke up */
+ int pdwoke; /* [F] # of times daemon woke up */
int pdrevs; /* number of times daemon rev'd clock hand */
int pdswout; /* number of times daemon called for swapout */
int pdfreed; /* number of pages daemon freed since boot */
uvm_pageout() cleanup