From: Martin Pieuchot Subject: uvm_pageout() cleanup To: tech@openbsd.org Date: Mon, 18 Mar 2024 17:51:06 +0100 It isn't clear to me how concurrent accesses to uvmexp.* variables should be protected and/or dealt with. The diff below is a step towards documenting and cleaning some of this mess. It does the following: - Stop calling uvmpd_tune() inside uvm_pageout(). We currently do not support adding RAM. `uvmexp.npages' is immutable after boot. This helps thinking about the globals it modifies. - Cleanup uvmpd_tune() and document that `uvmexp.freemin' and `uvmexp.freetarg' are immutable. - Try to read `uvmexp.free' as much as possible under `uvm_fpageq_lock'. - Reduce the scope of the `uvm_pageq_lock' lock. It serializes accesses to `uvmexp.active' and `uvmexp.inactive'. - Document which locks/mechanisms are protecting some fields in struct uvmexp. I'm still unhappy about the accesses to `uvmexp.inactive' and `uvmexp.inactarg' inside uvm_pmr_getpages() but I don't want to grab another global mutex there as it is a highly contended code path. So how to proceed is open to discussion. However I still think the diff below is a step forward. ok? Index: uvm/uvm_pdaemon.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v diff -u -p -r1.109 uvm_pdaemon.c --- uvm/uvm_pdaemon.c 27 Oct 2023 19:18:53 -0000 1.109 +++ uvm/uvm_pdaemon.c 17 Mar 2024 17:14:54 -0000 @@ -165,33 +165,27 @@ uvm_wait(const char *wmsg) /* * uvmpd_tune: tune paging parameters - * - * => called whenever memory is added to (or removed from?) the system - * => caller must call with page queues locked */ - void uvmpd_tune(void) { + int val; - uvmexp.freemin = uvmexp.npages / 30; + val = uvmexp.npages / 30; - /* between 16k and 512k */ /* XXX: what are these values good for? */ - uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT); -#if 0 - uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT); -#endif + val = max(val, (16*1024) >> PAGE_SHIFT); /* Make sure there's always a user page free. */ - if (uvmexp.freemin < uvmexp.reserve_kernel + 1) - uvmexp.freemin = uvmexp.reserve_kernel + 1; - - uvmexp.freetarg = (uvmexp.freemin * 4) / 3; - if (uvmexp.freetarg <= uvmexp.freemin) - uvmexp.freetarg = uvmexp.freemin + 1; - - /* uvmexp.inactarg: computed in main daemon loop */ + if (val < uvmexp.reserve_kernel + 1) + val = uvmexp.reserve_kernel + 1; + uvmexp.freemin = val; + + /* Calculate free target. */ + val = (uvmexp.freemin * 4) / 3; + if (val <= uvmexp.freemin) + val = uvmexp.freemin + 1; + uvmexp.freetarg = val; uvmexp.wiredmax = uvmexp.npages / 3; } @@ -211,15 +205,12 @@ uvm_pageout(void *arg) { struct uvm_constraint_range constraint; struct uvm_pmalloc *pma; - int npages = 0; + int free; /* ensure correct priority and set paging parameters... */ uvm.pagedaemon_proc = curproc; (void) spl0(); - uvm_lock_pageq(); - npages = uvmexp.npages; uvmpd_tune(); - uvm_unlock_pageq(); for (;;) { long size; @@ -245,44 +236,38 @@ uvm_pageout(void *arg) } else constraint = no_constraint; } - + free = uvmexp.free - BUFPAGES_DEFICIT; uvm_unlock_fpageq(); /* * now lock page queues and recompute inactive count */ uvm_lock_pageq(); - if (npages != uvmexp.npages) { /* check for new pages? */ - npages = uvmexp.npages; - uvmpd_tune(); - } - uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3; if (uvmexp.inactarg <= uvmexp.freetarg) { uvmexp.inactarg = uvmexp.freetarg + 1; } + uvm_unlock_pageq(); /* Reclaim pages from the buffer cache if possible. */ size = 0; if (pma != NULL) size += pma->pm_size >> PAGE_SHIFT; - if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg) - size += uvmexp.freetarg - (uvmexp.free - - BUFPAGES_DEFICIT); + if (free < uvmexp.freetarg) + size += uvmexp.freetarg - free; if (size == 0) size = 16; /* XXX */ - uvm_unlock_pageq(); + (void) bufbackoff(&constraint, size * 2); #if NDRM > 0 drmbackoff(size * 2); #endif - uvm_lock_pageq(); - /* * scan if needed */ - if (pma != NULL || - ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) || + uvm_lock_pageq(); + free = uvmexp.free - BUFPAGES_DEFICIT; + if (pma != NULL || (free < uvmexp.freetarg) || ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) { uvmpd_scan(pma, &constraint); } Index: uvm/uvmexp.h =================================================================== RCS file: /cvs/src/sys/uvm/uvmexp.h,v diff -u -p -r1.11 uvmexp.h --- uvm/uvmexp.h 27 Oct 2023 19:18:53 -0000 1.11 +++ uvm/uvmexp.h 17 Mar 2024 17:18:47 -0000 @@ -45,7 +45,9 @@ * I immutable after creation * K kernel lock * F uvm_lock_fpageq + * L uvm_lock_pageq * S uvm_swap_data_lock + * p copy of per-CPU counters, used only by userland. */ struct uvmexp { /* vm_page constants */ @@ -56,8 +58,8 @@ struct uvmexp { /* vm_page counters */ int npages; /* [I] number of pages we manage */ int free; /* [F] number of free pages */ - int active; /* number of active pages */ - int inactive; /* number of pages that we free'd but may want back */ + int active; /* [L] # of active pages */ + int inactive; /* [L] # of pages that we free'd but may want back */ int paging; /* number of pages in the process of being paged out */ int wired; /* number of wired pages */ @@ -69,10 +71,10 @@ struct uvmexp { int vtextpages; /* XXX # of pages used by vtext vnodes */ /* pageout params */ - int freemin; /* min number of free pages */ - int freetarg; /* target number of free pages */ + int freemin; /* [I] min number of free pages */ + int freetarg; /* [I] target number of free pages */ int inactarg; /* target number of inactive pages */ - int wiredmax; /* max number of wired pages */ + int wiredmax; /* [I] max number of wired pages */ int anonmin; /* min threshold for anon pages */ int vtextmin; /* min threshold for vtext pages */ int vnodemin; /* min threshold for vnode pages */ @@ -91,16 +93,16 @@ struct uvmexp { int unused06; /* formerly nfreeanon */ /* stat counters */ - int faults; /* page fault count */ + int faults; /* [p] page fault count */ int traps; /* trap count */ int intrs; /* interrupt count */ int swtch; /* context switch count */ int softs; /* software interrupt count */ int syscalls; /* system calls */ - int pageins; /* pagein operation count */ + int pageins; /* [p] pagein operation count */ /* pageouts are in pdpageouts below */ - int unused07; /* formerly obsolete_swapins */ - int unused08; /* formerly obsolete_swapouts */ + int unused07; /* formerly obsolete_swapins */ + int unused08; /* formerly obsolete_swapouts */ int pgswapin; /* pages swapped in */ int pgswapout; /* pages swapped out */ int forks; /* forks */ @@ -113,28 +115,28 @@ struct uvmexp { int unused09; /* formerly zeroaborts */ /* fault subcounters */ - int fltnoram; /* number of times fault was out of ram */ - int fltnoanon; /* number of times fault was out of anons */ - int fltnoamap; /* number of times fault was out of amap chunks */ - int fltpgwait; /* number of times fault had to wait on a page */ - int fltpgrele; /* number of times fault found a released page */ - int fltrelck; /* number of times fault relock called */ - int fltrelckok; /* number of times fault relock is a success */ - int fltanget; /* number of times fault gets anon page */ - int fltanretry; /* number of times fault retrys an anon get */ - int fltamcopy; /* number of times fault clears "needs copy" */ - int fltnamap; /* number of times fault maps a neighbor anon page */ - int fltnomap; /* number of times fault maps a neighbor obj page */ - int fltlget; /* number of times fault does a locked pgo_get */ - int fltget; /* number of times fault does an unlocked get */ - int flt_anon; /* number of times fault anon (case 1a) */ - int flt_acow; /* number of times fault anon cow (case 1b) */ - int flt_obj; /* number of times fault is on object page (2a) */ - int flt_prcopy; /* number of times fault promotes with copy (2b) */ - int flt_przero; /* number of times fault promotes with zerofill (2b) */ + int fltnoram; /* [p] # of times fault was out of ram */ + int fltnoanon; /* [p] # of times fault was out of anons */ + int fltnoamap; /* [p] # of times fault was out of amap chunks */ + int fltpgwait; /* [p] # of times fault had to wait on a page */ + int fltpgrele; /* [p] # of times fault found a released page */ + int fltrelck; /* [p] # of times fault relock called */ + int fltrelckok; /* [p] # of times fault relock is a success */ + int fltanget; /* [p] # of times fault gets anon page */ + int fltanretry; /* [p] # of times fault retrys an anon get */ + int fltamcopy; /* [p] # of times fault clears "needs copy" */ + int fltnamap; /* [p] # of times fault maps a neighbor anon page */ + int fltnomap; /* [p] # of times fault maps a neighbor obj page */ + int fltlget; /* [p] # of times fault does a locked pgo_get */ + int fltget; /* [p] # of times fault does an unlocked get */ + int flt_anon; /* [p] # of times fault anon (case 1a) */ + int flt_acow; /* [p] # of times fault anon cow (case 1b) */ + int flt_obj; /* [p] # of times fault is on object page (2a) */ + int flt_prcopy; /* [p] # of times fault promotes with copy (2b) */ + int flt_przero; /* [p] # of times fault promotes with zerofill (2b) */ /* daemon counters */ - int pdwoke; /* number of times daemon woke up */ + int pdwoke; /* [F] # of times daemon woke up */ int pdrevs; /* number of times daemon rev'd clock hand */ int pdswout; /* number of times daemon called for swapout */ int pdfreed; /* number of pages daemon freed since boot */