From: Martin Pieuchot Subject: Re: uvm_pageout() cleanup To: tech@openbsd.org Date: Sat, 23 Mar 2024 12:41:46 +0100 On 18/03/24(Mon) 17:51, Martin Pieuchot wrote: > It isn't clear to me how concurrent accesses to uvmexp.* variables > should be protected and/or dealt with. The diff below is a step towards > documenting and cleaning some of this mess. It does the following: > > - Stop calling uvmpd_tune() inside uvm_pageout(). We currently do not > support adding RAM. `uvmexp.npages' is immutable after boot. This > helps thinking about the globals it modifies. > > - Cleanup uvmpd_tune() and document that `uvmexp.freemin' and > `uvmexp.freetarg' are immutable. > > - Try to read `uvmexp.free' as much as possible under `uvm_fpageq_lock'. > > - Reduce the scope of the `uvm_pageq_lock' lock. It serializes accesses > to `uvmexp.active' and `uvmexp.inactive'. > > - Document which locks/mechanisms are protecting some fields in struct uvmexp. > > I'm still unhappy about the accesses to `uvmexp.inactive' and > `uvmexp.inactarg' inside uvm_pmr_getpages() but I don't want to grab > another global mutex there as it is a highly contended code path. So > how to proceed is open to discussion. However I still think the diff > below is a step forward. > > ok? Anyone? > Index: uvm/uvm_pdaemon.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v > diff -u -p -r1.109 uvm_pdaemon.c > --- uvm/uvm_pdaemon.c 27 Oct 2023 19:18:53 -0000 1.109 > +++ uvm/uvm_pdaemon.c 17 Mar 2024 17:14:54 -0000 > @@ -165,33 +165,27 @@ uvm_wait(const char *wmsg) > > /* > * uvmpd_tune: tune paging parameters > - * > - * => called whenever memory is added to (or removed from?) the system > - * => caller must call with page queues locked > */ > - > void > uvmpd_tune(void) > { > + int val; > > - uvmexp.freemin = uvmexp.npages / 30; > + val = uvmexp.npages / 30; > > - /* between 16k and 512k */ > /* XXX: what are these values good for? */ > - uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT); > -#if 0 > - uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT); > -#endif > + val = max(val, (16*1024) >> PAGE_SHIFT); > > /* Make sure there's always a user page free. */ > - if (uvmexp.freemin < uvmexp.reserve_kernel + 1) > - uvmexp.freemin = uvmexp.reserve_kernel + 1; > - > - uvmexp.freetarg = (uvmexp.freemin * 4) / 3; > - if (uvmexp.freetarg <= uvmexp.freemin) > - uvmexp.freetarg = uvmexp.freemin + 1; > - > - /* uvmexp.inactarg: computed in main daemon loop */ > + if (val < uvmexp.reserve_kernel + 1) > + val = uvmexp.reserve_kernel + 1; > + uvmexp.freemin = val; > + > + /* Calculate free target. */ > + val = (uvmexp.freemin * 4) / 3; > + if (val <= uvmexp.freemin) > + val = uvmexp.freemin + 1; > + uvmexp.freetarg = val; > > uvmexp.wiredmax = uvmexp.npages / 3; > } > @@ -211,15 +205,12 @@ uvm_pageout(void *arg) > { > struct uvm_constraint_range constraint; > struct uvm_pmalloc *pma; > - int npages = 0; > + int free; > > /* ensure correct priority and set paging parameters... */ > uvm.pagedaemon_proc = curproc; > (void) spl0(); > - uvm_lock_pageq(); > - npages = uvmexp.npages; > uvmpd_tune(); > - uvm_unlock_pageq(); > > for (;;) { > long size; > @@ -245,44 +236,38 @@ uvm_pageout(void *arg) > } else > constraint = no_constraint; > } > - > + free = uvmexp.free - BUFPAGES_DEFICIT; > uvm_unlock_fpageq(); > > /* > * now lock page queues and recompute inactive count > */ > uvm_lock_pageq(); > - if (npages != uvmexp.npages) { /* check for new pages? */ > - npages = uvmexp.npages; > - uvmpd_tune(); > - } > - > uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3; > if (uvmexp.inactarg <= uvmexp.freetarg) { > uvmexp.inactarg = uvmexp.freetarg + 1; > } > + uvm_unlock_pageq(); > > /* Reclaim pages from the buffer cache if possible. */ > size = 0; > if (pma != NULL) > size += pma->pm_size >> PAGE_SHIFT; > - if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg) > - size += uvmexp.freetarg - (uvmexp.free - > - BUFPAGES_DEFICIT); > + if (free < uvmexp.freetarg) > + size += uvmexp.freetarg - free; > if (size == 0) > size = 16; /* XXX */ > - uvm_unlock_pageq(); > + > (void) bufbackoff(&constraint, size * 2); > #if NDRM > 0 > drmbackoff(size * 2); > #endif > - uvm_lock_pageq(); > - > /* > * scan if needed > */ > - if (pma != NULL || > - ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) || > + uvm_lock_pageq(); > + free = uvmexp.free - BUFPAGES_DEFICIT; > + if (pma != NULL || (free < uvmexp.freetarg) || > ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) { > uvmpd_scan(pma, &constraint); > } > Index: uvm/uvmexp.h > =================================================================== > RCS file: /cvs/src/sys/uvm/uvmexp.h,v > diff -u -p -r1.11 uvmexp.h > --- uvm/uvmexp.h 27 Oct 2023 19:18:53 -0000 1.11 > +++ uvm/uvmexp.h 17 Mar 2024 17:18:47 -0000 > @@ -45,7 +45,9 @@ > * I immutable after creation > * K kernel lock > * F uvm_lock_fpageq > + * L uvm_lock_pageq > * S uvm_swap_data_lock > + * p copy of per-CPU counters, used only by userland. > */ > struct uvmexp { > /* vm_page constants */ > @@ -56,8 +58,8 @@ struct uvmexp { > /* vm_page counters */ > int npages; /* [I] number of pages we manage */ > int free; /* [F] number of free pages */ > - int active; /* number of active pages */ > - int inactive; /* number of pages that we free'd but may want back */ > + int active; /* [L] # of active pages */ > + int inactive; /* [L] # of pages that we free'd but may want back */ > int paging; /* number of pages in the process of being paged out */ > int wired; /* number of wired pages */ > > @@ -69,10 +71,10 @@ struct uvmexp { > int vtextpages; /* XXX # of pages used by vtext vnodes */ > > /* pageout params */ > - int freemin; /* min number of free pages */ > - int freetarg; /* target number of free pages */ > + int freemin; /* [I] min number of free pages */ > + int freetarg; /* [I] target number of free pages */ > int inactarg; /* target number of inactive pages */ > - int wiredmax; /* max number of wired pages */ > + int wiredmax; /* [I] max number of wired pages */ > int anonmin; /* min threshold for anon pages */ > int vtextmin; /* min threshold for vtext pages */ > int vnodemin; /* min threshold for vnode pages */ > @@ -91,16 +93,16 @@ struct uvmexp { > int unused06; /* formerly nfreeanon */ > > /* stat counters */ > - int faults; /* page fault count */ > + int faults; /* [p] page fault count */ > int traps; /* trap count */ > int intrs; /* interrupt count */ > int swtch; /* context switch count */ > int softs; /* software interrupt count */ > int syscalls; /* system calls */ > - int pageins; /* pagein operation count */ > + int pageins; /* [p] pagein operation count */ > /* pageouts are in pdpageouts below */ > - int unused07; /* formerly obsolete_swapins */ > - int unused08; /* formerly obsolete_swapouts */ > + int unused07; /* formerly obsolete_swapins */ > + int unused08; /* formerly obsolete_swapouts */ > int pgswapin; /* pages swapped in */ > int pgswapout; /* pages swapped out */ > int forks; /* forks */ > @@ -113,28 +115,28 @@ struct uvmexp { > int unused09; /* formerly zeroaborts */ > > /* fault subcounters */ > - int fltnoram; /* number of times fault was out of ram */ > - int fltnoanon; /* number of times fault was out of anons */ > - int fltnoamap; /* number of times fault was out of amap chunks */ > - int fltpgwait; /* number of times fault had to wait on a page */ > - int fltpgrele; /* number of times fault found a released page */ > - int fltrelck; /* number of times fault relock called */ > - int fltrelckok; /* number of times fault relock is a success */ > - int fltanget; /* number of times fault gets anon page */ > - int fltanretry; /* number of times fault retrys an anon get */ > - int fltamcopy; /* number of times fault clears "needs copy" */ > - int fltnamap; /* number of times fault maps a neighbor anon page */ > - int fltnomap; /* number of times fault maps a neighbor obj page */ > - int fltlget; /* number of times fault does a locked pgo_get */ > - int fltget; /* number of times fault does an unlocked get */ > - int flt_anon; /* number of times fault anon (case 1a) */ > - int flt_acow; /* number of times fault anon cow (case 1b) */ > - int flt_obj; /* number of times fault is on object page (2a) */ > - int flt_prcopy; /* number of times fault promotes with copy (2b) */ > - int flt_przero; /* number of times fault promotes with zerofill (2b) */ > + int fltnoram; /* [p] # of times fault was out of ram */ > + int fltnoanon; /* [p] # of times fault was out of anons */ > + int fltnoamap; /* [p] # of times fault was out of amap chunks */ > + int fltpgwait; /* [p] # of times fault had to wait on a page */ > + int fltpgrele; /* [p] # of times fault found a released page */ > + int fltrelck; /* [p] # of times fault relock called */ > + int fltrelckok; /* [p] # of times fault relock is a success */ > + int fltanget; /* [p] # of times fault gets anon page */ > + int fltanretry; /* [p] # of times fault retrys an anon get */ > + int fltamcopy; /* [p] # of times fault clears "needs copy" */ > + int fltnamap; /* [p] # of times fault maps a neighbor anon page */ > + int fltnomap; /* [p] # of times fault maps a neighbor obj page */ > + int fltlget; /* [p] # of times fault does a locked pgo_get */ > + int fltget; /* [p] # of times fault does an unlocked get */ > + int flt_anon; /* [p] # of times fault anon (case 1a) */ > + int flt_acow; /* [p] # of times fault anon cow (case 1b) */ > + int flt_obj; /* [p] # of times fault is on object page (2a) */ > + int flt_prcopy; /* [p] # of times fault promotes with copy (2b) */ > + int flt_przero; /* [p] # of times fault promotes with zerofill (2b) */ > > /* daemon counters */ > - int pdwoke; /* number of times daemon woke up */ > + int pdwoke; /* [F] # of times daemon woke up */ > int pdrevs; /* number of times daemon rev'd clock hand */ > int pdswout; /* number of times daemon called for swapout */ > int pdfreed; /* number of pages daemon freed since boot */ >