From: Mark Kettenis Subject: Re: uvm_pageout() cleanup To: Martin Pieuchot Cc: tech@openbsd.org Date: Sat, 23 Mar 2024 13:06:46 +0100 > Date: Sat, 23 Mar 2024 12:41:46 +0100 > From: Martin Pieuchot > > On 18/03/24(Mon) 17:51, Martin Pieuchot wrote: > > It isn't clear to me how concurrent accesses to uvmexp.* variables > > should be protected and/or dealt with. The diff below is a step towards > > documenting and cleaning some of this mess. It does the following: > > > > - Stop calling uvmpd_tune() inside uvm_pageout(). We currently do not > > support adding RAM. `uvmexp.npages' is immutable after boot. This > > helps thinking about the globals it modifies. > > > > - Cleanup uvmpd_tune() and document that `uvmexp.freemin' and > > `uvmexp.freetarg' are immutable. > > > > - Try to read `uvmexp.free' as much as possible under `uvm_fpageq_lock'. > > > > - Reduce the scope of the `uvm_pageq_lock' lock. It serializes accesses > > to `uvmexp.active' and `uvmexp.inactive'. > > > > - Document which locks/mechanisms are protecting some fields in struct uvmexp. > > > > I'm still unhappy about the accesses to `uvmexp.inactive' and > > `uvmexp.inactarg' inside uvm_pmr_getpages() but I don't want to grab > > another global mutex there as it is a highly contended code path. So > > how to proceed is open to discussion. However I still think the diff > > below is a step forward. > > > > ok? > > Anyone? Sure. I can't really see us hotplugging physmem. Except perhaps in virtual machines. ok kettenis@ > > Index: uvm/uvm_pdaemon.c > > =================================================================== > > RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v > > diff -u -p -r1.109 uvm_pdaemon.c > > --- uvm/uvm_pdaemon.c 27 Oct 2023 19:18:53 -0000 1.109 > > +++ uvm/uvm_pdaemon.c 17 Mar 2024 17:14:54 -0000 > > @@ -165,33 +165,27 @@ uvm_wait(const char *wmsg) > > > > /* > > * uvmpd_tune: tune paging parameters > > - * > > - * => called whenever memory is added to (or removed from?) the system > > - * => caller must call with page queues locked > > */ > > - > > void > > uvmpd_tune(void) > > { > > + int val; > > > > - uvmexp.freemin = uvmexp.npages / 30; > > + val = uvmexp.npages / 30; > > > > - /* between 16k and 512k */ > > /* XXX: what are these values good for? */ > > - uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT); > > -#if 0 > > - uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT); > > -#endif > > + val = max(val, (16*1024) >> PAGE_SHIFT); > > > > /* Make sure there's always a user page free. */ > > - if (uvmexp.freemin < uvmexp.reserve_kernel + 1) > > - uvmexp.freemin = uvmexp.reserve_kernel + 1; > > - > > - uvmexp.freetarg = (uvmexp.freemin * 4) / 3; > > - if (uvmexp.freetarg <= uvmexp.freemin) > > - uvmexp.freetarg = uvmexp.freemin + 1; > > - > > - /* uvmexp.inactarg: computed in main daemon loop */ > > + if (val < uvmexp.reserve_kernel + 1) > > + val = uvmexp.reserve_kernel + 1; > > + uvmexp.freemin = val; > > + > > + /* Calculate free target. */ > > + val = (uvmexp.freemin * 4) / 3; > > + if (val <= uvmexp.freemin) > > + val = uvmexp.freemin + 1; > > + uvmexp.freetarg = val; > > > > uvmexp.wiredmax = uvmexp.npages / 3; > > } > > @@ -211,15 +205,12 @@ uvm_pageout(void *arg) > > { > > struct uvm_constraint_range constraint; > > struct uvm_pmalloc *pma; > > - int npages = 0; > > + int free; > > > > /* ensure correct priority and set paging parameters... */ > > uvm.pagedaemon_proc = curproc; > > (void) spl0(); > > - uvm_lock_pageq(); > > - npages = uvmexp.npages; > > uvmpd_tune(); > > - uvm_unlock_pageq(); > > > > for (;;) { > > long size; > > @@ -245,44 +236,38 @@ uvm_pageout(void *arg) > > } else > > constraint = no_constraint; > > } > > - > > + free = uvmexp.free - BUFPAGES_DEFICIT; > > uvm_unlock_fpageq(); > > > > /* > > * now lock page queues and recompute inactive count > > */ > > uvm_lock_pageq(); > > - if (npages != uvmexp.npages) { /* check for new pages? */ > > - npages = uvmexp.npages; > > - uvmpd_tune(); > > - } > > - > > uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3; > > if (uvmexp.inactarg <= uvmexp.freetarg) { > > uvmexp.inactarg = uvmexp.freetarg + 1; > > } > > + uvm_unlock_pageq(); > > > > /* Reclaim pages from the buffer cache if possible. */ > > size = 0; > > if (pma != NULL) > > size += pma->pm_size >> PAGE_SHIFT; > > - if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg) > > - size += uvmexp.freetarg - (uvmexp.free - > > - BUFPAGES_DEFICIT); > > + if (free < uvmexp.freetarg) > > + size += uvmexp.freetarg - free; > > if (size == 0) > > size = 16; /* XXX */ > > - uvm_unlock_pageq(); > > + > > (void) bufbackoff(&constraint, size * 2); > > #if NDRM > 0 > > drmbackoff(size * 2); > > #endif > > - uvm_lock_pageq(); > > - > > /* > > * scan if needed > > */ > > - if (pma != NULL || > > - ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) || > > + uvm_lock_pageq(); > > + free = uvmexp.free - BUFPAGES_DEFICIT; > > + if (pma != NULL || (free < uvmexp.freetarg) || > > ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) { > > uvmpd_scan(pma, &constraint); > > } > > Index: uvm/uvmexp.h > > =================================================================== > > RCS file: /cvs/src/sys/uvm/uvmexp.h,v > > diff -u -p -r1.11 uvmexp.h > > --- uvm/uvmexp.h 27 Oct 2023 19:18:53 -0000 1.11 > > +++ uvm/uvmexp.h 17 Mar 2024 17:18:47 -0000 > > @@ -45,7 +45,9 @@ > > * I immutable after creation > > * K kernel lock > > * F uvm_lock_fpageq > > + * L uvm_lock_pageq > > * S uvm_swap_data_lock > > + * p copy of per-CPU counters, used only by userland. > > */ > > struct uvmexp { > > /* vm_page constants */ > > @@ -56,8 +58,8 @@ struct uvmexp { > > /* vm_page counters */ > > int npages; /* [I] number of pages we manage */ > > int free; /* [F] number of free pages */ > > - int active; /* number of active pages */ > > - int inactive; /* number of pages that we free'd but may want back */ > > + int active; /* [L] # of active pages */ > > + int inactive; /* [L] # of pages that we free'd but may want back */ > > int paging; /* number of pages in the process of being paged out */ > > int wired; /* number of wired pages */ > > > > @@ -69,10 +71,10 @@ struct uvmexp { > > int vtextpages; /* XXX # of pages used by vtext vnodes */ > > > > /* pageout params */ > > - int freemin; /* min number of free pages */ > > - int freetarg; /* target number of free pages */ > > + int freemin; /* [I] min number of free pages */ > > + int freetarg; /* [I] target number of free pages */ > > int inactarg; /* target number of inactive pages */ > > - int wiredmax; /* max number of wired pages */ > > + int wiredmax; /* [I] max number of wired pages */ > > int anonmin; /* min threshold for anon pages */ > > int vtextmin; /* min threshold for vtext pages */ > > int vnodemin; /* min threshold for vnode pages */ > > @@ -91,16 +93,16 @@ struct uvmexp { > > int unused06; /* formerly nfreeanon */ > > > > /* stat counters */ > > - int faults; /* page fault count */ > > + int faults; /* [p] page fault count */ > > int traps; /* trap count */ > > int intrs; /* interrupt count */ > > int swtch; /* context switch count */ > > int softs; /* software interrupt count */ > > int syscalls; /* system calls */ > > - int pageins; /* pagein operation count */ > > + int pageins; /* [p] pagein operation count */ > > /* pageouts are in pdpageouts below */ > > - int unused07; /* formerly obsolete_swapins */ > > - int unused08; /* formerly obsolete_swapouts */ > > + int unused07; /* formerly obsolete_swapins */ > > + int unused08; /* formerly obsolete_swapouts */ > > int pgswapin; /* pages swapped in */ > > int pgswapout; /* pages swapped out */ > > int forks; /* forks */ > > @@ -113,28 +115,28 @@ struct uvmexp { > > int unused09; /* formerly zeroaborts */ > > > > /* fault subcounters */ > > - int fltnoram; /* number of times fault was out of ram */ > > - int fltnoanon; /* number of times fault was out of anons */ > > - int fltnoamap; /* number of times fault was out of amap chunks */ > > - int fltpgwait; /* number of times fault had to wait on a page */ > > - int fltpgrele; /* number of times fault found a released page */ > > - int fltrelck; /* number of times fault relock called */ > > - int fltrelckok; /* number of times fault relock is a success */ > > - int fltanget; /* number of times fault gets anon page */ > > - int fltanretry; /* number of times fault retrys an anon get */ > > - int fltamcopy; /* number of times fault clears "needs copy" */ > > - int fltnamap; /* number of times fault maps a neighbor anon page */ > > - int fltnomap; /* number of times fault maps a neighbor obj page */ > > - int fltlget; /* number of times fault does a locked pgo_get */ > > - int fltget; /* number of times fault does an unlocked get */ > > - int flt_anon; /* number of times fault anon (case 1a) */ > > - int flt_acow; /* number of times fault anon cow (case 1b) */ > > - int flt_obj; /* number of times fault is on object page (2a) */ > > - int flt_prcopy; /* number of times fault promotes with copy (2b) */ > > - int flt_przero; /* number of times fault promotes with zerofill (2b) */ > > + int fltnoram; /* [p] # of times fault was out of ram */ > > + int fltnoanon; /* [p] # of times fault was out of anons */ > > + int fltnoamap; /* [p] # of times fault was out of amap chunks */ > > + int fltpgwait; /* [p] # of times fault had to wait on a page */ > > + int fltpgrele; /* [p] # of times fault found a released page */ > > + int fltrelck; /* [p] # of times fault relock called */ > > + int fltrelckok; /* [p] # of times fault relock is a success */ > > + int fltanget; /* [p] # of times fault gets anon page */ > > + int fltanretry; /* [p] # of times fault retrys an anon get */ > > + int fltamcopy; /* [p] # of times fault clears "needs copy" */ > > + int fltnamap; /* [p] # of times fault maps a neighbor anon page */ > > + int fltnomap; /* [p] # of times fault maps a neighbor obj page */ > > + int fltlget; /* [p] # of times fault does a locked pgo_get */ > > + int fltget; /* [p] # of times fault does an unlocked get */ > > + int flt_anon; /* [p] # of times fault anon (case 1a) */ > > + int flt_acow; /* [p] # of times fault anon cow (case 1b) */ > > + int flt_obj; /* [p] # of times fault is on object page (2a) */ > > + int flt_prcopy; /* [p] # of times fault promotes with copy (2b) */ > > + int flt_przero; /* [p] # of times fault promotes with zerofill (2b) */ > > > > /* daemon counters */ > > - int pdwoke; /* number of times daemon woke up */ > > + int pdwoke; /* [F] # of times daemon woke up */ > > int pdrevs; /* number of times daemon rev'd clock hand */ > > int pdswout; /* number of times daemon called for swapout */ > > int pdfreed; /* number of pages daemon freed since boot */ > > > >