Download raw body.
uvm_pageout() cleanup
> Date: Sat, 23 Mar 2024 12:41:46 +0100
> From: Martin Pieuchot <mpi@openbsd.org>
>
> On 18/03/24(Mon) 17:51, Martin Pieuchot wrote:
> > It isn't clear to me how concurrent accesses to uvmexp.* variables
> > should be protected and/or dealt with. The diff below is a step towards
> > documenting and cleaning some of this mess. It does the following:
> >
> > - Stop calling uvmpd_tune() inside uvm_pageout(). We currently do not
> > support adding RAM. `uvmexp.npages' is immutable after boot. This
> > helps thinking about the globals it modifies.
> >
> > - Cleanup uvmpd_tune() and document that `uvmexp.freemin' and
> > `uvmexp.freetarg' are immutable.
> >
> > - Try to read `uvmexp.free' as much as possible under `uvm_fpageq_lock'.
> >
> > - Reduce the scope of the `uvm_pageq_lock' lock. It serializes accesses
> > to `uvmexp.active' and `uvmexp.inactive'.
> >
> > - Document which locks/mechanisms are protecting some fields in struct uvmexp.
> >
> > I'm still unhappy about the accesses to `uvmexp.inactive' and
> > `uvmexp.inactarg' inside uvm_pmr_getpages() but I don't want to grab
> > another global mutex there as it is a highly contended code path. So
> > how to proceed is open to discussion. However I still think the diff
> > below is a step forward.
> >
> > ok?
>
> Anyone?
Sure. I can't really see us hotplugging physmem. Except perhaps in
virtual machines.
ok kettenis@
> > Index: uvm/uvm_pdaemon.c
> > ===================================================================
> > RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> > diff -u -p -r1.109 uvm_pdaemon.c
> > --- uvm/uvm_pdaemon.c 27 Oct 2023 19:18:53 -0000 1.109
> > +++ uvm/uvm_pdaemon.c 17 Mar 2024 17:14:54 -0000
> > @@ -165,33 +165,27 @@ uvm_wait(const char *wmsg)
> >
> > /*
> > * uvmpd_tune: tune paging parameters
> > - *
> > - * => called whenever memory is added to (or removed from?) the system
> > - * => caller must call with page queues locked
> > */
> > -
> > void
> > uvmpd_tune(void)
> > {
> > + int val;
> >
> > - uvmexp.freemin = uvmexp.npages / 30;
> > + val = uvmexp.npages / 30;
> >
> > - /* between 16k and 512k */
> > /* XXX: what are these values good for? */
> > - uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
> > -#if 0
> > - uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT);
> > -#endif
> > + val = max(val, (16*1024) >> PAGE_SHIFT);
> >
> > /* Make sure there's always a user page free. */
> > - if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
> > - uvmexp.freemin = uvmexp.reserve_kernel + 1;
> > -
> > - uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
> > - if (uvmexp.freetarg <= uvmexp.freemin)
> > - uvmexp.freetarg = uvmexp.freemin + 1;
> > -
> > - /* uvmexp.inactarg: computed in main daemon loop */
> > + if (val < uvmexp.reserve_kernel + 1)
> > + val = uvmexp.reserve_kernel + 1;
> > + uvmexp.freemin = val;
> > +
> > + /* Calculate free target. */
> > + val = (uvmexp.freemin * 4) / 3;
> > + if (val <= uvmexp.freemin)
> > + val = uvmexp.freemin + 1;
> > + uvmexp.freetarg = val;
> >
> > uvmexp.wiredmax = uvmexp.npages / 3;
> > }
> > @@ -211,15 +205,12 @@ uvm_pageout(void *arg)
> > {
> > struct uvm_constraint_range constraint;
> > struct uvm_pmalloc *pma;
> > - int npages = 0;
> > + int free;
> >
> > /* ensure correct priority and set paging parameters... */
> > uvm.pagedaemon_proc = curproc;
> > (void) spl0();
> > - uvm_lock_pageq();
> > - npages = uvmexp.npages;
> > uvmpd_tune();
> > - uvm_unlock_pageq();
> >
> > for (;;) {
> > long size;
> > @@ -245,44 +236,38 @@ uvm_pageout(void *arg)
> > } else
> > constraint = no_constraint;
> > }
> > -
> > + free = uvmexp.free - BUFPAGES_DEFICIT;
> > uvm_unlock_fpageq();
> >
> > /*
> > * now lock page queues and recompute inactive count
> > */
> > uvm_lock_pageq();
> > - if (npages != uvmexp.npages) { /* check for new pages? */
> > - npages = uvmexp.npages;
> > - uvmpd_tune();
> > - }
> > -
> > uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
> > if (uvmexp.inactarg <= uvmexp.freetarg) {
> > uvmexp.inactarg = uvmexp.freetarg + 1;
> > }
> > + uvm_unlock_pageq();
> >
> > /* Reclaim pages from the buffer cache if possible. */
> > size = 0;
> > if (pma != NULL)
> > size += pma->pm_size >> PAGE_SHIFT;
> > - if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg)
> > - size += uvmexp.freetarg - (uvmexp.free -
> > - BUFPAGES_DEFICIT);
> > + if (free < uvmexp.freetarg)
> > + size += uvmexp.freetarg - free;
> > if (size == 0)
> > size = 16; /* XXX */
> > - uvm_unlock_pageq();
> > +
> > (void) bufbackoff(&constraint, size * 2);
> > #if NDRM > 0
> > drmbackoff(size * 2);
> > #endif
> > - uvm_lock_pageq();
> > -
> > /*
> > * scan if needed
> > */
> > - if (pma != NULL ||
> > - ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
> > + uvm_lock_pageq();
> > + free = uvmexp.free - BUFPAGES_DEFICIT;
> > + if (pma != NULL || (free < uvmexp.freetarg) ||
> > ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
> > uvmpd_scan(pma, &constraint);
> > }
> > Index: uvm/uvmexp.h
> > ===================================================================
> > RCS file: /cvs/src/sys/uvm/uvmexp.h,v
> > diff -u -p -r1.11 uvmexp.h
> > --- uvm/uvmexp.h 27 Oct 2023 19:18:53 -0000 1.11
> > +++ uvm/uvmexp.h 17 Mar 2024 17:18:47 -0000
> > @@ -45,7 +45,9 @@
> > * I immutable after creation
> > * K kernel lock
> > * F uvm_lock_fpageq
> > + * L uvm_lock_pageq
> > * S uvm_swap_data_lock
> > + * p copy of per-CPU counters, used only by userland.
> > */
> > struct uvmexp {
> > /* vm_page constants */
> > @@ -56,8 +58,8 @@ struct uvmexp {
> > /* vm_page counters */
> > int npages; /* [I] number of pages we manage */
> > int free; /* [F] number of free pages */
> > - int active; /* number of active pages */
> > - int inactive; /* number of pages that we free'd but may want back */
> > + int active; /* [L] # of active pages */
> > + int inactive; /* [L] # of pages that we free'd but may want back */
> > int paging; /* number of pages in the process of being paged out */
> > int wired; /* number of wired pages */
> >
> > @@ -69,10 +71,10 @@ struct uvmexp {
> > int vtextpages; /* XXX # of pages used by vtext vnodes */
> >
> > /* pageout params */
> > - int freemin; /* min number of free pages */
> > - int freetarg; /* target number of free pages */
> > + int freemin; /* [I] min number of free pages */
> > + int freetarg; /* [I] target number of free pages */
> > int inactarg; /* target number of inactive pages */
> > - int wiredmax; /* max number of wired pages */
> > + int wiredmax; /* [I] max number of wired pages */
> > int anonmin; /* min threshold for anon pages */
> > int vtextmin; /* min threshold for vtext pages */
> > int vnodemin; /* min threshold for vnode pages */
> > @@ -91,16 +93,16 @@ struct uvmexp {
> > int unused06; /* formerly nfreeanon */
> >
> > /* stat counters */
> > - int faults; /* page fault count */
> > + int faults; /* [p] page fault count */
> > int traps; /* trap count */
> > int intrs; /* interrupt count */
> > int swtch; /* context switch count */
> > int softs; /* software interrupt count */
> > int syscalls; /* system calls */
> > - int pageins; /* pagein operation count */
> > + int pageins; /* [p] pagein operation count */
> > /* pageouts are in pdpageouts below */
> > - int unused07; /* formerly obsolete_swapins */
> > - int unused08; /* formerly obsolete_swapouts */
> > + int unused07; /* formerly obsolete_swapins */
> > + int unused08; /* formerly obsolete_swapouts */
> > int pgswapin; /* pages swapped in */
> > int pgswapout; /* pages swapped out */
> > int forks; /* forks */
> > @@ -113,28 +115,28 @@ struct uvmexp {
> > int unused09; /* formerly zeroaborts */
> >
> > /* fault subcounters */
> > - int fltnoram; /* number of times fault was out of ram */
> > - int fltnoanon; /* number of times fault was out of anons */
> > - int fltnoamap; /* number of times fault was out of amap chunks */
> > - int fltpgwait; /* number of times fault had to wait on a page */
> > - int fltpgrele; /* number of times fault found a released page */
> > - int fltrelck; /* number of times fault relock called */
> > - int fltrelckok; /* number of times fault relock is a success */
> > - int fltanget; /* number of times fault gets anon page */
> > - int fltanretry; /* number of times fault retrys an anon get */
> > - int fltamcopy; /* number of times fault clears "needs copy" */
> > - int fltnamap; /* number of times fault maps a neighbor anon page */
> > - int fltnomap; /* number of times fault maps a neighbor obj page */
> > - int fltlget; /* number of times fault does a locked pgo_get */
> > - int fltget; /* number of times fault does an unlocked get */
> > - int flt_anon; /* number of times fault anon (case 1a) */
> > - int flt_acow; /* number of times fault anon cow (case 1b) */
> > - int flt_obj; /* number of times fault is on object page (2a) */
> > - int flt_prcopy; /* number of times fault promotes with copy (2b) */
> > - int flt_przero; /* number of times fault promotes with zerofill (2b) */
> > + int fltnoram; /* [p] # of times fault was out of ram */
> > + int fltnoanon; /* [p] # of times fault was out of anons */
> > + int fltnoamap; /* [p] # of times fault was out of amap chunks */
> > + int fltpgwait; /* [p] # of times fault had to wait on a page */
> > + int fltpgrele; /* [p] # of times fault found a released page */
> > + int fltrelck; /* [p] # of times fault relock called */
> > + int fltrelckok; /* [p] # of times fault relock is a success */
> > + int fltanget; /* [p] # of times fault gets anon page */
> > + int fltanretry; /* [p] # of times fault retrys an anon get */
> > + int fltamcopy; /* [p] # of times fault clears "needs copy" */
> > + int fltnamap; /* [p] # of times fault maps a neighbor anon page */
> > + int fltnomap; /* [p] # of times fault maps a neighbor obj page */
> > + int fltlget; /* [p] # of times fault does a locked pgo_get */
> > + int fltget; /* [p] # of times fault does an unlocked get */
> > + int flt_anon; /* [p] # of times fault anon (case 1a) */
> > + int flt_acow; /* [p] # of times fault anon cow (case 1b) */
> > + int flt_obj; /* [p] # of times fault is on object page (2a) */
> > + int flt_prcopy; /* [p] # of times fault promotes with copy (2b) */
> > + int flt_przero; /* [p] # of times fault promotes with zerofill (2b) */
> >
> > /* daemon counters */
> > - int pdwoke; /* number of times daemon woke up */
> > + int pdwoke; /* [F] # of times daemon woke up */
> > int pdrevs; /* number of times daemon rev'd clock hand */
> > int pdswout; /* number of times daemon called for swapout */
> > int pdfreed; /* number of pages daemon freed since boot */
> >
>
>
uvm_pageout() cleanup