uvm_pageout() cleanup
On 18/03/24(Mon) 17:51, Martin Pieuchot wrote:
> It isn't clear to me how concurrent accesses to uvmexp.* variables
> should be protected and/or dealt with. The diff below is a step towards
> documenting and cleaning some of this mess. It does the following:
>
> - Stop calling uvmpd_tune() inside uvm_pageout(). We currently do not
> support adding RAM. `uvmexp.npages' is immutable after boot. This
> makes it easier to reason about the globals uvmpd_tune() modifies.
>
> - Clean up uvmpd_tune() and document that `uvmexp.freemin' and
> `uvmexp.freetarg' are immutable.
>
> - Try to read `uvmexp.free' as much as possible under `uvm_fpageq_lock'.
>
> - Reduce the scope of the `uvm_pageq_lock' lock. It serializes accesses
> to `uvmexp.active' and `uvmexp.inactive'.
>
> - Document which locks/mechanisms are protecting some fields in struct uvmexp.
>
> I'm still unhappy about the accesses to `uvmexp.inactive' and
> `uvmexp.inactarg' inside uvm_pmr_getpages() but I don't want to grab
> another global mutex there as it is a highly contended code path. So
> how to proceed is open to discussion. However, I still think the diff
> below is a step forward.
>
> ok?
Anyone?
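
To put numbers on the uvmpd_tune() part: on a hypothetical 4GB machine
with 4K pages (npages = 1048576), freemin ends up at 1048576 / 30 =
34952 pages (~136MB) and freetarg at 34952 * 4 / 3 = 46602 pages
(~182MB); the (16*1024) >> PAGE_SHIFT floor (4 pages) and the
reserve_kernel + 1 floor only matter on very small configurations.
The diff doesn't change this arithmetic; it only stops recomputing it
after boot.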
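
The `uvmexp.free' changes boil down to taking a snapshot of the counter
while the free page queue mutex is held and doing every later comparison
against that local copy instead of re-reading the global unlocked.  Here
is a standalone userland sketch of the same pattern; shared_free,
fpageq_mtx and free_target are invented stand-ins for uvmexp.free, the
fpageq mutex and uvmexp.freetarg, not the kernel code itself:

/*
 * Sketch of the "snapshot under the lock" pattern used in the diff:
 * read the shared counter once while holding its mutex, then do all
 * the arithmetic on the local copy.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t fpageq_mtx = PTHREAD_MUTEX_INITIALIZER;
static int shared_free = 1000;          /* stands in for uvmexp.free */
static const int free_target = 512;     /* stands in for uvmexp.freetarg */

int
pages_to_reclaim(int deficit)
{
        int free, size = 0;

        pthread_mutex_lock(&fpageq_mtx);
        free = shared_free - deficit;   /* snapshot while locked */
        pthread_mutex_unlock(&fpageq_mtx);

        /* Every later decision uses the snapshot, not the global. */
        if (free < free_target)
                size = free_target - free;
        return size;
}

int
main(void)
{
        printf("reclaim %d pages\n", pages_to_reclaim(600));
        return 0;
}

The point is not the arithmetic but that one iteration of the loop makes
all its decisions against a single, consistent value of the counter.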
> Index: uvm/uvm_pdaemon.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> diff -u -p -r1.109 uvm_pdaemon.c
> --- uvm/uvm_pdaemon.c 27 Oct 2023 19:18:53 -0000 1.109
> +++ uvm/uvm_pdaemon.c 17 Mar 2024 17:14:54 -0000
> @@ -165,33 +165,27 @@ uvm_wait(const char *wmsg)
>
> /*
> * uvmpd_tune: tune paging parameters
> - *
> - * => called whenever memory is added to (or removed from?) the system
> - * => caller must call with page queues locked
> */
> -
> void
> uvmpd_tune(void)
> {
> + int val;
>
> - uvmexp.freemin = uvmexp.npages / 30;
> + val = uvmexp.npages / 30;
>
> - /* between 16k and 512k */
> /* XXX: what are these values good for? */
> - uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
> -#if 0
> - uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT);
> -#endif
> + val = max(val, (16*1024) >> PAGE_SHIFT);
>
> /* Make sure there's always a user page free. */
> - if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
> - uvmexp.freemin = uvmexp.reserve_kernel + 1;
> -
> - uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
> - if (uvmexp.freetarg <= uvmexp.freemin)
> - uvmexp.freetarg = uvmexp.freemin + 1;
> -
> - /* uvmexp.inactarg: computed in main daemon loop */
> + if (val < uvmexp.reserve_kernel + 1)
> + val = uvmexp.reserve_kernel + 1;
> + uvmexp.freemin = val;
> +
> + /* Calculate free target. */
> + val = (uvmexp.freemin * 4) / 3;
> + if (val <= uvmexp.freemin)
> + val = uvmexp.freemin + 1;
> + uvmexp.freetarg = val;
>
> uvmexp.wiredmax = uvmexp.npages / 3;
> }
> @@ -211,15 +205,12 @@ uvm_pageout(void *arg)
> {
> struct uvm_constraint_range constraint;
> struct uvm_pmalloc *pma;
> - int npages = 0;
> + int free;
>
> /* ensure correct priority and set paging parameters... */
> uvm.pagedaemon_proc = curproc;
> (void) spl0();
> - uvm_lock_pageq();
> - npages = uvmexp.npages;
> uvmpd_tune();
> - uvm_unlock_pageq();
>
> for (;;) {
> long size;
> @@ -245,44 +236,38 @@ uvm_pageout(void *arg)
> } else
> constraint = no_constraint;
> }
> -
> + free = uvmexp.free - BUFPAGES_DEFICIT;
> uvm_unlock_fpageq();
>
> /*
> * now lock page queues and recompute inactive count
> */
> uvm_lock_pageq();
> - if (npages != uvmexp.npages) { /* check for new pages? */
> - npages = uvmexp.npages;
> - uvmpd_tune();
> - }
> -
> uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
> if (uvmexp.inactarg <= uvmexp.freetarg) {
> uvmexp.inactarg = uvmexp.freetarg + 1;
> }
> + uvm_unlock_pageq();
>
> /* Reclaim pages from the buffer cache if possible. */
> size = 0;
> if (pma != NULL)
> size += pma->pm_size >> PAGE_SHIFT;
> - if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg)
> - size += uvmexp.freetarg - (uvmexp.free -
> - BUFPAGES_DEFICIT);
> + if (free < uvmexp.freetarg)
> + size += uvmexp.freetarg - free;
> if (size == 0)
> size = 16; /* XXX */
> - uvm_unlock_pageq();
> +
> (void) bufbackoff(&constraint, size * 2);
> #if NDRM > 0
> drmbackoff(size * 2);
> #endif
> - uvm_lock_pageq();
> -
> /*
> * scan if needed
> */
> - if (pma != NULL ||
> - ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
> + uvm_lock_pageq();
> + free = uvmexp.free - BUFPAGES_DEFICIT;
> + if (pma != NULL || (free < uvmexp.freetarg) ||
> ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
> uvmpd_scan(pma, &constraint);
> }
> Index: uvm/uvmexp.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvmexp.h,v
> diff -u -p -r1.11 uvmexp.h
> --- uvm/uvmexp.h 27 Oct 2023 19:18:53 -0000 1.11
> +++ uvm/uvmexp.h 17 Mar 2024 17:18:47 -0000
> @@ -45,7 +45,9 @@
> * I immutable after creation
> * K kernel lock
> * F uvm_lock_fpageq
> + * L uvm_lock_pageq
> * S uvm_swap_data_lock
> + * p copy of per-CPU counters, used only by userland.
> */
> struct uvmexp {
> /* vm_page constants */
> @@ -56,8 +58,8 @@ struct uvmexp {
> /* vm_page counters */
> int npages; /* [I] number of pages we manage */
> int free; /* [F] number of free pages */
> - int active; /* number of active pages */
> - int inactive; /* number of pages that we free'd but may want back */
> + int active; /* [L] # of active pages */
> + int inactive; /* [L] # of pages that we free'd but may want back */
> int paging; /* number of pages in the process of being paged out */
> int wired; /* number of wired pages */
>
> @@ -69,10 +71,10 @@ struct uvmexp {
> int vtextpages; /* XXX # of pages used by vtext vnodes */
>
> /* pageout params */
> - int freemin; /* min number of free pages */
> - int freetarg; /* target number of free pages */
> + int freemin; /* [I] min number of free pages */
> + int freetarg; /* [I] target number of free pages */
> int inactarg; /* target number of inactive pages */
> - int wiredmax; /* max number of wired pages */
> + int wiredmax; /* [I] max number of wired pages */
> int anonmin; /* min threshold for anon pages */
> int vtextmin; /* min threshold for vtext pages */
> int vnodemin; /* min threshold for vnode pages */
> @@ -91,16 +93,16 @@ struct uvmexp {
> int unused06; /* formerly nfreeanon */
>
> /* stat counters */
> - int faults; /* page fault count */
> + int faults; /* [p] page fault count */
> int traps; /* trap count */
> int intrs; /* interrupt count */
> int swtch; /* context switch count */
> int softs; /* software interrupt count */
> int syscalls; /* system calls */
> - int pageins; /* pagein operation count */
> + int pageins; /* [p] pagein operation count */
> /* pageouts are in pdpageouts below */
> - int unused07; /* formerly obsolete_swapins */
> - int unused08; /* formerly obsolete_swapouts */
> + int unused07; /* formerly obsolete_swapins */
> + int unused08; /* formerly obsolete_swapouts */
> int pgswapin; /* pages swapped in */
> int pgswapout; /* pages swapped out */
> int forks; /* forks */
> @@ -113,28 +115,28 @@ struct uvmexp {
> int unused09; /* formerly zeroaborts */
>
> /* fault subcounters */
> - int fltnoram; /* number of times fault was out of ram */
> - int fltnoanon; /* number of times fault was out of anons */
> - int fltnoamap; /* number of times fault was out of amap chunks */
> - int fltpgwait; /* number of times fault had to wait on a page */
> - int fltpgrele; /* number of times fault found a released page */
> - int fltrelck; /* number of times fault relock called */
> - int fltrelckok; /* number of times fault relock is a success */
> - int fltanget; /* number of times fault gets anon page */
> - int fltanretry; /* number of times fault retrys an anon get */
> - int fltamcopy; /* number of times fault clears "needs copy" */
> - int fltnamap; /* number of times fault maps a neighbor anon page */
> - int fltnomap; /* number of times fault maps a neighbor obj page */
> - int fltlget; /* number of times fault does a locked pgo_get */
> - int fltget; /* number of times fault does an unlocked get */
> - int flt_anon; /* number of times fault anon (case 1a) */
> - int flt_acow; /* number of times fault anon cow (case 1b) */
> - int flt_obj; /* number of times fault is on object page (2a) */
> - int flt_prcopy; /* number of times fault promotes with copy (2b) */
> - int flt_przero; /* number of times fault promotes with zerofill (2b) */
> + int fltnoram; /* [p] # of times fault was out of ram */
> + int fltnoanon; /* [p] # of times fault was out of anons */
> + int fltnoamap; /* [p] # of times fault was out of amap chunks */
> + int fltpgwait; /* [p] # of times fault had to wait on a page */
> + int fltpgrele; /* [p] # of times fault found a released page */
> + int fltrelck; /* [p] # of times fault relock called */
> + int fltrelckok; /* [p] # of times fault relock is a success */
> + int fltanget; /* [p] # of times fault gets anon page */
> + int fltanretry; /* [p] # of times fault retrys an anon get */
> + int fltamcopy; /* [p] # of times fault clears "needs copy" */
> + int fltnamap; /* [p] # of times fault maps a neighbor anon page */
> + int fltnomap; /* [p] # of times fault maps a neighbor obj page */
> + int fltlget; /* [p] # of times fault does a locked pgo_get */
> + int fltget; /* [p] # of times fault does an unlocked get */
> + int flt_anon; /* [p] # of times fault anon (case 1a) */
> + int flt_acow; /* [p] # of times fault anon cow (case 1b) */
> + int flt_obj; /* [p] # of times fault is on object page (2a) */
> + int flt_prcopy; /* [p] # of times fault promotes with copy (2b) */
> + int flt_przero; /* [p] # of times fault promotes with zerofill (2b) */
>
> /* daemon counters */
> - int pdwoke; /* number of times daemon woke up */
> + int pdwoke; /* [F] # of times daemon woke up */
> int pdrevs; /* number of times daemon rev'd clock hand */
> int pdswout; /* number of times daemon called for swapout */
> int pdfreed; /* number of pages daemon freed since boot */
>
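About the new "[p]" annotation in uvmexp.h: the idea is that each CPU
bumps its own private counter without taking any lock, and the per-CPU
values are only summed up when the statistics are exported; the struct
uvmexp fields themselves are just a copy filled in for userland, as the
comment says.  A standalone sketch of that scheme, with invented names
(NCPUS, percpu_faults, count_fault, read_faults); the kernel uses its
own per-CPU counter machinery rather than a fixed array:

/*
 * Each CPU (slot) increments its own counter without a lock; the
 * values are only aggregated when a reader asks for the total.
 */
#include <stdio.h>

#define NCPUS   4

static unsigned long percpu_faults[NCPUS];      /* one slot per CPU */

void
count_fault(int cpu)
{
        percpu_faults[cpu]++;           /* no lock: slot is CPU-private */
}

unsigned long
read_faults(void)
{
        unsigned long sum = 0;
        int i;

        /* Summing happens only when the stats are exported. */
        for (i = 0; i < NCPUS; i++)
                sum += percpu_faults[i];
        return sum;
}

int
main(void)
{
        count_fault(0);
        count_fault(2);
        printf("faults: %lu\n", read_faults());
        return 0;
}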