
From: Mark Kettenis <mark.kettenis@xs4all.nl>
Subject: Re: uvm_pageout() cleanup
To: Martin Pieuchot <mpi@openbsd.org>
Cc: tech@openbsd.org
Date: Sat, 23 Mar 2024 13:06:46 +0100

> Date: Sat, 23 Mar 2024 12:41:46 +0100
> From: Martin Pieuchot <mpi@openbsd.org>
> 
> On 18/03/24(Mon) 17:51, Martin Pieuchot wrote:
> > It isn't clear to me how concurrent accesses to uvmexp.* variables
> > should be protected and/or dealt with.  The diff below is a step towards
> > documenting and cleaning some of this mess.  It does the following:
> > 
> > - Stop calling uvmpd_tune() inside uvm_pageout().  We currently do not
> >   support adding RAM, so `uvmexp.npages' is immutable after boot.  This
> >   makes it easier to reason about the globals uvmpd_tune() modifies.
> > 
> > - Cleanup uvmpd_tune() and document that `uvmexp.freemin' and
> >   `uvmexp.freetarg' are immutable.
> > 
> > - Try to read `uvmexp.free' as much as possible under `uvm_fpageq_lock'
> >   (see the sketch after this list).
> > 
> > - Reduce the scope of the `uvm_pageq_lock' lock.  It serializes accesses
> >   to `uvmexp.active' and `uvmexp.inactive'.
> > 
> > - Document which locks/mechanisms protect some fields in struct uvmexp.
> > 
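A minimal sketch of the `uvmexp.free' pattern mentioned above, condensed
from the uvm_pageout() part of the diff below (simplified, not a verbatim
excerpt): the counter is snapshotted into a local variable while the free
page queue mutex is held, and later checks use the snapshot instead of
re-reading the global.

	int free;
	long size = 0;

	uvm_lock_fpageq();
	/* Snapshot the counter while holding the free page queue mutex. */
	free = uvmexp.free - BUFPAGES_DEFICIT;
	uvm_unlock_fpageq();

	/*
	 * Work on the local snapshot; the live counter may change as soon
	 * as the mutex is released.
	 */
	if (free < uvmexp.freetarg)
		size += uvmexp.freetarg - free;
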
> > I'm still unhappy about the accesses to `uvmexp.inactive' and
> > `uvmexp.inactarg' inside uvm_pmr_getpages(), but I don't want to grab
> > another global mutex there as it is a highly contended code path.  So
> > how to proceed is open to discussion.  However, I still think the diff
> > below is a step forward.
> > 
> > ok?
> 
> Anyone?

Sure.  I can't really see us hotplugging physmem, except perhaps in
virtual machines.

ok kettenis@

> > Index: uvm/uvm_pdaemon.c
> > ===================================================================
> > RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> > diff -u -p -r1.109 uvm_pdaemon.c
> > --- uvm/uvm_pdaemon.c	27 Oct 2023 19:18:53 -0000	1.109
> > +++ uvm/uvm_pdaemon.c	17 Mar 2024 17:14:54 -0000
> > @@ -165,33 +165,27 @@ uvm_wait(const char *wmsg)
> >  
> >  /*
> >   * uvmpd_tune: tune paging parameters
> > - *
> > - * => called whenever memory is added to (or removed from?) the system
> > - * => caller must call with page queues locked
> >   */
> > -
> >  void
> >  uvmpd_tune(void)
> >  {
> > +	int val;
> >  
> > -	uvmexp.freemin = uvmexp.npages / 30;
> > +	val = uvmexp.npages / 30;
> >  
> > -	/* between 16k and 512k */
> >  	/* XXX:  what are these values good for? */
> > -	uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
> > -#if 0
> > -	uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT);
> > -#endif
> > +	val = max(val, (16*1024) >> PAGE_SHIFT);
> >  
> >  	/* Make sure there's always a user page free. */
> > -	if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
> > -		uvmexp.freemin = uvmexp.reserve_kernel + 1;
> > -
> > -	uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
> > -	if (uvmexp.freetarg <= uvmexp.freemin)
> > -		uvmexp.freetarg = uvmexp.freemin + 1;
> > -
> > -	/* uvmexp.inactarg: computed in main daemon loop */
> > +	if (val < uvmexp.reserve_kernel + 1)
> > +		val = uvmexp.reserve_kernel + 1;
> > +	uvmexp.freemin = val;
> > +
> > +	/* Calculate free target. */
> > +	val = (uvmexp.freemin * 4) / 3;
> > +	if (val <= uvmexp.freemin)
> > +		val = uvmexp.freemin + 1;
> > +	uvmexp.freetarg = val;
> >  
> >  	uvmexp.wiredmax = uvmexp.npages / 3;
> >  }
> > @@ -211,15 +205,12 @@ uvm_pageout(void *arg)
> >  {
> >  	struct uvm_constraint_range constraint;
> >  	struct uvm_pmalloc *pma;
> > -	int npages = 0;
> > +	int free;
> >  
> >  	/* ensure correct priority and set paging parameters... */
> >  	uvm.pagedaemon_proc = curproc;
> >  	(void) spl0();
> > -	uvm_lock_pageq();
> > -	npages = uvmexp.npages;
> >  	uvmpd_tune();
> > -	uvm_unlock_pageq();
> >  
> >  	for (;;) {
> >  		long size;
> > @@ -245,44 +236,38 @@ uvm_pageout(void *arg)
> >  			} else
> >  				constraint = no_constraint;
> >  		}
> > -
> > +		free = uvmexp.free - BUFPAGES_DEFICIT;
> >  		uvm_unlock_fpageq();
> >  
> >  		/*
> >  		 * now lock page queues and recompute inactive count
> >  		 */
> >  		uvm_lock_pageq();
> > -		if (npages != uvmexp.npages) {	/* check for new pages? */
> > -			npages = uvmexp.npages;
> > -			uvmpd_tune();
> > -		}
> > -
> >  		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
> >  		if (uvmexp.inactarg <= uvmexp.freetarg) {
> >  			uvmexp.inactarg = uvmexp.freetarg + 1;
> >  		}
> > +		uvm_unlock_pageq();
> >  
> >  		/* Reclaim pages from the buffer cache if possible. */
> >  		size = 0;
> >  		if (pma != NULL)
> >  			size += pma->pm_size >> PAGE_SHIFT;
> > -		if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg)
> > -			size += uvmexp.freetarg - (uvmexp.free -
> > -			    BUFPAGES_DEFICIT);
> > +		if (free < uvmexp.freetarg)
> > +			size += uvmexp.freetarg - free;
> >  		if (size == 0)
> >  			size = 16; /* XXX */
> > -		uvm_unlock_pageq();
> > +
> >  		(void) bufbackoff(&constraint, size * 2);
> >  #if NDRM > 0
> >  		drmbackoff(size * 2);
> >  #endif
> > -		uvm_lock_pageq();
> > -
> >  		/*
> >  		 * scan if needed
> >  		 */
> > -		if (pma != NULL ||
> > -		    ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
> > +		uvm_lock_pageq();
> > +		free = uvmexp.free - BUFPAGES_DEFICIT;
> > +		if (pma != NULL || (free < uvmexp.freetarg) ||
> >  		    ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
> >  			uvmpd_scan(pma, &constraint);
> >  		}
> > Index: uvm/uvmexp.h
> > ===================================================================
> > RCS file: /cvs/src/sys/uvm/uvmexp.h,v
> > diff -u -p -r1.11 uvmexp.h
> > --- uvm/uvmexp.h	27 Oct 2023 19:18:53 -0000	1.11
> > +++ uvm/uvmexp.h	17 Mar 2024 17:18:47 -0000
> > @@ -45,7 +45,9 @@
> >   *	I	immutable after creation
> >   *	K	kernel lock
> >   *	F	uvm_lock_fpageq
> > + *	L	uvm_lock_pageq
> >   *	S	uvm_swap_data_lock
> > + *	p	copy of per-CPU counters, used only by userland.
> >   */
> >  struct uvmexp {
> >  	/* vm_page constants */
> > @@ -56,8 +58,8 @@ struct uvmexp {
> >  	/* vm_page counters */
> >  	int npages;     /* [I] number of pages we manage */
> >  	int free;       /* [F] number of free pages */
> > -	int active;     /* number of active pages */
> > -	int inactive;   /* number of pages that we free'd but may want back */
> > +	int active;     /* [L] # of active pages */
> > +	int inactive;   /* [L] # of pages that we free'd but may want back */
> >  	int paging;	/* number of pages in the process of being paged out */
> >  	int wired;      /* number of wired pages */
> >  
> > @@ -69,10 +71,10 @@ struct uvmexp {
> >  	int vtextpages;		/* XXX # of pages used by vtext vnodes */
> >  
> >  	/* pageout params */
> > -	int freemin;    /* min number of free pages */
> > -	int freetarg;   /* target number of free pages */
> > +	int freemin;    /* [I] min number of free pages */
> > +	int freetarg;   /* [I] target number of free pages */
> >  	int inactarg;   /* target number of inactive pages */
> > -	int wiredmax;   /* max number of wired pages */
> > +	int wiredmax;   /* [I] max number of wired pages */
> >  	int anonmin;	/* min threshold for anon pages */
> >  	int vtextmin;	/* min threshold for vtext pages */
> >  	int vnodemin;	/* min threshold for vnode pages */
> > @@ -91,16 +93,16 @@ struct uvmexp {
> >  	int unused06;	/* formerly nfreeanon */
> >  
> >  	/* stat counters */
> > -	int faults;		/* page fault count */
> > +	int faults;		/* [p] page fault count */
> >  	int traps;		/* trap count */
> >  	int intrs;		/* interrupt count */
> >  	int swtch;		/* context switch count */
> >  	int softs;		/* software interrupt count */
> >  	int syscalls;		/* system calls */
> > -	int pageins;		/* pagein operation count */
> > +	int pageins;		/* [p] pagein operation count */
> >  				/* pageouts are in pdpageouts below */
> > -	int unused07;		/* formerly obsolete_swapins */
> > -	int unused08;		/* formerly obsolete_swapouts */
> > +	int unused07;           /* formerly obsolete_swapins */
> > +	int unused08;           /* formerly obsolete_swapouts */
> >  	int pgswapin;		/* pages swapped in */
> >  	int pgswapout;		/* pages swapped out */
> >  	int forks;  		/* forks */
> > @@ -113,28 +115,28 @@ struct uvmexp {
> >  	int unused09;		/* formerly zeroaborts */
> >  
> >  	/* fault subcounters */
> > -	int fltnoram;	/* number of times fault was out of ram */
> > -	int fltnoanon;	/* number of times fault was out of anons */
> > -	int fltnoamap;	/* number of times fault was out of amap chunks */
> > -	int fltpgwait;	/* number of times fault had to wait on a page */
> > -	int fltpgrele;	/* number of times fault found a released page */
> > -	int fltrelck;	/* number of times fault relock called */
> > -	int fltrelckok;	/* number of times fault relock is a success */
> > -	int fltanget;	/* number of times fault gets anon page */
> > -	int fltanretry;	/* number of times fault retrys an anon get */
> > -	int fltamcopy;	/* number of times fault clears "needs copy" */
> > -	int fltnamap;	/* number of times fault maps a neighbor anon page */
> > -	int fltnomap;	/* number of times fault maps a neighbor obj page */
> > -	int fltlget;	/* number of times fault does a locked pgo_get */
> > -	int fltget;	/* number of times fault does an unlocked get */
> > -	int flt_anon;	/* number of times fault anon (case 1a) */
> > -	int flt_acow;	/* number of times fault anon cow (case 1b) */
> > -	int flt_obj;	/* number of times fault is on object page (2a) */
> > -	int flt_prcopy;	/* number of times fault promotes with copy (2b) */
> > -	int flt_przero;	/* number of times fault promotes with zerofill (2b) */
> > +	int fltnoram;	/* [p] # of times fault was out of ram */
> > +	int fltnoanon;	/* [p] # of times fault was out of anons */
> > +	int fltnoamap;	/* [p] # of times fault was out of amap chunks */
> > +	int fltpgwait;	/* [p] # of times fault had to wait on a page */
> > +	int fltpgrele;	/* [p] # of times fault found a released page */
> > +	int fltrelck;	/* [p] # of times fault relock called */
> > +	int fltrelckok;	/* [p] # of times fault relock is a success */
> > +	int fltanget;	/* [p] # of times fault gets anon page */
> > +	int fltanretry;	/* [p] # of times fault retrys an anon get */
> > +	int fltamcopy;	/* [p] # of times fault clears "needs copy" */
> > +	int fltnamap;	/* [p] # of times fault maps a neighbor anon page */
> > +	int fltnomap;	/* [p] # of times fault maps a neighbor obj page */
> > +	int fltlget;	/* [p] # of times fault does a locked pgo_get */
> > +	int fltget;	/* [p] # of times fault does an unlocked get */
> > +	int flt_anon;	/* [p] # of times fault anon (case 1a) */
> > +	int flt_acow;	/* [p] # of times fault anon cow (case 1b) */
> > +	int flt_obj;	/* [p] # of times fault is on object page (2a) */
> > +	int flt_prcopy;	/* [p] # of times fault promotes with copy (2b) */
> > +	int flt_przero;	/* [p] # of times fault promotes with zerofill (2b) */
> >  
> >  	/* daemon counters */
> > -	int pdwoke;	/* number of times daemon woke up */
> > +	int pdwoke;	/* [F] # of times daemon woke up */
> >  	int pdrevs;	/* number of times daemon rev'd clock hand */
> >  	int pdswout;	/* number of times daemon called for swapout */
> >  	int pdfreed;	/* number of pages daemon freed since boot */
> > 
> 
>