Index | Thread | Search

From:
Martin Pieuchot <mpi@openbsd.org>
Subject:
Re: uvm_pageout() cleanup
To:
tech@openbsd.org
Date:
Sat, 23 Mar 2024 12:41:46 +0100

Download raw body.

Thread
On 18/03/24(Mon) 17:51, Martin Pieuchot wrote:
> It isn't clear to me how concurrent accesses to uvmexp.* variables
> should be protected and/or dealt with.  The diff below is a step towards
> documenting and cleaning some of this mess.  It does the following:
> 
> - Stop calling uvmpd_tune() inside uvm_pageout().  We currently do not
>   support adding RAM. `uvmexp.npages' is immutable after boot.  This
>   helps thinking about the globals it modifies.
> 
> - Cleanup uvmpd_tune() and document that `uvmexp.freemin' and
>   `uvmexp.freetarg' are immutable.
> 
> - Try to read `uvmexp.free' as much as possible under `uvm_fpageq_lock'.
> 
> - Reduce the scope of the `uvm_pageq_lock' lock.  It serializes accesses
>   to `uvmexp.active' and `uvmexp.inactive'.
> 
> - Document which locks/mechanisms are protecting some fields in struct uvmexp. 
> 
> I'm still unhappy about the accesses to `uvmexp.inactive' and
> `uvmexp.inactarg' inside uvm_pmr_getpages() but I don't want to grab
> another global mutex there as it is a highly contended code path.  So
> how to proceed is open to discussion.  However I still think the diff
> below is a step forward.
> 
> ok?

Anyone?

> Index: uvm/uvm_pdaemon.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> diff -u -p -r1.109 uvm_pdaemon.c
> --- uvm/uvm_pdaemon.c	27 Oct 2023 19:18:53 -0000	1.109
> +++ uvm/uvm_pdaemon.c	17 Mar 2024 17:14:54 -0000
> @@ -165,33 +165,27 @@ uvm_wait(const char *wmsg)
>  
>  /*
>   * uvmpd_tune: tune paging parameters
> - *
> - * => called whenever memory is added to (or removed from?) the system
> - * => caller must call with page queues locked
>   */
> -
>  void
>  uvmpd_tune(void)
>  {
> +	int val;
>  
> -	uvmexp.freemin = uvmexp.npages / 30;
> +	val = uvmexp.npages / 30;
>  
> -	/* between 16k and 512k */
>  	/* XXX:  what are these values good for? */
> -	uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
> -#if 0
> -	uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT);
> -#endif
> +	val = max(val, (16*1024) >> PAGE_SHIFT);
>  
>  	/* Make sure there's always a user page free. */
> -	if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
> -		uvmexp.freemin = uvmexp.reserve_kernel + 1;
> -
> -	uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
> -	if (uvmexp.freetarg <= uvmexp.freemin)
> -		uvmexp.freetarg = uvmexp.freemin + 1;
> -
> -	/* uvmexp.inactarg: computed in main daemon loop */
> +	if (val < uvmexp.reserve_kernel + 1)
> +		val = uvmexp.reserve_kernel + 1;
> +	uvmexp.freemin = val;
> +
> +	/* Calculate free target. */
> +	val = (uvmexp.freemin * 4) / 3;
> +	if (val <= uvmexp.freemin)
> +		val = uvmexp.freemin + 1;
> +	uvmexp.freetarg = val;
>  
>  	uvmexp.wiredmax = uvmexp.npages / 3;
>  }
> @@ -211,15 +205,12 @@ uvm_pageout(void *arg)
>  {
>  	struct uvm_constraint_range constraint;
>  	struct uvm_pmalloc *pma;
> -	int npages = 0;
> +	int free;
>  
>  	/* ensure correct priority and set paging parameters... */
>  	uvm.pagedaemon_proc = curproc;
>  	(void) spl0();
> -	uvm_lock_pageq();
> -	npages = uvmexp.npages;
>  	uvmpd_tune();
> -	uvm_unlock_pageq();
>  
>  	for (;;) {
>  		long size;
> @@ -245,44 +236,38 @@ uvm_pageout(void *arg)
>  			} else
>  				constraint = no_constraint;
>  		}
> -
> +		free = uvmexp.free - BUFPAGES_DEFICIT;
>  		uvm_unlock_fpageq();
>  
>  		/*
>  		 * now lock page queues and recompute inactive count
>  		 */
>  		uvm_lock_pageq();
> -		if (npages != uvmexp.npages) {	/* check for new pages? */
> -			npages = uvmexp.npages;
> -			uvmpd_tune();
> -		}
> -
>  		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
>  		if (uvmexp.inactarg <= uvmexp.freetarg) {
>  			uvmexp.inactarg = uvmexp.freetarg + 1;
>  		}
> +		uvm_unlock_pageq();
>  
>  		/* Reclaim pages from the buffer cache if possible. */
>  		size = 0;
>  		if (pma != NULL)
>  			size += pma->pm_size >> PAGE_SHIFT;
> -		if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg)
> -			size += uvmexp.freetarg - (uvmexp.free -
> -			    BUFPAGES_DEFICIT);
> +		if (free < uvmexp.freetarg)
> +			size += uvmexp.freetarg - free;
>  		if (size == 0)
>  			size = 16; /* XXX */
> -		uvm_unlock_pageq();
> +
>  		(void) bufbackoff(&constraint, size * 2);
>  #if NDRM > 0
>  		drmbackoff(size * 2);
>  #endif
> -		uvm_lock_pageq();
> -
>  		/*
>  		 * scan if needed
>  		 */
> -		if (pma != NULL ||
> -		    ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
> +		uvm_lock_pageq();
> +		free = uvmexp.free - BUFPAGES_DEFICIT;
> +		if (pma != NULL || (free < uvmexp.freetarg) ||
>  		    ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
>  			uvmpd_scan(pma, &constraint);
>  		}
> Index: uvm/uvmexp.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvmexp.h,v
> diff -u -p -r1.11 uvmexp.h
> --- uvm/uvmexp.h	27 Oct 2023 19:18:53 -0000	1.11
> +++ uvm/uvmexp.h	17 Mar 2024 17:18:47 -0000
> @@ -45,7 +45,9 @@
>   *	I	immutable after creation
>   *	K	kernel lock
>   *	F	uvm_lock_fpageq
> + *	L	uvm_lock_pageq
>   *	S	uvm_swap_data_lock
> + *	p	copy of per-CPU counters, used only by userland.
>   */
>  struct uvmexp {
>  	/* vm_page constants */
> @@ -56,8 +58,8 @@ struct uvmexp {
>  	/* vm_page counters */
>  	int npages;     /* [I] number of pages we manage */
>  	int free;       /* [F] number of free pages */
> -	int active;     /* number of active pages */
> -	int inactive;   /* number of pages that we free'd but may want back */
> +	int active;     /* [L] # of active pages */
> +	int inactive;   /* [L] # of pages that we free'd but may want back */
>  	int paging;	/* number of pages in the process of being paged out */
>  	int wired;      /* number of wired pages */
>  
> @@ -69,10 +71,10 @@ struct uvmexp {
>  	int vtextpages;		/* XXX # of pages used by vtext vnodes */
>  
>  	/* pageout params */
> -	int freemin;    /* min number of free pages */
> -	int freetarg;   /* target number of free pages */
> +	int freemin;    /* [I] min number of free pages */
> +	int freetarg;   /* [I] target number of free pages */
>  	int inactarg;   /* target number of inactive pages */
> -	int wiredmax;   /* max number of wired pages */
> +	int wiredmax;   /* [I] max number of wired pages */
>  	int anonmin;	/* min threshold for anon pages */
>  	int vtextmin;	/* min threshold for vtext pages */
>  	int vnodemin;	/* min threshold for vnode pages */
> @@ -91,16 +93,16 @@ struct uvmexp {
>  	int unused06;	/* formerly nfreeanon */
>  
>  	/* stat counters */
> -	int faults;		/* page fault count */
> +	int faults;		/* [p] page fault count */
>  	int traps;		/* trap count */
>  	int intrs;		/* interrupt count */
>  	int swtch;		/* context switch count */
>  	int softs;		/* software interrupt count */
>  	int syscalls;		/* system calls */
> -	int pageins;		/* pagein operation count */
> +	int pageins;		/* [p] pagein operation count */
>  				/* pageouts are in pdpageouts below */
> -	int unused07;		/* formerly obsolete_swapins */
> -	int unused08;		/* formerly obsolete_swapouts */
> +	int unused07;           /* formerly obsolete_swapins */
> +	int unused08;           /* formerly obsolete_swapouts */
>  	int pgswapin;		/* pages swapped in */
>  	int pgswapout;		/* pages swapped out */
>  	int forks;  		/* forks */
> @@ -113,28 +115,28 @@ struct uvmexp {
>  	int unused09;		/* formerly zeroaborts */
>  
>  	/* fault subcounters */
> -	int fltnoram;	/* number of times fault was out of ram */
> -	int fltnoanon;	/* number of times fault was out of anons */
> -	int fltnoamap;	/* number of times fault was out of amap chunks */
> -	int fltpgwait;	/* number of times fault had to wait on a page */
> -	int fltpgrele;	/* number of times fault found a released page */
> -	int fltrelck;	/* number of times fault relock called */
> -	int fltrelckok;	/* number of times fault relock is a success */
> -	int fltanget;	/* number of times fault gets anon page */
> -	int fltanretry;	/* number of times fault retrys an anon get */
> -	int fltamcopy;	/* number of times fault clears "needs copy" */
> -	int fltnamap;	/* number of times fault maps a neighbor anon page */
> -	int fltnomap;	/* number of times fault maps a neighbor obj page */
> -	int fltlget;	/* number of times fault does a locked pgo_get */
> -	int fltget;	/* number of times fault does an unlocked get */
> -	int flt_anon;	/* number of times fault anon (case 1a) */
> -	int flt_acow;	/* number of times fault anon cow (case 1b) */
> -	int flt_obj;	/* number of times fault is on object page (2a) */
> -	int flt_prcopy;	/* number of times fault promotes with copy (2b) */
> -	int flt_przero;	/* number of times fault promotes with zerofill (2b) */
> +	int fltnoram;	/* [p] # of times fault was out of ram */
> +	int fltnoanon;	/* [p] # of times fault was out of anons */
> +	int fltnoamap;	/* [p] # of times fault was out of amap chunks */
> +	int fltpgwait;	/* [p] # of times fault had to wait on a page */
> +	int fltpgrele;	/* [p] # of times fault found a released page */
> +	int fltrelck;	/* [p] # of times fault relock called */
> +	int fltrelckok;	/* [p] # of times fault relock is a success */
> +	int fltanget;	/* [p] # of times fault gets anon page */
> +	int fltanretry;	/* [p] # of times fault retrys an anon get */
> +	int fltamcopy;	/* [p] # of times fault clears "needs copy" */
> +	int fltnamap;	/* [p] # of times fault maps a neighbor anon page */
> +	int fltnomap;	/* [p] # of times fault maps a neighbor obj page */
> +	int fltlget;	/* [p] # of times fault does a locked pgo_get */
> +	int fltget;	/* [p] # of times fault does an unlocked get */
> +	int flt_anon;	/* [p] # of times fault anon (case 1a) */
> +	int flt_acow;	/* [p] # of times fault anon cow (case 1b) */
> +	int flt_obj;	/* [p] # of times fault is on object page (2a) */
> +	int flt_prcopy;	/* [p] # of times fault promotes with copy (2b) */
> +	int flt_przero;	/* [p] # of times fault promotes with zerofill (2b) */
>  
>  	/* daemon counters */
> -	int pdwoke;	/* number of times daemon woke up */
> +	int pdwoke;	/* [F] # of times daemon woke up */
>  	int pdrevs;	/* number of times daemon rev'd clock hand */
>  	int pdswout;	/* number of times daemon called for swapout */
>  	int pdfreed;	/* number of pages daemon freed since boot */
>