
From: Mark Kettenis <mark.kettenis@xs4all.nl>
Subject: Re: Push UVM's pageqlock inside uvm_pagefree()
To: Martin Pieuchot <mpi@grenadille.net>
Cc: tech@openbsd.org
Date: Wed, 19 Feb 2025 15:08:27 +0100

> Date: Wed, 19 Feb 2025 13:11:09 +0100
> From: Martin Pieuchot <mpi@grenadille.net>
> 
> Here's the first step to reduce the contention on the global pageqlock
> mutex.  The lock is necessary to guarantee the integrity of the global
> LRUs.  So it is only required around uvm_pagedequeue().
> 
> This diff already makes a noticeable difference.  ok?

Possibly not ok.

Is it possible that uvm_pageclean() gets called simultaneously by two
different CPUs for the same page?  I'm not 100% sure that isn't
possible.  But if you can convince me that that isn't possible, there
are still a few issues:

1. Probably should remove the

     => caller must lock page queues if `pg' is managed

   bit from the comment.

2. I think this means that uvmexp.wired-- now happens without holding
   uvm.pageqlock.  So that counter may drift, and that in turn may
   cause problems in the pagedaemon.  Maybe we need to use atomic
   ops for it?
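
   For concreteness, the sort of thing I have in mind in
   uvm_pageclean() (untested sketch; atomic_dec_int() takes an
   unsigned int pointer, so the field may need a cast or a type
   change):

	/*
	 * if the page was wired, unwire it now.
	 */
	if (pg->wire_count) {
		pg->wire_count = 0;
		/* pageqlock is no longer held here, so decrement atomically */
		atomic_dec_int(&uvmexp.wired);
	}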

3. I think you're removing an optimization in uvn_flush().  That
   function would accumulate cleaned pages and return them to
   pmemrange in one go.  That potentially avoids lots of
   unlock/relock cycles.
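
   For reference, this is the pattern the diff deletes from
   uvn_flush():

	struct pglist dead;

	TAILQ_INIT(&dead);
	...
	/* accumulate cleaned pages on a local list */
	uvm_pageclean(pp);
	TAILQ_INSERT_HEAD(&dead, pp, pageq);
	...
	/* and hand the whole batch back to pmemrange in one go */
	uvm_pglistfree(&dead);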

4. The current code has some cases that do:

	uvm_lock_pageq();
	pmap_page_protect(pg, PROT_NONE);
	uvm_pagefree(pg);
	uvm_unlock_pageq();

   I'm wondering if that is trying to stop the case where we remove
   the page from the pmap, but another CPU tries to access the page
   and fault it back in.  So what prevents that from happening once
   you no longer grab the lock?
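
   To spell out the interleaving I'm worried about (hypothetical; I
   haven't traced the fault path to confirm the pageqlock is what
   serializes it today):

	CPU0					CPU1
	uvm_lock_pageq();
	pmap_page_protect(pg, PROT_NONE);
						faults on pg's old mapping,
						tries to map pg back in
	uvm_pagefree(pg);
	uvm_unlock_pageq();
						...and may end up touching
						a page that is already free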


> 
> Index: uvm/uvm_anon.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_anon.c,v
> diff -u -p -r1.61 uvm_anon.c
> --- uvm/uvm_anon.c	27 Dec 2024 12:04:40 -0000	1.61
> +++ uvm/uvm_anon.c	19 Feb 2025 11:34:20 -0000
> @@ -106,14 +106,10 @@ uvm_anfree_list(struct vm_anon *anon, st
>  			 * clean page, and put it on pglist
>  			 * for later freeing.
>  			 */
> -			uvm_lock_pageq();
>  			uvm_pageclean(pg);
> -			uvm_unlock_pageq();
>  			TAILQ_INSERT_HEAD(pgl, pg, pageq);
>  		} else {
> -			uvm_lock_pageq();	/* lock out pagedaemon */
>  			uvm_pagefree(pg);	/* bye bye */
> -			uvm_unlock_pageq();	/* free the daemon */
>  		}
>  	} else {
>  		if (anon->an_swslot != 0 && anon->an_swslot != SWSLOT_BAD) {
> @@ -249,10 +245,8 @@ uvm_anon_release(struct vm_anon *anon)
>  	KASSERT(pg->uanon == anon);
>  	KASSERT(anon->an_ref == 0);
>  
> -	uvm_lock_pageq();
>  	pmap_page_protect(pg, PROT_NONE);
>  	uvm_pagefree(pg);
> -	uvm_unlock_pageq();
>  	KASSERT(anon->an_page == NULL);
>  	lock = anon->an_lock;
>  	uvm_anon_dropswap(anon);
> Index: uvm/uvm_aobj.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
> diff -u -p -r1.115 uvm_aobj.c
> --- uvm/uvm_aobj.c	27 Dec 2024 12:04:40 -0000	1.115
> +++ uvm/uvm_aobj.c	19 Feb 2025 11:34:20 -0000
> @@ -839,9 +839,7 @@ uao_detach(struct uvm_object *uobj)
>  			continue;
>  		}
>  		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
> -		uvm_lock_pageq();
>  		uvm_pagefree(pg);
> -		uvm_unlock_pageq();
>  	}
>  
>  	/*
> @@ -957,10 +955,7 @@ uao_flush(struct uvm_object *uobj, voff_
>  			 * because we need to update swap accounting anyway.
>  			 */
>  			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
> -			uvm_lock_pageq();
>  			uvm_pagefree(pg);
> -			uvm_unlock_pageq();
> -
>  			continue;
>  		default:
>  			panic("uao_flush: weird flags");
> @@ -1179,9 +1174,7 @@ uao_get(struct uvm_object *uobj, voff_t 
>  				atomic_clearbits_int(&ptmp->pg_flags,
>  				    PG_WANTED|PG_BUSY);
>  				UVM_PAGE_OWN(ptmp, NULL);
> -				uvm_lock_pageq();
>  				uvm_pagefree(ptmp);
> -				uvm_unlock_pageq();
>  				rw_exit(uobj->vmobjlock);
>  
>  				return rv;
> Index: uvm/uvm_fault.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> diff -u -p -r1.163 uvm_fault.c
> --- uvm/uvm_fault.c	29 Jan 2025 15:22:33 -0000	1.163
> +++ uvm/uvm_fault.c	19 Feb 2025 11:34:20 -0000
> @@ -417,9 +417,7 @@ uvmfault_anonget(struct uvm_faultinfo *u
>  				 * cannot be mapped and thus no need to
>  				 * pmap_page_protect() it.
>  				 */
> -				uvm_lock_pageq();
>  				uvm_pagefree(pg);
> -				uvm_unlock_pageq();
>  
>  				if (locked) {
>  					uvmfault_unlockall(ufi, NULL, NULL);
> Index: uvm/uvm_km.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_km.c,v
> diff -u -p -r1.155 uvm_km.c
> --- uvm/uvm_km.c	1 Nov 2024 20:26:18 -0000	1.155
> +++ uvm/uvm_km.c	19 Feb 2025 11:34:21 -0000
> @@ -270,9 +270,7 @@ uvm_km_pgremove(struct uvm_object *uobj,
>  		slot = uao_dropswap(uobj, curoff >> PAGE_SHIFT);
>  
>  		if (pp != NULL) {
> -			uvm_lock_pageq();
>  			uvm_pagefree(pp);
> -			uvm_unlock_pageq();
>  		} else if (slot != 0) {
>  			swpgonlydelta++;
>  		}
> Index: uvm/uvm_object.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_object.c,v
> diff -u -p -r1.26 uvm_object.c
> --- uvm/uvm_object.c	19 Feb 2025 11:10:54 -0000	1.26
> +++ uvm/uvm_object.c	19 Feb 2025 11:34:21 -0000
> @@ -238,9 +238,7 @@ uvm_obj_free(struct uvm_object *uobj)
>  		 */
>  		atomic_clearbits_int(&pg->pg_flags, PG_TABLED);
>  		pg->uobject = NULL;
> -		uvm_lock_pageq();
>  		uvm_pageclean(pg);
> -		uvm_unlock_pageq();
>  		TAILQ_INSERT_TAIL(&pgl, pg, pageq);
>   	}
>  	uvm_pglistfree(&pgl);
> Index: uvm/uvm_page.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_page.c,v
> diff -u -p -r1.181 uvm_page.c
> --- uvm/uvm_page.c	19 Feb 2025 11:10:54 -0000	1.181
> +++ uvm/uvm_page.c	19 Feb 2025 11:34:21 -0000
> @@ -955,10 +955,6 @@ uvm_pageclean(struct vm_page *pg)
>  {
>  	u_int flags_to_clear = 0;
>  
> -	if ((pg->pg_flags & (PG_TABLED|PQ_ACTIVE|PQ_INACTIVE)) &&
> -	    (pg->uobject == NULL || !UVM_OBJ_IS_PMAP(pg->uobject)))
> -		MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
> -
>  #ifdef DEBUG
>  	if (pg->uobject == (void *)0xdeadbeef &&
>  	    pg->uanon == (void *)0xdeadbeef) {
> @@ -982,7 +978,11 @@ uvm_pageclean(struct vm_page *pg)
>  	/*
>  	 * now remove the page from the queues
>  	 */
> -	uvm_pagedequeue(pg);
> +	if (pg->pg_flags & (PQ_ACTIVE|PQ_INACTIVE)) {
> +		uvm_lock_pageq();
> +		uvm_pagedequeue(pg);
> +		uvm_unlock_pageq();
> +	}
>  
>  	/*
>  	 * if the page was wired, unwire it now.
> Index: uvm/uvm_pdaemon.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> diff -u -p -r1.134 uvm_pdaemon.c
> --- uvm/uvm_pdaemon.c	25 Jan 2025 08:55:52 -0000	1.134
> +++ uvm/uvm_pdaemon.c	19 Feb 2025 11:34:21 -0000
> @@ -596,6 +596,8 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>  
>  				/* zap all mappings with pmap_page_protect... */
>  				pmap_page_protect(p, PROT_NONE);
> +				/* dequeue first to prevent lock recursion */
> +				uvm_pagedequeue(p);
>  				uvm_pagefree(p);
>  				freed++;
>  
> @@ -853,6 +855,8 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>  				anon = NULL;
>  				uvm_lock_pageq();
>  				nextpg = TAILQ_NEXT(p, pageq);
> +				/* dequeue first to prevent lock recursion */
> +				uvm_pagedequeue(p);
>  				/* free released page */
>  				uvm_pagefree(p);
>  			} else {	/* page was not released during I/O */
> @@ -1055,7 +1059,6 @@ uvmpd_drop(struct pglist *pglst)
>  			struct uvm_object * uobj = p->uobject;
>  
>  			rw_enter(uobj->vmobjlock, RW_WRITE);
> -			uvm_lock_pageq();
>  			/*
>  			 * we now have the page queues locked.
>  			 * the page is not busy.   if the page is clean we
> @@ -1071,7 +1074,6 @@ uvmpd_drop(struct pglist *pglst)
>  				pmap_page_protect(p, PROT_NONE);
>  				uvm_pagefree(p);
>  			}
> -			uvm_unlock_pageq();
>  			rw_exit(uobj->vmobjlock);
>  		}
>  	}
> Index: uvm/uvm_swap.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_swap.c,v
> diff -u -p -r1.173 uvm_swap.c
> --- uvm/uvm_swap.c	7 Nov 2024 09:04:55 -0000	1.173
> +++ uvm/uvm_swap.c	19 Feb 2025 11:34:21 -0000
> @@ -395,10 +395,8 @@ uvm_swap_freepages(struct vm_page **pps,
>  		return;
>  	}
>  
> -	uvm_lock_pageq();
>  	for (i = 0; i < npages; i++)
>  		uvm_pagefree(pps[i]);
> -	uvm_unlock_pageq();
>  
>  }
>  
> Index: uvm/uvm_vnode.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
> diff -u -p -r1.138 uvm_vnode.c
> --- uvm/uvm_vnode.c	27 Dec 2024 12:04:40 -0000	1.138
> +++ uvm/uvm_vnode.c	19 Feb 2025 11:34:21 -0000
> @@ -602,13 +602,11 @@ uvn_flush(struct uvm_object *uobj, voff_
>  	struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
>  	struct vm_page *pp, *ptmp;
>  	struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
> -	struct pglist dead;
>  	int npages, result, lcv;
>  	boolean_t retval, need_iosync, needs_clean;
>  	voff_t curoff;
>  
>  	KASSERT(rw_write_held(uobj->vmobjlock));
> -	TAILQ_INIT(&dead);
>  
>  	/* get init vals and determine how we are going to traverse object */
>  	need_iosync = FALSE;
> @@ -696,9 +694,9 @@ uvn_flush(struct uvm_object *uobj, voff_
>  					continue;
>  				} else {
>  					pmap_page_protect(pp, PROT_NONE);
> -					/* removed page from object */
> -					uvm_pageclean(pp);
> -					TAILQ_INSERT_HEAD(&dead, pp, pageq);
> +					/* dequeue to prevent lock recursion */
> +					uvm_pagedequeue(pp);
> +					uvm_pagefree(pp);
>  				}
>  			}
>  			continue;
> @@ -830,8 +828,9 @@ ReTry:
>  					retval = FALSE;
>  				}
>  				pmap_page_protect(ptmp, PROT_NONE);
> -				uvm_pageclean(ptmp);
> -				TAILQ_INSERT_TAIL(&dead, ptmp, pageq);
> +				/* dequeue first to prevent lock recursion */
> +				uvm_pagedequeue(ptmp);
> +				uvm_pagefree(ptmp);
>  			}
>  
>  		}		/* end of "lcv" for loop */
> @@ -852,8 +851,6 @@ ReTry:
>  			wakeup(&uvn->u_flags);
>  		uvn->u_flags &= ~(UVM_VNODE_IOSYNC|UVM_VNODE_IOSYNCWANTED);
>  	}
> -
> -	uvm_pglistfree(&dead);
>  
>  	return retval;
>  }
> 