From: Mark Kettenis <mark.kettenis@xs4all.nl>
Subject: Re: pgo_get() vs PG_BUSY
To: Martin Pieuchot <mpi@grenadille.net>
Cc: tech@openbsd.org, tb@openbsd.org
Date: Sat, 07 Dec 2024 13:11:28 +0100

> Date: Tue, 3 Dec 2024 09:46:20 +0100
> From: Martin Pieuchot <mpi@grenadille.net>
> 
> Diff below changes the behavior of the pgo_get() interface with PGO_LOCKED
> and how the page currently fetched via uvm_fault_lower_io() is released.
> 
> The pgo_get() w/ PGO_LOCKED implementations are always called with
> `uobj->vmobjlock' locked and the lock is never released while manipulating
> the pages.  So there is not need to set the PG_BUSY flag on the pages.
> This is what the uao_get() and uvn_get() chunks are about.
> 
> The chunks in uvm_fault_lower_lookup() are simplifications related to
> the fact that none of the pgo_get() set the PG_BUSY flags.
> 
> The rest of the diff deals with `uobjpage', the physical page fetched
> with pgo_get() w/ PGO_SYNCIO.  Instead of releasing the pages in
> multiple places release it as soon as we've got the associated lock in
> uvm_fault_lower_io().  The reasoning is the same as above.
> 
> This diff is the complicated part of the current parallelisation I'm
> working on, so do not hesitate if you have any question. 
> 
> ok?

This seems to leave behind a comment that states that pgo_get() will
set the PG_BUSY flag in uvm_fault_lower_lookup():

                /*
                 * if center page is resident and not
                 * PG_BUSY, then pgo_get made it PG_BUSY
                 * for us and gave us a handle to it.
                 * remember this page as "uobjpage."
                 * (for later use).
                 */

That needs to be fixed.

> diff --git sys/uvm/uvm_aobj.c sys/uvm/uvm_aobj.c
> index 6ff2abee482..8176f22a6a4 100644
> --- sys/uvm/uvm_aobj.c
> +++ sys/uvm/uvm_aobj.c
> @@ -1038,8 +1038,6 @@ uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
>  			/*
>  			 * useful page: plug it in our result array
>  			 */
> -			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
> -			UVM_PAGE_OWN(ptmp, "uao_get1");
>  			pps[lcv] = ptmp;
>  			gotpages++;
>  		}
> diff --git sys/uvm/uvm_fault.c sys/uvm/uvm_fault.c
> index b605252407b..e5e1bffe5cc 100644
> --- sys/uvm/uvm_fault.c
> +++ sys/uvm/uvm_fault.c
> @@ -1134,6 +1134,7 @@ uvm_fault_lower_lookup(
>  		    pages[lcv] == PGO_DONTCARE)
>  			continue;
>  
> +		KASSERT((pages[lcv]->pg_flags & PG_BUSY) == 0);
>  		KASSERT((pages[lcv]->pg_flags & PG_RELEASED) == 0);
>  
>  		/*
> @@ -1149,8 +1150,13 @@ uvm_fault_lower_lookup(
>  		}
>  
>  		if (pmap_extract(ufi->orig_map->pmap, currva, &pa))
> -			goto next;
> +			continue;
>  
> +		/*
> +		 * calling pgo_get with PGO_LOCKED returns us pages which
> +		 * are neither busy nor released, so we don't need to check
> +		 * for this.  we can just directly enter the pages.
> +		 */
>  		if (pages[lcv]->wire_count == 0) {
>  			uvm_lock_pageq();
>  			uvm_pageactivate(pages[lcv]);
> @@ -1162,24 +1168,17 @@ uvm_fault_lower_lookup(
>  		KASSERT(flt->wired == FALSE);
>  
>  		/*
> -		 * Since this page isn't the page that's
> -		 * actually faulting, ignore pmap_enter()
> -		 * failures; it's not critical that we
> +		 * Since this page isn't the page that's actually faulting,
> +		 * ignore pmap_enter() failures; it's not critical that we
>  		 * enter these right now.
> +		 * NOTE: page can't be PG_WANTED or PG_RELEASED because we've
> +		 * held the lock the whole time we've had the handle.
>  		 */
>  		(void) pmap_enter(ufi->orig_map->pmap, currva,
>  		    VM_PAGE_TO_PHYS(pages[lcv]) | flt->pa_flags,
>  		    flt->enter_prot & MASK(ufi->entry), PMAP_CANFAIL);
>  		entered++;
>  
> -		/*
> -		 * NOTE: page can't be PG_WANTED because
> -		 * we've held the lock the whole time
> -		 * we've had the handle.
> -		 */
> -next:
> -		atomic_clearbits_int(&pages[lcv]->pg_flags, PG_BUSY);
> -		UVM_PAGE_OWN(pages[lcv], NULL);
>  	}
>  	if (entered > 0)
>  		pmap_update(ufi->orig_map->pmap);
> @@ -1324,21 +1323,6 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
>  		 * out of memory resources?
>  		 */
>  		if (anon == NULL || pg == NULL) {
> -			/*
> -			 * arg!  must unbusy our page and fail or sleep.
> -			 */
> -			if (uobjpage != PGO_DONTCARE) {
> -				uvm_lock_pageq();
> -				uvm_pageactivate(uobjpage);
> -				uvm_unlock_pageq();

Shouldn't we still activate the page in this case?

> -
> -				if (uobjpage->pg_flags & PG_WANTED)
> -					wakeup(uobjpage);
> -				atomic_clearbits_int(&uobjpage->pg_flags,
> -				    PG_BUSY|PG_WANTED);
> -				UVM_PAGE_OWN(uobjpage, NULL);
> -			}
> -
>  			/* unlock and fail ... */
>  			uvmfault_unlockall(ufi, amap, uobj);
>  			if (anon == NULL)
> @@ -1389,16 +1373,6 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
>  				pmap_page_protect(uobjpage, PROT_NONE);
>  			}
>  #endif
> -
> -			/* dispose of uobjpage. drop handle to uobj as well. */
> -			if (uobjpage->pg_flags & PG_WANTED)
> -				wakeup(uobjpage);
> -			atomic_clearbits_int(&uobjpage->pg_flags,
> -			    PG_BUSY|PG_WANTED);
> -			UVM_PAGE_OWN(uobjpage, NULL);
> -			uvm_lock_pageq();
> -			uvm_pageactivate(uobjpage);
> -			uvm_unlock_pageq();

Same here.

>  			/* done with copied uobjpage. */
>  			rw_exit(uobj->vmobjlock);
>  			uobj = NULL;
> @@ -1580,15 +1554,13 @@ uvm_fault_lower_io(
>  		locked = FALSE;
>  	}
>  
> -	/* didn't get the lock?   release the page and retry. */
> -	if (locked == FALSE && pg != PGO_DONTCARE) {
> +	/* release the page now, still holding object lock */
> +	if (pg != PGO_DONTCARE) {
>  		uvm_lock_pageq();
> -		/* make sure it is in queues */
>  		uvm_pageactivate(pg);
>  		uvm_unlock_pageq();
>  
>  		if (pg->pg_flags & PG_WANTED)
> -			/* still holding object lock */
>  			wakeup(pg);
>  		atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_WANTED);
>  		UVM_PAGE_OWN(pg, NULL);

This bit doesn't look right to me.  You're now always clearing the
PG_BUSY flag on the page.  But then below there is a comment that says:

        /*
         * we have the data in pg which is PG_BUSY
         */

That can't be true right?


> diff --git sys/uvm/uvm_vnode.c sys/uvm/uvm_vnode.c
> index a8aaa23cb45..d9fc19f6dcb 100644
> --- sys/uvm/uvm_vnode.c
> +++ sys/uvm/uvm_vnode.c
> @@ -992,8 +992,6 @@ uvn_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
>  			 * useful page: busy it and plug it in our
>  			 * result array
>  			 */
> -			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
> -			UVM_PAGE_OWN(ptmp, "uvn_get1");
>  			pps[lcv] = ptmp;
>  			gotpages++;
>  
> 
> 
>