From: Martin Pieuchot
Subject: Re: pgo_get() vs PG_BUSY
To: Mark Kettenis
Cc: tech@openbsd.org, tb@openbsd.org
Date: Tue, 17 Dec 2024 10:35:36 +0100

On 16/12/24(Mon) 21:07, Mark Kettenis wrote:
> > Date: Mon, 9 Dec 2024 10:49:56 +0100
> > From: Martin Pieuchot
> > 
> > On 07/12/24(Sat) 13:11, Mark Kettenis wrote:
> > > > Date: Tue, 3 Dec 2024 09:46:20 +0100
> > > > From: Martin Pieuchot
> > > > 
> > > > Diff below changes the behavior of the pgo_get() interface with
> > > > PGO_LOCKED and how the page currently fetched via
> > > > uvm_fault_lower_io() is released.
> > > > 
> > > > The pgo_get() w/ PGO_LOCKED implementations are always called with
> > > > `uobj->vmobjlock' locked and the lock is never released while
> > > > manipulating the pages.  So there is no need to set the PG_BUSY
> > > > flag on the pages.  This is what the uao_get() and uvn_get()
> > > > chunks are about.
> > > > 
> > > > The chunks in uvm_fault_lower_lookup() are simplifications related
> > > > to the fact that none of the pgo_get() implementations set the
> > > > PG_BUSY flag.
> > > > 
> > > > The rest of the diff deals with `uobjpage', the physical page
> > > > fetched with pgo_get() w/ PGO_SYNCIO.  Instead of releasing the
> > > > page in multiple places, release it as soon as we've got the
> > > > associated lock in uvm_fault_lower_io().  The reasoning is the
> > > > same as above.
> > > > 
> > > > This diff is the complicated part of the current parallelisation
> > > > I'm working on, so do not hesitate if you have any questions.
> > > > 
> > > > ok?
> > > 
> > > This seems to leave behind a comment that states that pgo_get() will
> > > set the PG_BUSY flag in uvm_fault_lower_lookup():
> > > 
> > > 	/*
> > > 	 * if center page is resident and not
> > > 	 * PG_BUSY, then pgo_get made it PG_BUSY
> > > 	 * for us and gave us a handle to it.
> > > 	 * remember this page as "uobjpage."
> > > 	 * (for later use).
> > > 	 */
> > > 
> > > That needs to be fixed.
> > 
> > Indeed, updated diff below with fixed comments.
> 
> Great!
> 
> > > > [...]
> > > > @@ -1324,21 +1323,6 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
> > > >  	 * out of memory resources?
> > > >  	 */
> > > >  	if (anon == NULL || pg == NULL) {
> > > > -		/*
> > > > -		 * arg! must unbusy our page and fail or sleep.
> > > > -		 */
> > > > -		if (uobjpage != PGO_DONTCARE) {
> > > > -			uvm_lock_pageq();
> > > > -			uvm_pageactivate(uobjpage);
> > > > -			uvm_unlock_pageq();
> > > 
> > > Shouldn't we still activate the page in this case?
> > 
> > Yes we should.  We do it in uvm_fault_lower_io().
> 
> Hmm, but if we got the page through uvm_fault_lower_lookup() it
> doesn't happen since uvm_fault_lower_lookup() does an early continue
> for the center page.  Or is that a corner case we don't really care
> about?

I can't say whether we really care or not.  This activation disappeared
in NetBSD and I'd rather keep it for the moment.  Thanks for spotting
it.

Updated diff below.
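To make the locking argument above concrete before diving into the diff,
here is a minimal sketch of the PGO_LOCKED calling pattern.  This is not
code from the tree: the function name is made up, the pgo_get() call is
shown with the generic uvm_pagerops argument order, and the single-page
request, constants and missing error handling are illustrative only.

	#include <sys/param.h>
	#include <sys/mman.h>
	#include <sys/rwlock.h>
	#include <uvm/uvm.h>

	/*
	 * Sketch only: why PG_BUSY is redundant for pgo_get() w/ PGO_LOCKED.
	 * The object lock is taken before the call and is not dropped until
	 * the caller is done with the returned pages, so nothing can free or
	 * release them in between.
	 */
	void
	pgo_locked_sketch(struct uvm_object *uobj, voff_t off)
	{
		struct vm_page *pgs[1];
		int npages = 1;

		rw_enter(uobj->vmobjlock, RW_WRITE);	/* caller takes the object lock */

		/*
		 * PGO_LOCKED: return already-resident pages only, without
		 * sleeping and without dropping the lock.
		 */
		(void)uobj->pgops->pgo_get(uobj, off, pgs, &npages,
		    0 /* centeridx */, PROT_READ, MADV_NORMAL, PGO_LOCKED);

		/* ... inspect or pmap_enter() the resident pages here ... */

		rw_exit(uobj->vmobjlock);		/* done with the pages */
	}

With the lock held across the whole lookup, marking the pages PG_BUSY
would only tell other threads something they can never observe anyway.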
Index: uvm/uvm_aobj.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
diff -u -p -r1.111 uvm_aobj.c
--- uvm/uvm_aobj.c	27 Nov 2024 10:41:38 -0000	1.111
+++ uvm/uvm_aobj.c	17 Dec 2024 09:04:52 -0000
@@ -1038,8 +1036,6 @@ uao_get(struct uvm_object *uobj, voff_t 
 			/*
 			 * useful page: plug it in our result array
 			 */
-			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
-			UVM_PAGE_OWN(ptmp, "uao_get1");
 			pps[lcv] = ptmp;
 			gotpages++;
 		}
Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
diff -u -p -r1.153 uvm_fault.c
--- uvm/uvm_fault.c	15 Dec 2024 11:02:59 -0000	1.153
+++ uvm/uvm_fault.c	17 Dec 2024 09:31:57 -0000
@@ -1140,14 +1139,13 @@ uvm_fault_lower_lookup(
 		    pages[lcv] == PGO_DONTCARE)
 			continue;
 
+		KASSERT((pages[lcv]->pg_flags & PG_BUSY) == 0);
 		KASSERT((pages[lcv]->pg_flags & PG_RELEASED) == 0);
 
 		/*
-		 * if center page is resident and not
-		 * PG_BUSY, then pgo_get made it PG_BUSY
-		 * for us and gave us a handle to it.
-		 * remember this page as "uobjpage."
-		 * (for later use).
+		 * if center page is resident and not PG_BUSY, then pgo_get
+		 * gave us a handle to it.
+		 * remember this page as "uobjpage." (for later use).
 		 */
 		if (lcv == flt->centeridx) {
 			uobjpage = pages[lcv];
@@ -1155,8 +1154,13 @@ uvm_fault_lower_lookup(
 		}
 
 		if (pmap_extract(ufi->orig_map->pmap, currva, &pa))
-			goto next;
+			continue;
 
+		/*
+		 * calling pgo_get with PGO_LOCKED returns us pages which
+		 * are neither busy nor released, so we don't need to check
+		 * for this.  we can just directly enter the pages.
+		 */
 		if (pages[lcv]->wire_count == 0) {
 			uvm_lock_pageq();
 			uvm_pageactivate(pages[lcv]);
@@ -1168,24 +1172,17 @@ uvm_fault_lower_lookup(
 		KASSERT(flt->wired == FALSE);
 
 		/*
-		 * Since this page isn't the page that's
-		 * actually faulting, ignore pmap_enter()
-		 * failures; it's not critical that we
+		 * Since this page isn't the page that's actually faulting,
+		 * ignore pmap_enter() failures; it's not critical that we
 		 * enter these right now.
+		 * NOTE: page can't be PG_WANTED or PG_RELEASED because we've
+		 * held the lock the whole time we've had the handle.
 		 */
 		(void) pmap_enter(ufi->orig_map->pmap, currva,
 		    VM_PAGE_TO_PHYS(pages[lcv]) | flt->pa_flags,
 		    flt->enter_prot & MASK(ufi->entry), PMAP_CANFAIL);
 		entered++;
 
-		/*
-		 * NOTE: page can't be PG_WANTED because
-		 * we've held the lock the whole time
-		 * we've had the handle.
-		 */
-next:
-		atomic_clearbits_int(&pages[lcv]->pg_flags, PG_BUSY);
-		UVM_PAGE_OWN(pages[lcv], NULL);
 	}
 	if (entered > 0)
 		pmap_update(ufi->orig_map->pmap);
@@ -1273,6 +1270,11 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 	if (uobjpage) {
 		/* update rusage counters */
 		curproc->p_ru.ru_minflt++;
+		if (uobjpage != PGO_DONTCARE) {
+			uvm_lock_pageq();
+			uvm_pageactivate(uobjpage);
+			uvm_unlock_pageq();
+		}
 	} else {
 		error = uvm_fault_lower_io(ufi, flt, &uobj, &uobjpage);
 		if (error != 0)
@@ -1330,21 +1332,6 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 	 * out of memory resources?
 	 */
 	if (anon == NULL || pg == NULL) {
-		/*
-		 * arg! must unbusy our page and fail or sleep.
-		 */
-		if (uobjpage != PGO_DONTCARE) {
-			uvm_lock_pageq();
-			uvm_pageactivate(uobjpage);
-			uvm_unlock_pageq();
-
-			if (uobjpage->pg_flags & PG_WANTED)
-				wakeup(uobjpage);
-			atomic_clearbits_int(&uobjpage->pg_flags,
-			    PG_BUSY|PG_WANTED);
-			UVM_PAGE_OWN(uobjpage, NULL);
-		}
-
 		/* unlock and fail ... */
 		uvmfault_unlockall(ufi, amap, uobj);
 		if (anon == NULL)
@@ -1395,16 +1382,6 @@ uvm_fault_lower(struct uvm_faultinfo *uf
 			pmap_page_protect(uobjpage, PROT_NONE);
 		}
 #endif
-
-		/* dispose of uobjpage. drop handle to uobj as well. */
-		if (uobjpage->pg_flags & PG_WANTED)
-			wakeup(uobjpage);
-		atomic_clearbits_int(&uobjpage->pg_flags,
-		    PG_BUSY|PG_WANTED);
-		UVM_PAGE_OWN(uobjpage, NULL);
-		uvm_lock_pageq();
-		uvm_pageactivate(uobjpage);
-		uvm_unlock_pageq();
 		/* done with copied uobjpage. */
 		rw_exit(uobj->vmobjlock);
 		uobj = NULL;
@@ -1586,15 +1563,13 @@ uvm_fault_lower_io(
 		locked = FALSE;
 	}
 
-	/* didn't get the lock? release the page and retry. */
-	if (locked == FALSE && pg != PGO_DONTCARE) {
+	/* release the page now, still holding object lock */
+	if (pg != PGO_DONTCARE) {
 		uvm_lock_pageq();
-		/* make sure it is in queues */
 		uvm_pageactivate(pg);
 		uvm_unlock_pageq();
 
 		if (pg->pg_flags & PG_WANTED)
-			/* still holding object lock */
 			wakeup(pg);
 		atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_WANTED);
 		UVM_PAGE_OWN(pg, NULL);
@@ -1607,7 +1582,8 @@ uvm_fault_lower_io(
 	}
 
 	/*
-	 * we have the data in pg which is PG_BUSY
+	 * we have the data in pg.  we are holding object lock (so the page
+	 * can't be released on us).
 	 */
 	*ruobj = uobj;
 	*ruobjpage = pg;
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
diff -u -p -r1.135 uvm_vnode.c
--- uvm/uvm_vnode.c	27 Nov 2024 10:41:38 -0000	1.135
+++ uvm/uvm_vnode.c	17 Dec 2024 09:04:52 -0000
@@ -992,8 +992,6 @@ uvn_get(struct uvm_object *uobj, voff_t 
 			/*
 			 * useful page: busy it and plug it in our
 			 * result array
 			 */
-			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
-			UVM_PAGE_OWN(ptmp, "uvn_get1");
 			pps[lcv] = ptmp;
 			gotpages++;
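For reference, this is roughly how the release path in uvm_fault_lower_io()
reads once the last two uvm_fault.c hunks are applied.  Nothing new here,
it is just the post-diff code stitched together, with the unchanged lines
between the two hunks elided:

	/* release the page now, still holding object lock */
	if (pg != PGO_DONTCARE) {
		uvm_lock_pageq();
		uvm_pageactivate(pg);
		uvm_unlock_pageq();

		if (pg->pg_flags & PG_WANTED)
			wakeup(pg);
		atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_WANTED);
		UVM_PAGE_OWN(pg, NULL);
	}

	/* ... unchanged code elided ... */

	/*
	 * we have the data in pg.  we are holding object lock (so the page
	 * can't be released on us).
	 */
	*ruobj = uobj;
	*ruobjpage = pg;

The point being that the activation and the PG_BUSY/PG_WANTED clearing now
happen in exactly one place, right after the object lock is reacquired,
instead of being duplicated in every exit path of uvm_fault_lower().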