Download raw body.
pgo_get() vs PG_BUSY
On 16/12/24(Mon) 21:07, Mark Kettenis wrote:
> > Date: Mon, 9 Dec 2024 10:49:56 +0100
> > From: Martin Pieuchot <mpi@grenadille.net>
> >
> > On 07/12/24(Sat) 13:11, Mark Kettenis wrote:
> > > > Date: Tue, 3 Dec 2024 09:46:20 +0100
> > > > From: Martin Pieuchot <mpi@grenadille.net>
> > > >
> > > > Diff below changes the behavior of the pgo_get() interface with PGO_LOCKED
> > > > and how the page currently fetched via uvm_fault_lower_io() is released.
> > > >
> > > > The pgo_get() w/ PGO_LOCKED implementations are always called with
> > > > `uobj->vmobjlock' locked and the lock is never released while manipulating
> > > > the pages.  So there is no need to set the PG_BUSY flag on the pages.
> > > > This is what the uao_get() and uvn_get() chunks are about.
> > > >
> > > > The chunks in uvm_fault_lower_lookup() are simplifications related to
> > > > the fact that none of the pgo_get() set the PG_BUSY flags.
> > > >
> > > > The rest of the diff deals with `uobjpage', the physical page fetched
> > > > with pgo_get() w/ PGO_SYNCIO. Instead of releasing the pages in
> > > > multiple places release it as soon as we've got the associated lock in
> > > > uvm_fault_lower_io(). The reasoning is the same as above.
> > > >
> > > > This diff is the complicated part of the current parallelisation I'm
> > > > working on, so do not hesitate if you have any question.
> > > >
> > > > ok?
> > >
> > > This seems to leave behind a comment that states that pgo_get() will
> > > set the PG_BUSY flag in uvm_fault_lower_lookup():
> > >
> > > /*
> > > * if center page is resident and not
> > > * PG_BUSY, then pgo_get made it PG_BUSY
> > > * for us and gave us a handle to it.
> > > * remember this page as "uobjpage."
> > > * (for later use).
> > > */
> > >
> > > That needs to be fixed.
> >
> > Indeed, updated diff below with fixed comments.
>
> Great!
>
> > > [...]
> > > > @@ -1324,21 +1323,6 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
> > > > * out of memory resources?
> > > > */
> > > > if (anon == NULL || pg == NULL) {
> > > > - /*
> > > > - * arg! must unbusy our page and fail or sleep.
> > > > - */
> > > > - if (uobjpage != PGO_DONTCARE) {
> > > > - uvm_lock_pageq();
> > > > - uvm_pageactivate(uobjpage);
> > > > - uvm_unlock_pageq();
> > >
> > > Shouldn't we still activate the page in this case?
> >
> > Yes we should. We do it in uvm_fault_lower_io().
>
> Hmm, but if we got the page through uvm_fault_lower_lookup() it
> doesn't happen since uvm_fault_lower_lookup() does an early continue
> for the center page.  Or is that a corner case we don't really care
> about?
I can't say if we don't really care or not. This activate disappeared
in NetBSD and I'd rather keep it for the moment. Thanks for spotting
it.
Updated diff below.
Index: uvm/uvm_aobj.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
diff -u -p -r1.111 uvm_aobj.c
--- uvm/uvm_aobj.c 27 Nov 2024 10:41:38 -0000 1.111
+++ uvm/uvm_aobj.c 17 Dec 2024 09:04:52 -0000
@@ -1038,8 +1038,6 @@ uao_get(struct uvm_object *uobj, voff_t
/*
* useful page: plug it in our result array
*/
- atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
- UVM_PAGE_OWN(ptmp, "uao_get1");
pps[lcv] = ptmp;
gotpages++;
}
Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
diff -u -p -r1.153 uvm_fault.c
--- uvm/uvm_fault.c 15 Dec 2024 11:02:59 -0000 1.153
+++ uvm/uvm_fault.c 17 Dec 2024 09:31:57 -0000
@@ -1140,14 +1140,13 @@ uvm_fault_lower_lookup(
pages[lcv] == PGO_DONTCARE)
continue;
+ KASSERT((pages[lcv]->pg_flags & PG_BUSY) == 0);
KASSERT((pages[lcv]->pg_flags & PG_RELEASED) == 0);
/*
- * if center page is resident and not
- * PG_BUSY, then pgo_get made it PG_BUSY
- * for us and gave us a handle to it.
- * remember this page as "uobjpage."
- * (for later use).
+ * if center page is resident and not PG_BUSY, then pgo_get
+ * gave us a handle to it.
+ * remember this page as "uobjpage." (for later use).
*/
if (lcv == flt->centeridx) {
uobjpage = pages[lcv];
@@ -1155,8 +1154,13 @@ uvm_fault_lower_lookup(
}
if (pmap_extract(ufi->orig_map->pmap, currva, &pa))
- goto next;
+ continue;
+ /*
+ * calling pgo_get with PGO_LOCKED returns us pages which
+ * are neither busy nor released, so we don't need to check
+ * for this. we can just directly enter the pages.
+ */
if (pages[lcv]->wire_count == 0) {
uvm_lock_pageq();
uvm_pageactivate(pages[lcv]);
@@ -1168,24 +1172,17 @@ uvm_fault_lower_lookup(
KASSERT(flt->wired == FALSE);
/*
- * Since this page isn't the page that's
- * actually faulting, ignore pmap_enter()
- * failures; it's not critical that we
+ * Since this page isn't the page that's actually faulting,
+ * ignore pmap_enter() failures; it's not critical that we
* enter these right now.
+ * NOTE: page can't be PG_WANTED or PG_RELEASED because we've
+ * held the lock the whole time we've had the handle.
*/
(void) pmap_enter(ufi->orig_map->pmap, currva,
VM_PAGE_TO_PHYS(pages[lcv]) | flt->pa_flags,
flt->enter_prot & MASK(ufi->entry), PMAP_CANFAIL);
entered++;
- /*
- * NOTE: page can't be PG_WANTED because
- * we've held the lock the whole time
- * we've had the handle.
- */
-next:
- atomic_clearbits_int(&pages[lcv]->pg_flags, PG_BUSY);
- UVM_PAGE_OWN(pages[lcv], NULL);
}
if (entered > 0)
pmap_update(ufi->orig_map->pmap);
@@ -1273,6 +1270,11 @@ uvm_fault_lower(struct uvm_faultinfo *uf
if (uobjpage) {
/* update rusage counters */
curproc->p_ru.ru_minflt++;
+ if (uobjpage != PGO_DONTCARE) {
+ uvm_lock_pageq();
+ uvm_pageactivate(uobjpage);
+ uvm_unlock_pageq();
+ }
} else {
error = uvm_fault_lower_io(ufi, flt, &uobj, &uobjpage);
if (error != 0)
@@ -1330,21 +1332,6 @@ uvm_fault_lower(struct uvm_faultinfo *uf
* out of memory resources?
*/
if (anon == NULL || pg == NULL) {
- /*
- * arg! must unbusy our page and fail or sleep.
- */
- if (uobjpage != PGO_DONTCARE) {
- uvm_lock_pageq();
- uvm_pageactivate(uobjpage);
- uvm_unlock_pageq();
-
- if (uobjpage->pg_flags & PG_WANTED)
- wakeup(uobjpage);
- atomic_clearbits_int(&uobjpage->pg_flags,
- PG_BUSY|PG_WANTED);
- UVM_PAGE_OWN(uobjpage, NULL);
- }
-
/* unlock and fail ... */
uvmfault_unlockall(ufi, amap, uobj);
if (anon == NULL)
@@ -1395,16 +1382,6 @@ uvm_fault_lower(struct uvm_faultinfo *uf
pmap_page_protect(uobjpage, PROT_NONE);
}
#endif
-
- /* dispose of uobjpage. drop handle to uobj as well. */
- if (uobjpage->pg_flags & PG_WANTED)
- wakeup(uobjpage);
- atomic_clearbits_int(&uobjpage->pg_flags,
- PG_BUSY|PG_WANTED);
- UVM_PAGE_OWN(uobjpage, NULL);
- uvm_lock_pageq();
- uvm_pageactivate(uobjpage);
- uvm_unlock_pageq();
/* done with copied uobjpage. */
rw_exit(uobj->vmobjlock);
uobj = NULL;
@@ -1586,15 +1563,13 @@ uvm_fault_lower_io(
locked = FALSE;
}
- /* didn't get the lock? release the page and retry. */
- if (locked == FALSE && pg != PGO_DONTCARE) {
+ /* release the page now, still holding object lock */
+ if (pg != PGO_DONTCARE) {
uvm_lock_pageq();
- /* make sure it is in queues */
uvm_pageactivate(pg);
uvm_unlock_pageq();
if (pg->pg_flags & PG_WANTED)
- /* still holding object lock */
wakeup(pg);
atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_WANTED);
UVM_PAGE_OWN(pg, NULL);
@@ -1607,7 +1582,8 @@ uvm_fault_lower_io(
}
/*
- * we have the data in pg which is PG_BUSY
+ * we have the data in pg. we are holding object lock (so the page
+ * can't be released on us).
*/
*ruobj = uobj;
*ruobjpage = pg;
Index: uvm/uvm_vnode.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
diff -u -p -r1.135 uvm_vnode.c
--- uvm/uvm_vnode.c 27 Nov 2024 10:41:38 -0000 1.135
+++ uvm/uvm_vnode.c 17 Dec 2024 09:04:52 -0000
@@ -992,8 +992,6 @@ uvn_get(struct uvm_object *uobj, voff_t
* useful page: busy it and plug it in our
* result array
*/
- atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
- UVM_PAGE_OWN(ptmp, "uvn_get1");
pps[lcv] = ptmp;
gotpages++;
pgo_get() vs PG_BUSY