Make pdaemon understandable
On 22/12/25(Mon) 20:39, Theo Buehler wrote:
> On Mon, Dec 22, 2025 at 12:11:47PM +0100, Martin Pieuchot wrote:
> > The diff below has been generated with "-b" so it is easier to review.
> > Attached is the one I intend to commit, which mostly includes indent
> > changes.
> >
> > It moves the final pageout of the swap cluster outside of the loop. This
> > allows removing the NULL checks and makes the body of the loop readable.
> >
> > I intentionally didn't change anything *inside* the loop, even though
> > some inconsistencies are now visible.
> >
>
> This reads mostly fine. One thing:
>
> > @@ -633,14 +633,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> >
> > /* Insert iterator. */
> > TAILQ_INSERT_AFTER(pglst, p, &iter, pageq);
> > - for (; p != NULL || swc.swc_slot != 0; p = uvmpd_iterator(pglst, p, &iter)) {
> > - /*
> > - * note that p can be NULL iff we have traversed the whole
> > - * list and need to do one final swap-backed clustered pageout.
> > - */
> > - uobj = NULL;
> > - anon = NULL;
> > - if (p) {
> > + while ((p = uvmpd_iterator(pglst, p, &iter)) != NULL) {
>
> I'm confused by this change. I would have expected a
>
> do { ... } while ((p = uvmpd_iterator(pglst, p, &iter)) != NULL);
>
> loop. On the first iteration of the for loop, the body operates on p
> itself, not on nextpg as returned by the iterator.
Thanks a lot. You're right: otherwise we'd skip the first page. Here's
an updated version of the diff that keeps the for () loop for now.
ok?
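
For reference, here is a minimal sketch of the control-flow change, using
simplified stand-in names (next_page(), flush(), struct cluster), not the
actual UVM code: the final clustered pageout is done once after the loop
instead of being triggered by a NULL sentinel inside it, and the for ()
form still visits the first page, whereas a
while ((p = next_page(p)) != NULL) loop would advance past it.

	#include <stdio.h>

	/* Simplified stand-ins, not the real pdaemon structures. */
	struct page { int id; struct page *next; };
	struct cluster { int used; };

	static struct page *
	next_page(struct page *p)	/* stand-in for uvmpd_iterator() */
	{
		return p->next;
	}

	static void
	flush(struct cluster *c)	/* stand-in for swapcluster_flush() */
	{
		if (c->used > 0) {
			printf("flush %d pages\n", c->used);
			c->used = 0;
		}
	}

	static void
	scan(struct page *p)
	{
		struct cluster c = { 0 };

		/*
		 * Keep the for () form: the body runs for the page `p'
		 * points to before next_page() is called.  A
		 *	while ((p = next_page(p)) != NULL)
		 * loop would advance first and skip that first page.
		 */
		for (; p != NULL; p = next_page(p)) {
			printf("consider page %d\n", p->id);
			c.used++;
		}

		/* Final clustered pageout, now done once, outside the loop. */
		flush(&c);
	}

	int
	main(void)
	{
		struct page p2 = { 2, NULL }, p1 = { 1, &p2 };

		scan(&p1);
		return 0;
	}
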
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -b -r1.143 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c 18 Dec 2025 16:50:42 -0000 1.143
+++ uvm/uvm_pdaemon.c 23 Dec 2025 11:49:53 -0000
@@ -484,7 +484,7 @@ swapcluster_flush(struct swapcluster *sw
int result;
if (swc->swc_slot == 0)
- return 0; // XXX
+ return 0;
KASSERT(swc->swc_nused <= swc->swc_nallocated);
slot = swc->swc_slot;
@@ -633,14 +633,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
/* Insert iterator. */
TAILQ_INSERT_AFTER(pglst, p, &iter, pageq);
- for (; p != NULL || swc.swc_slot != 0; p = uvmpd_iterator(pglst, p, &iter)) {
- /*
- * note that p can be NULL iff we have traversed the whole
- * list and need to do one final swap-backed clustered pageout.
- */
- uobj = NULL;
- anon = NULL;
- if (p) {
+ for (; p != NULL; p = uvmpd_iterator(pglst, p, &iter)) {
/*
* see if we've met our target
*/
@@ -648,16 +641,8 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
(uvmexp.paging + swapcluster_nused(&swc)
>= (shortage - freed))) ||
dirtyreacts == UVMPD_NUMDIRTYREACTS) {
- if (swc.swc_slot == 0) {
- /* exit now if no swap-i/o pending */
break;
}
-
- /* set p to null to signal final swap i/o */
- p = NULL;
- }
- }
- if (p) { /* if (we have a new page to consider) */
/*
* we are below target and have a new page to consider.
*/
@@ -792,7 +777,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
* of the page so that no one touches it while it is
* in I/O.
*/
-
swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
atomic_setbits_int(&p->pg_flags, PG_BUSY);
UVM_PAGE_OWN(p, "scan_inactive");
@@ -809,8 +793,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
/* start new cluster (if necessary) */
if (swapcluster_allocslots(&swc)) {
- atomic_clearbits_int(&p->pg_flags,
- PG_BUSY);
+ atomic_clearbits_int(&p->pg_flags, PG_BUSY);
UVM_PAGE_OWN(p, NULL);
dirtyreacts++;
uvm_unlock_pageq();
@@ -822,8 +805,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
/* add block to cluster */
if (swapcluster_add(&swc, p)) {
- atomic_clearbits_int(&p->pg_flags,
- PG_BUSY);
+ atomic_clearbits_int(&p->pg_flags, PG_BUSY);
UVM_PAGE_OWN(p, NULL);
dirtyreacts++;
uvm_unlock_pageq();
@@ -838,10 +820,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
if (swc.swc_nused < swc.swc_nallocated)
continue;
}
- } else {
- /* if p == NULL we must be doing a last swap i/o */
- swap_backed = TRUE;
- }
/*
* now consider doing the pageout.
@@ -884,6 +862,18 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
}
}
TAILQ_REMOVE(pglst, &iter, pageq);
+
+ /* final swap-backed clustered pageout */
+ if (swc.swc_slot > 0) {
+ uvm_unlock_pageq();
+ npages = swc.swc_nused;
+ result = swapcluster_flush(&swc);
+ uvm_lock_pageq();
+ if (result == VM_PAGER_PEND) {
+ atomic_add_int(&uvmexp.paging, npages);
+ uvmexp.pdpending++;
+ }
+ }
return freed;
}
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -r1.143 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c 18 Dec 2025 16:50:42 -0000 1.143
+++ uvm/uvm_pdaemon.c 23 Dec 2025 11:49:53 -0000
@@ -484,7 +484,7 @@ swapcluster_flush(struct swapcluster *sw
int result;
if (swc->swc_slot == 0)
- return 0; // XXX
+ return 0;
KASSERT(swc->swc_nused <= swc->swc_nallocated);
slot = swc->swc_slot;
@@ -633,141 +633,180 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
/* Insert iterator. */
TAILQ_INSERT_AFTER(pglst, p, &iter, pageq);
- for (; p != NULL || swc.swc_slot != 0; p = uvmpd_iterator(pglst, p, &iter)) {
+ for (; p != NULL; p = uvmpd_iterator(pglst, p, &iter)) {
/*
- * note that p can be NULL iff we have traversed the whole
- * list and need to do one final swap-backed clustered pageout.
+ * see if we've met our target
*/
- uobj = NULL;
- anon = NULL;
- if (p) {
- /*
- * see if we've met our target
- */
- if ((uvmpd_pma_done(pma) &&
- (uvmexp.paging + swapcluster_nused(&swc)
- >= (shortage - freed))) ||
- dirtyreacts == UVMPD_NUMDIRTYREACTS) {
- if (swc.swc_slot == 0) {
- /* exit now if no swap-i/o pending */
- break;
- }
+ if ((uvmpd_pma_done(pma) &&
+ (uvmexp.paging + swapcluster_nused(&swc)
+ >= (shortage - freed))) ||
+ dirtyreacts == UVMPD_NUMDIRTYREACTS) {
+ break;
+ }
+ /*
+ * we are below target and have a new page to consider.
+ */
+ uvmexp.pdscans++;
- /* set p to null to signal final swap i/o */
- p = NULL;
- }
+ /*
+ * If we are not short on memory and only interested
+ * in releasing pages from a given memory range, do not
+ * bother with other pages.
+ */
+ if (uvmexp.paging >= (shortage - freed) &&
+ !uvmpd_pma_done(pma) &&
+ !uvmpd_match_constraint(p, &pma->pm_constraint))
+ continue;
+
+ anon = p->uanon;
+ uobj = p->uobject;
+
+ /*
+ * first we attempt to lock the object that this page
+ * belongs to. if our attempt fails we skip on to
+ * the next page (no harm done). it is important to
+ * "try" locking the object as we are locking in the
+ * wrong order (pageq -> object) and we don't want to
+ * deadlock.
+ */
+ slock = uvmpd_trylockowner(p);
+ if (slock == NULL) {
+ continue;
}
- if (p) { /* if (we have a new page to consider) */
- /*
- * we are below target and have a new page to consider.
- */
- uvmexp.pdscans++;
- /*
- * If we are not short on memory and only interested
- * in releasing pages from a given memory range, do not
- * bother with other pages.
- */
- if (uvmexp.paging >= (shortage - freed) &&
- !uvmpd_pma_done(pma) &&
- !uvmpd_match_constraint(p, &pma->pm_constraint))
- continue;
+ /*
+ * move referenced pages back to active queue
+ * and skip to next page.
+ */
+ if (pmap_is_referenced(p)) {
+ uvm_unlock_pageq();
+ uvm_pageactivate(p);
+ rw_exit(slock);
+ uvm_lock_pageq();
+ uvmexp.pdreact++;
+ continue;
+ }
- anon = p->uanon;
- uobj = p->uobject;
+ if (p->pg_flags & PG_BUSY) {
+ rw_exit(slock);
+ uvmexp.pdbusy++;
+ continue;
+ }
- /*
- * first we attempt to lock the object that this page
- * belongs to. if our attempt fails we skip on to
- * the next page (no harm done). it is important to
- * "try" locking the object as we are locking in the
- * wrong order (pageq -> object) and we don't want to
- * deadlock.
- */
- slock = uvmpd_trylockowner(p);
- if (slock == NULL) {
- continue;
- }
+ /* does the page belong to an object? */
+ if (uobj != NULL) {
+ uvmexp.pdobscan++;
+ } else {
+ KASSERT(anon != NULL);
+ uvmexp.pdanscan++;
+ }
- /*
- * move referenced pages back to active queue
- * and skip to next page.
- */
- if (pmap_is_referenced(p)) {
- uvm_unlock_pageq();
- uvm_pageactivate(p);
- rw_exit(slock);
- uvm_lock_pageq();
- uvmexp.pdreact++;
- continue;
+ /*
+ * we now have the page queues locked.
+ * the page is not busy. if the page is clean we
+ * can free it now and continue.
+ */
+ if (p->pg_flags & PG_CLEAN) {
+ if (p->pg_flags & PQ_SWAPBACKED) {
+ /* this page now lives only in swap */
+ atomic_inc_int(&uvmexp.swpgonly);
}
- if (p->pg_flags & PG_BUSY) {
- rw_exit(slock);
- uvmexp.pdbusy++;
- continue;
- }
+ /* zap all mappings with pmap_page_protect... */
+ pmap_page_protect(p, PROT_NONE);
+ /* dequeue first to prevent lock recursion */
+ if (p->pg_flags & (PQ_ACTIVE|PQ_INACTIVE))
+ uvm_pagedequeue(p);
+ uvm_pagefree(p);
+ freed++;
+
+ if (anon) {
+
+ /*
+ * an anonymous page can only be clean
+ * if it has backing store assigned.
+ */
- /* does the page belong to an object? */
- if (uobj != NULL) {
- uvmexp.pdobscan++;
- } else {
- KASSERT(anon != NULL);
- uvmexp.pdanscan++;
- }
+ KASSERT(anon->an_swslot != 0);
- /*
- * we now have the page queues locked.
- * the page is not busy. if the page is clean we
- * can free it now and continue.
- */
- if (p->pg_flags & PG_CLEAN) {
- if (p->pg_flags & PQ_SWAPBACKED) {
- /* this page now lives only in swap */
- atomic_inc_int(&uvmexp.swpgonly);
- }
-
- /* zap all mappings with pmap_page_protect... */
- pmap_page_protect(p, PROT_NONE);
- /* dequeue first to prevent lock recursion */
- if (p->pg_flags & (PQ_ACTIVE|PQ_INACTIVE))
- uvm_pagedequeue(p);
- uvm_pagefree(p);
- freed++;
-
- if (anon) {
-
- /*
- * an anonymous page can only be clean
- * if it has backing store assigned.
- */
-
- KASSERT(anon->an_swslot != 0);
-
- /* remove from object */
- anon->an_page = NULL;
- }
- rw_exit(slock);
- continue;
+ /* remove from object */
+ anon->an_page = NULL;
}
+ rw_exit(slock);
+ continue;
+ }
- /*
- * this page is dirty, skip it if we'll have met our
- * free target when all the current pageouts complete.
- */
- if (uvmpd_pma_done(pma) &&
- (uvmexp.paging > (shortage - freed))) {
+ /*
+ * this page is dirty, skip it if we'll have met our
+ * free target when all the current pageouts complete.
+ */
+ if (uvmpd_pma_done(pma) &&
+ (uvmexp.paging > (shortage - freed))) {
+ rw_exit(slock);
+ continue;
+ }
+
+ /*
+ * this page is dirty, but we can't page it out
+ * since all pages in swap are only in swap.
+ * reactivate it so that we eventually cycle
+ * all pages thru the inactive queue.
+ */
+ if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
+ dirtyreacts++;
+ uvm_unlock_pageq();
+ uvm_pageactivate(p);
+ rw_exit(slock);
+ uvm_lock_pageq();
+ continue;
+ }
+
+ /*
+ * if the page is swap-backed and dirty and swap space
+ * is full, free any swap allocated to the page
+ * so that other pages can be paged out.
+ */
+ if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
+ uvmpd_dropswap(p);
+
+ /*
+ * the page we are looking at is dirty. we must
+ * clean it before it can be freed. to do this we
+ * first mark the page busy so that no one else will
+ * touch the page. we write protect all the mappings
+ * of the page so that no one touches it while it is
+ * in I/O.
+ */
+ swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
+ atomic_setbits_int(&p->pg_flags, PG_BUSY);
+ UVM_PAGE_OWN(p, "scan_inactive");
+ pmap_page_protect(p, PROT_READ);
+ uvmexp.pgswapout++;
+
+ /*
+ * for swap-backed pages we need to (re)allocate
+ * swap space.
+ */
+ if (swap_backed) {
+ /* free old swap slot (if any) */
+ uvmpd_dropswap(p);
+
+ /* start new cluster (if necessary) */
+ if (swapcluster_allocslots(&swc)) {
+ atomic_clearbits_int(&p->pg_flags, PG_BUSY);
+ UVM_PAGE_OWN(p, NULL);
+ dirtyreacts++;
+ uvm_unlock_pageq();
+ uvm_pageactivate(p);
rw_exit(slock);
+ uvm_lock_pageq();
continue;
}
- /*
- * this page is dirty, but we can't page it out
- * since all pages in swap are only in swap.
- * reactivate it so that we eventually cycle
- * all pages thru the inactive queue.
- */
- if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
+ /* add block to cluster */
+ if (swapcluster_add(&swc, p)) {
+ atomic_clearbits_int(&p->pg_flags, PG_BUSY);
+ UVM_PAGE_OWN(p, NULL);
dirtyreacts++;
uvm_unlock_pageq();
uvm_pageactivate(p);
@@ -775,72 +814,11 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
uvm_lock_pageq();
continue;
}
+ rw_exit(slock);
- /*
- * if the page is swap-backed and dirty and swap space
- * is full, free any swap allocated to the page
- * so that other pages can be paged out.
- */
- if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
- uvmpd_dropswap(p);
-
- /*
- * the page we are looking at is dirty. we must
- * clean it before it can be freed. to do this we
- * first mark the page busy so that no one else will
- * touch the page. we write protect all the mappings
- * of the page so that no one touches it while it is
- * in I/O.
- */
-
- swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
- atomic_setbits_int(&p->pg_flags, PG_BUSY);
- UVM_PAGE_OWN(p, "scan_inactive");
- pmap_page_protect(p, PROT_READ);
- uvmexp.pgswapout++;
-
- /*
- * for swap-backed pages we need to (re)allocate
- * swap space.
- */
- if (swap_backed) {
- /* free old swap slot (if any) */
- uvmpd_dropswap(p);
-
- /* start new cluster (if necessary) */
- if (swapcluster_allocslots(&swc)) {
- atomic_clearbits_int(&p->pg_flags,
- PG_BUSY);
- UVM_PAGE_OWN(p, NULL);
- dirtyreacts++;
- uvm_unlock_pageq();
- uvm_pageactivate(p);
- rw_exit(slock);
- uvm_lock_pageq();
- continue;
- }
-
- /* add block to cluster */
- if (swapcluster_add(&swc, p)) {
- atomic_clearbits_int(&p->pg_flags,
- PG_BUSY);
- UVM_PAGE_OWN(p, NULL);
- dirtyreacts++;
- uvm_unlock_pageq();
- uvm_pageactivate(p);
- rw_exit(slock);
- uvm_lock_pageq();
- continue;
- }
- rw_exit(slock);
-
- /* cluster not full yet? */
- if (swc.swc_nused < swc.swc_nallocated)
- continue;
- }
- } else {
- /* if p == NULL we must be doing a last swap i/o */
- swap_backed = TRUE;
+ /* cluster not full yet? */
+ if (swc.swc_nused < swc.swc_nallocated)
+ continue;
}
/*
@@ -884,6 +862,18 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
}
}
TAILQ_REMOVE(pglst, &iter, pageq);
+
+ /* final swap-backed clustered pageout */
+ if (swc.swc_slot > 0) {
+ uvm_unlock_pageq();
+ npages = swc.swc_nused;
+ result = swapcluster_flush(&swc);
+ uvm_lock_pageq();
+ if (result == VM_PAGER_PEND) {
+ atomic_add_int(&uvmexp.paging, npages);
+ uvmexp.pdpending++;
+ }
+ }
return freed;
}