Make pdaemon understandable
On Tue, Dec 23, 2025 at 12:54:34PM +0100, Martin Pieuchot wrote:
> On 22/12/25(Mon) 20:39, Theo Buehler wrote:
> > On Mon, Dec 22, 2025 at 12:11:47PM +0100, Martin Pieuchot wrote:
> > > Diff below has been generated with "-b" to be easily reviewable.
> > > Attached is the one I intend to commit which mostly includes indent
> > > changes.
> > >
> > > It moves the final pageout of the swap cluster outside of the loop.
> > > This allows removing the NULL checks and makes the body of the loop
> > > readable.
> > >
> > > I intentionally didn't change anything *inside* the loop even though
> > > we now see some incoherence.
> > >
> >
> > This reads mostly fine. One thing:
> >
> > > @@ -633,14 +633,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> > >
> > > /* Insert iterator. */
> > > TAILQ_INSERT_AFTER(pglst, p, &iter, pageq);
> > > - for (; p != NULL || swc.swc_slot != 0; p = uvmpd_iterator(pglst, p, &iter)) {
> > > - /*
> > > - * note that p can be NULL iff we have traversed the whole
> > > - * list and need to do one final swap-backed clustered pageout.
> > > - */
> > > - uobj = NULL;
> > > - anon = NULL;
> > > - if (p) {
> > > + while ((p = uvmpd_iterator(pglst, p, &iter)) != NULL) {
> >
> > I'm confused by this change. I would have expected a
> >
> > do { ... } while ((p = uvmpd_iterator(pglst, p, &iter)) != NULL);
> >
> > loop. The first time around, the for loop operates on p itself, not on
> > nextpg as returned by the iterator, so the while form skips the first
> > page.
>
> Thanks a lot. You're right, otherwise we'd skip the first page. Here's
> an updated version of the diff that keeps the for () loop for now.
>
> ok?
also passes regress
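
For the archives, the pitfall tb spotted is easy to reproduce in a
standalone sketch (hypothetical list type and iterator function below,
nothing here is the actual uvm code):

#include <stdio.h>

struct page { int id; struct page *next; };

/* stand-in for uvmpd_iterator(): return the element after p */
static struct page *
next_page(struct page *p)
{
	return p->next;
}

int
main(void)
{
	struct page c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct page *p;

	/* for () form: the body runs on p before advancing: visits 1, 2, 3 */
	for (p = &a; p != NULL; p = next_page(p))
		printf("for:   page %d\n", p->id);

	/* while () form: p is advanced before the body runs: visits 2, 3 */
	p = &a;
	while ((p = next_page(p)) != NULL)
		printf("while: page %d\n", p->id);

	return 0;
}
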
bluhm
> Index: uvm/uvm_pdaemon.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> diff -u -p -b -r1.143 uvm_pdaemon.c
> --- uvm/uvm_pdaemon.c 18 Dec 2025 16:50:42 -0000 1.143
> +++ uvm/uvm_pdaemon.c 23 Dec 2025 11:49:53 -0000
> @@ -484,7 +484,7 @@ swapcluster_flush(struct swapcluster *sw
> int result;
>
> if (swc->swc_slot == 0)
> - return 0; // XXX
> + return 0;
> KASSERT(swc->swc_nused <= swc->swc_nallocated);
>
> slot = swc->swc_slot;
> @@ -633,14 +633,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>
> /* Insert iterator. */
> TAILQ_INSERT_AFTER(pglst, p, &iter, pageq);
> - for (; p != NULL || swc.swc_slot != 0; p = uvmpd_iterator(pglst, p, &iter)) {
> - /*
> - * note that p can be NULL iff we have traversed the whole
> - * list and need to do one final swap-backed clustered pageout.
> - */
> - uobj = NULL;
> - anon = NULL;
> - if (p) {
> + for (; p != NULL; p = uvmpd_iterator(pglst, p, &iter)) {
> /*
> * see if we've met our target
> */
> @@ -648,16 +641,8 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> (uvmexp.paging + swapcluster_nused(&swc)
> >= (shortage - freed))) ||
> dirtyreacts == UVMPD_NUMDIRTYREACTS) {
> - if (swc.swc_slot == 0) {
> - /* exit now if no swap-i/o pending */
> break;
> }
> -
> - /* set p to null to signal final swap i/o */
> - p = NULL;
> - }
> - }
> - if (p) { /* if (we have a new page to consider) */
> /*
> * we are below target and have a new page to consider.
> */
> @@ -792,7 +777,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> * of the page so that no one touches it while it is
> * in I/O.
> */
> -
> swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
> atomic_setbits_int(&p->pg_flags, PG_BUSY);
> UVM_PAGE_OWN(p, "scan_inactive");
> @@ -809,8 +793,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>
> /* start new cluster (if necessary) */
> if (swapcluster_allocslots(&swc)) {
> - atomic_clearbits_int(&p->pg_flags,
> - PG_BUSY);
> + atomic_clearbits_int(&p->pg_flags, PG_BUSY);
> UVM_PAGE_OWN(p, NULL);
> dirtyreacts++;
> uvm_unlock_pageq();
> @@ -822,8 +805,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>
> /* add block to cluster */
> if (swapcluster_add(&swc, p)) {
> - atomic_clearbits_int(&p->pg_flags,
> - PG_BUSY);
> + atomic_clearbits_int(&p->pg_flags, PG_BUSY);
> UVM_PAGE_OWN(p, NULL);
> dirtyreacts++;
> uvm_unlock_pageq();
> @@ -838,10 +820,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> if (swc.swc_nused < swc.swc_nallocated)
> continue;
> }
> - } else {
> - /* if p == NULL we must be doing a last swap i/o */
> - swap_backed = TRUE;
> - }
>
> /*
> * now consider doing the pageout.
> @@ -884,6 +862,18 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> }
> }
> TAILQ_REMOVE(pglst, &iter, pageq);
> +
> + /* final swap-backed clustered pageout */
> + if (swc.swc_slot > 0) {
> + uvm_unlock_pageq();
> + npages = swc.swc_nused;
> + result = swapcluster_flush(&swc);
> + uvm_lock_pageq();
> + if (result == VM_PAGER_PEND) {
> + atomic_add_int(&uvmexp.paging, npages);
> + uvmexp.pdpending++;
> + }
> + }
>
> return freed;
> }
> Index: uvm/uvm_pdaemon.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> diff -u -p -r1.143 uvm_pdaemon.c
> --- uvm/uvm_pdaemon.c 18 Dec 2025 16:50:42 -0000 1.143
> +++ uvm/uvm_pdaemon.c 23 Dec 2025 11:49:53 -0000
> @@ -484,7 +484,7 @@ swapcluster_flush(struct swapcluster *sw
> int result;
>
> if (swc->swc_slot == 0)
> - return 0; // XXX
> + return 0;
> KASSERT(swc->swc_nused <= swc->swc_nallocated);
>
> slot = swc->swc_slot;
> @@ -633,141 +633,180 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>
> /* Insert iterator. */
> TAILQ_INSERT_AFTER(pglst, p, &iter, pageq);
> - for (; p != NULL || swc.swc_slot != 0; p = uvmpd_iterator(pglst, p, &iter)) {
> + for (; p != NULL; p = uvmpd_iterator(pglst, p, &iter)) {
> /*
> - * note that p can be NULL iff we have traversed the whole
> - * list and need to do one final swap-backed clustered pageout.
> + * see if we've met our target
> */
> - uobj = NULL;
> - anon = NULL;
> - if (p) {
> - /*
> - * see if we've met our target
> - */
> - if ((uvmpd_pma_done(pma) &&
> - (uvmexp.paging + swapcluster_nused(&swc)
> - >= (shortage - freed))) ||
> - dirtyreacts == UVMPD_NUMDIRTYREACTS) {
> - if (swc.swc_slot == 0) {
> - /* exit now if no swap-i/o pending */
> - break;
> - }
> + if ((uvmpd_pma_done(pma) &&
> + (uvmexp.paging + swapcluster_nused(&swc)
> + >= (shortage - freed))) ||
> + dirtyreacts == UVMPD_NUMDIRTYREACTS) {
> + break;
> + }
> + /*
> + * we are below target and have a new page to consider.
> + */
> + uvmexp.pdscans++;
>
> - /* set p to null to signal final swap i/o */
> - p = NULL;
> - }
> + /*
> + * If we are not short on memory and only interested
> + * in releasing pages from a given memory range, do not
> + * bother with other pages.
> + */
> + if (uvmexp.paging >= (shortage - freed) &&
> + !uvmpd_pma_done(pma) &&
> + !uvmpd_match_constraint(p, &pma->pm_constraint))
> + continue;
> +
> + anon = p->uanon;
> + uobj = p->uobject;
> +
> + /*
> + * first we attempt to lock the object that this page
> + * belongs to. if our attempt fails we skip on to
> + * the next page (no harm done). it is important to
> + * "try" locking the object as we are locking in the
> + * wrong order (pageq -> object) and we don't want to
> + * deadlock.
> + */
> + slock = uvmpd_trylockowner(p);
> + if (slock == NULL) {
> + continue;
> }
> - if (p) { /* if (we have a new page to consider) */
> - /*
> - * we are below target and have a new page to consider.
> - */
> - uvmexp.pdscans++;
>
> - /*
> - * If we are not short on memory and only interested
> - * in releasing pages from a given memory range, do not
> - * bother with other pages.
> - */
> - if (uvmexp.paging >= (shortage - freed) &&
> - !uvmpd_pma_done(pma) &&
> - !uvmpd_match_constraint(p, &pma->pm_constraint))
> - continue;
> + /*
> + * move referenced pages back to active queue
> + * and skip to next page.
> + */
> + if (pmap_is_referenced(p)) {
> + uvm_unlock_pageq();
> + uvm_pageactivate(p);
> + rw_exit(slock);
> + uvm_lock_pageq();
> + uvmexp.pdreact++;
> + continue;
> + }
>
> - anon = p->uanon;
> - uobj = p->uobject;
> + if (p->pg_flags & PG_BUSY) {
> + rw_exit(slock);
> + uvmexp.pdbusy++;
> + continue;
> + }
>
> - /*
> - * first we attempt to lock the object that this page
> - * belongs to. if our attempt fails we skip on to
> - * the next page (no harm done). it is important to
> - * "try" locking the object as we are locking in the
> - * wrong order (pageq -> object) and we don't want to
> - * deadlock.
> - */
> - slock = uvmpd_trylockowner(p);
> - if (slock == NULL) {
> - continue;
> - }
> + /* does the page belong to an object? */
> + if (uobj != NULL) {
> + uvmexp.pdobscan++;
> + } else {
> + KASSERT(anon != NULL);
> + uvmexp.pdanscan++;
> + }
>
> - /*
> - * move referenced pages back to active queue
> - * and skip to next page.
> - */
> - if (pmap_is_referenced(p)) {
> - uvm_unlock_pageq();
> - uvm_pageactivate(p);
> - rw_exit(slock);
> - uvm_lock_pageq();
> - uvmexp.pdreact++;
> - continue;
> + /*
> + * we now have the page queues locked.
> + * the page is not busy. if the page is clean we
> + * can free it now and continue.
> + */
> + if (p->pg_flags & PG_CLEAN) {
> + if (p->pg_flags & PQ_SWAPBACKED) {
> + /* this page now lives only in swap */
> + atomic_inc_int(&uvmexp.swpgonly);
> }
>
> - if (p->pg_flags & PG_BUSY) {
> - rw_exit(slock);
> - uvmexp.pdbusy++;
> - continue;
> - }
> + /* zap all mappings with pmap_page_protect... */
> + pmap_page_protect(p, PROT_NONE);
> + /* dequeue first to prevent lock recursion */
> + if (p->pg_flags & (PQ_ACTIVE|PQ_INACTIVE))
> + uvm_pagedequeue(p);
> + uvm_pagefree(p);
> + freed++;
> +
> + if (anon) {
> +
> + /*
> + * an anonymous page can only be clean
> + * if it has backing store assigned.
> + */
>
> - /* does the page belong to an object? */
> - if (uobj != NULL) {
> - uvmexp.pdobscan++;
> - } else {
> - KASSERT(anon != NULL);
> - uvmexp.pdanscan++;
> - }
> + KASSERT(anon->an_swslot != 0);
>
> - /*
> - * we now have the page queues locked.
> - * the page is not busy. if the page is clean we
> - * can free it now and continue.
> - */
> - if (p->pg_flags & PG_CLEAN) {
> - if (p->pg_flags & PQ_SWAPBACKED) {
> - /* this page now lives only in swap */
> - atomic_inc_int(&uvmexp.swpgonly);
> - }
> -
> - /* zap all mappings with pmap_page_protect... */
> - pmap_page_protect(p, PROT_NONE);
> - /* dequeue first to prevent lock recursion */
> - if (p->pg_flags & (PQ_ACTIVE|PQ_INACTIVE))
> - uvm_pagedequeue(p);
> - uvm_pagefree(p);
> - freed++;
> -
> - if (anon) {
> -
> - /*
> - * an anonymous page can only be clean
> - * if it has backing store assigned.
> - */
> -
> - KASSERT(anon->an_swslot != 0);
> -
> - /* remove from object */
> - anon->an_page = NULL;
> - }
> - rw_exit(slock);
> - continue;
> + /* remove from object */
> + anon->an_page = NULL;
> }
> + rw_exit(slock);
> + continue;
> + }
>
> - /*
> - * this page is dirty, skip it if we'll have met our
> - * free target when all the current pageouts complete.
> - */
> - if (uvmpd_pma_done(pma) &&
> - (uvmexp.paging > (shortage - freed))) {
> + /*
> + * this page is dirty, skip it if we'll have met our
> + * free target when all the current pageouts complete.
> + */
> + if (uvmpd_pma_done(pma) &&
> + (uvmexp.paging > (shortage - freed))) {
> + rw_exit(slock);
> + continue;
> + }
> +
> + /*
> + * this page is dirty, but we can't page it out
> + * since all pages in swap are only in swap.
> + * reactivate it so that we eventually cycle
> + * all pages thru the inactive queue.
> + */
> + if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
> + dirtyreacts++;
> + uvm_unlock_pageq();
> + uvm_pageactivate(p);
> + rw_exit(slock);
> + uvm_lock_pageq();
> + continue;
> + }
> +
> + /*
> + * if the page is swap-backed and dirty and swap space
> + * is full, free any swap allocated to the page
> + * so that other pages can be paged out.
> + */
> + if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
> + uvmpd_dropswap(p);
> +
> + /*
> + * the page we are looking at is dirty. we must
> + * clean it before it can be freed. to do this we
> + * first mark the page busy so that no one else will
> + * touch the page. we write protect all the mappings
> + * of the page so that no one touches it while it is
> + * in I/O.
> + */
> + swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
> + atomic_setbits_int(&p->pg_flags, PG_BUSY);
> + UVM_PAGE_OWN(p, "scan_inactive");
> + pmap_page_protect(p, PROT_READ);
> + uvmexp.pgswapout++;
> +
> + /*
> + * for swap-backed pages we need to (re)allocate
> + * swap space.
> + */
> + if (swap_backed) {
> + /* free old swap slot (if any) */
> + uvmpd_dropswap(p);
> +
> + /* start new cluster (if necessary) */
> + if (swapcluster_allocslots(&swc)) {
> + atomic_clearbits_int(&p->pg_flags, PG_BUSY);
> + UVM_PAGE_OWN(p, NULL);
> + dirtyreacts++;
> + uvm_unlock_pageq();
> + uvm_pageactivate(p);
> rw_exit(slock);
> + uvm_lock_pageq();
> continue;
> }
>
> - /*
> - * this page is dirty, but we can't page it out
> - * since all pages in swap are only in swap.
> - * reactivate it so that we eventually cycle
> - * all pages thru the inactive queue.
> - */
> - if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
> + /* add block to cluster */
> + if (swapcluster_add(&swc, p)) {
> + atomic_clearbits_int(&p->pg_flags, PG_BUSY);
> + UVM_PAGE_OWN(p, NULL);
> dirtyreacts++;
> uvm_unlock_pageq();
> uvm_pageactivate(p);
> @@ -775,72 +814,11 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> uvm_lock_pageq();
> continue;
> }
> + rw_exit(slock);
>
> - /*
> - * if the page is swap-backed and dirty and swap space
> - * is full, free any swap allocated to the page
> - * so that other pages can be paged out.
> - */
> - if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
> - uvmpd_dropswap(p);
> -
> - /*
> - * the page we are looking at is dirty. we must
> - * clean it before it can be freed. to do this we
> - * first mark the page busy so that no one else will
> - * touch the page. we write protect all the mappings
> - * of the page so that no one touches it while it is
> - * in I/O.
> - */
> -
> - swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
> - atomic_setbits_int(&p->pg_flags, PG_BUSY);
> - UVM_PAGE_OWN(p, "scan_inactive");
> - pmap_page_protect(p, PROT_READ);
> - uvmexp.pgswapout++;
> -
> - /*
> - * for swap-backed pages we need to (re)allocate
> - * swap space.
> - */
> - if (swap_backed) {
> - /* free old swap slot (if any) */
> - uvmpd_dropswap(p);
> -
> - /* start new cluster (if necessary) */
> - if (swapcluster_allocslots(&swc)) {
> - atomic_clearbits_int(&p->pg_flags,
> - PG_BUSY);
> - UVM_PAGE_OWN(p, NULL);
> - dirtyreacts++;
> - uvm_unlock_pageq();
> - uvm_pageactivate(p);
> - rw_exit(slock);
> - uvm_lock_pageq();
> - continue;
> - }
> -
> - /* add block to cluster */
> - if (swapcluster_add(&swc, p)) {
> - atomic_clearbits_int(&p->pg_flags,
> - PG_BUSY);
> - UVM_PAGE_OWN(p, NULL);
> - dirtyreacts++;
> - uvm_unlock_pageq();
> - uvm_pageactivate(p);
> - rw_exit(slock);
> - uvm_lock_pageq();
> - continue;
> - }
> - rw_exit(slock);
> -
> - /* cluster not full yet? */
> - if (swc.swc_nused < swc.swc_nallocated)
> - continue;
> - }
> - } else {
> - /* if p == NULL we must be doing a last swap i/o */
> - swap_backed = TRUE;
> + /* cluster not full yet? */
> + if (swc.swc_nused < swc.swc_nallocated)
> + continue;
> }
>
> /*
> @@ -884,6 +862,18 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> }
> }
> TAILQ_REMOVE(pglst, &iter, pageq);
> +
> + /* final swap-backed clustered pageout */
> + if (swc.swc_slot > 0) {
> + uvm_unlock_pageq();
> + npages = swc.swc_nused;
> + result = swapcluster_flush(&swc);
> + uvm_lock_pageq();
> + if (result == VM_PAGER_PEND) {
> + atomic_add_int(&uvmexp.paging, npages);
> + uvmexp.pdpending++;
> + }
> + }
>
> return freed;
> }
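
Reduced to a toy program, the shape of the change is: accumulate in the
loop, flush once after it, instead of smuggling a NULL page through the
loop body to trigger the last flush. All names below are made up, none
of this is uvm code:

#include <stdio.h>

struct cluster { int nused; };

static void
cluster_add(struct cluster *c, int item)
{
	printf("clustered item %d\n", item);
	c->nused++;
}

static void
cluster_flush(struct cluster *c)
{
	if (c->nused == 0)
		return;	/* nothing pending, like swapcluster_flush() */
	printf("flushing %d items\n", c->nused);
	c->nused = 0;
}

int
main(void)
{
	struct cluster swc = { 0 };
	int i;

	for (i = 1; i <= 5; i++)
		cluster_add(&swc, i);

	/* the final flush now lives after the loop, as in the diff */
	cluster_flush(&swc);
	return 0;
}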