From: Martin Pieuchot
Subject: Make pdaemon understandable
To: sthen@openbsd.org, bluhm@openbsd.org, tb@openbsd.org
Cc: tech@openbsd.org
Date: Mon, 22 Dec 2025 12:11:47 +0100

Diff below has been generated with "-b" to be easily reviewable.
Attached is the one I intend to commit, which mostly includes
indentation changes.

It moves the final pageout of the swap cluster outside of the loop.
This makes it possible to remove the NULL checks and makes the body of
the loop readable.  I intentionally didn't change anything *inside* the
loop, even if some incoherence is now visible.

ok?

Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -b -r1.143 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c        18 Dec 2025 16:50:42 -0000        1.143
+++ uvm/uvm_pdaemon.c        22 Dec 2025 11:03:02 -0000
@@ -484,7 +484,7 @@ swapcluster_flush(struct swapcluster *sw
         int result;
 
         if (swc->swc_slot == 0)
-                return 0; // XXX
+                return 0;
         KASSERT(swc->swc_nused <= swc->swc_nallocated);
 
         slot = swc->swc_slot;
@@ -633,14 +633,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
         /* Insert iterator. */
         TAILQ_INSERT_AFTER(pglst, p, &iter, pageq);
 
-        for (; p != NULL || swc.swc_slot != 0; p = uvmpd_iterator(pglst, p, &iter)) {
-                /*
-                 * note that p can be NULL iff we have traversed the whole
-                 * list and need to do one final swap-backed clustered pageout.
-                 */
-                uobj = NULL;
-                anon = NULL;
-                if (p) {
+        while ((p = uvmpd_iterator(pglst, p, &iter)) != NULL) {
                 /*
                  * see if we've met our target
                  */
@@ -648,16 +641,8 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
                     (uvmexp.paging + swapcluster_nused(&swc)
                     >= (shortage - freed))) ||
                     dirtyreacts == UVMPD_NUMDIRTYREACTS) {
-                                if (swc.swc_slot == 0) {
-                                        /* exit now if no swap-i/o pending */
                         break;
                 }
-
-                                /* set p to null to signal final swap i/o */
-                                p = NULL;
-                        }
-                }
-                if (p) {        /* if (we have a new page to consider) */
                 /*
                  * we are below target and have a new page to consider.
                  */
@@ -792,7 +777,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
                  * of the page so that no one touches it while it is
                  * in I/O.
                  */
-
                 swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
                 atomic_setbits_int(&p->pg_flags, PG_BUSY);
                 UVM_PAGE_OWN(p, "scan_inactive");
@@ -809,8 +793,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 
                         /* start new cluster (if necessary) */
                         if (swapcluster_allocslots(&swc)) {
-                                        atomic_clearbits_int(&p->pg_flags,
-                                            PG_BUSY);
+                                atomic_clearbits_int(&p->pg_flags, PG_BUSY);
                                 UVM_PAGE_OWN(p, NULL);
                                 dirtyreacts++;
                                 uvm_unlock_pageq();
@@ -822,8 +805,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 
                         /* add block to cluster */
                         if (swapcluster_add(&swc, p)) {
-                                        atomic_clearbits_int(&p->pg_flags,
-                                            PG_BUSY);
+                                atomic_clearbits_int(&p->pg_flags, PG_BUSY);
                                 UVM_PAGE_OWN(p, NULL);
                                 dirtyreacts++;
                                 uvm_unlock_pageq();
@@ -838,10 +820,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
                         if (swc.swc_nused < swc.swc_nallocated)
                                 continue;
                 }
-                } else {
-                        /* if p == NULL we must be doing a last swap i/o */
-                        swap_backed = TRUE;
-                }
 
                 /*
                  * now consider doing the pageout.
@@ -884,6 +862,18 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
                 }
         }
         TAILQ_REMOVE(pglst, &iter, pageq);
+
+        /* final swap-backed clustered pageout */
+        if (swc.swc_slot > 0) {
+                uvm_unlock_pageq();
+                npages = swc.swc_nused;
+                result = swapcluster_flush(&swc);
+                uvm_lock_pageq();
+                if (result == VM_PAGER_PEND) {
+                        atomic_add_int(&uvmexp.paging, npages);
+                        uvmexp.pdpending++;
+                }
+        }
 
         return freed;
 }

Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -r1.143 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c        18 Dec 2025 16:50:42 -0000        1.143
+++ uvm/uvm_pdaemon.c        22 Dec 2025 11:03:02 -0000
@@ -484,7 +484,7 @@ swapcluster_flush(struct swapcluster *sw
         int result;
 
         if (swc->swc_slot == 0)
-                return 0; // XXX
+                return 0;
         KASSERT(swc->swc_nused <= swc->swc_nallocated);
 
         slot = swc->swc_slot;
@@ -633,141 +633,180 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
         /* Insert iterator. */
         TAILQ_INSERT_AFTER(pglst, p, &iter, pageq);
 
-        for (; p != NULL || swc.swc_slot != 0; p = uvmpd_iterator(pglst, p, &iter)) {
+        while ((p = uvmpd_iterator(pglst, p, &iter)) != NULL) {
                 /*
-                 * note that p can be NULL iff we have traversed the whole
-                 * list and need to do one final swap-backed clustered pageout.
+                 * see if we've met our target
                  */
-                uobj = NULL;
-                anon = NULL;
-                if (p) {
-                        /*
-                         * see if we've met our target
-                         */
-                        if ((uvmpd_pma_done(pma) &&
-                            (uvmexp.paging + swapcluster_nused(&swc)
-                            >= (shortage - freed))) ||
-                            dirtyreacts == UVMPD_NUMDIRTYREACTS) {
-                                if (swc.swc_slot == 0) {
-                                        /* exit now if no swap-i/o pending */
-                                        break;
-                                }
+                if ((uvmpd_pma_done(pma) &&
+                    (uvmexp.paging + swapcluster_nused(&swc)
+                    >= (shortage - freed))) ||
+                    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
+                        break;
+                }
+                /*
+                 * we are below target and have a new page to consider.
+                 */
+                uvmexp.pdscans++;
 
-                                /* set p to null to signal final swap i/o */
-                                p = NULL;
-                        }
+                /*
+                 * If we are not short on memory and only interested
+                 * in releasing pages from a given memory range, do not
+                 * bother with other pages.
+                 */
+                if (uvmexp.paging >= (shortage - freed) &&
+                    !uvmpd_pma_done(pma) &&
+                    !uvmpd_match_constraint(p, &pma->pm_constraint))
+                        continue;
+
+                anon = p->uanon;
+                uobj = p->uobject;
+
+                /*
+                 * first we attempt to lock the object that this page
+                 * belongs to. if our attempt fails we skip on to
+                 * the next page (no harm done). it is important to
+                 * "try" locking the object as we are locking in the
+                 * wrong order (pageq -> object) and we don't want to
+                 * deadlock.
+                 */
+                slock = uvmpd_trylockowner(p);
+                if (slock == NULL) {
+                        continue;
                 }
-                if (p) {        /* if (we have a new page to consider) */
-                        /*
-                         * we are below target and have a new page to consider.
-                         */
-                        uvmexp.pdscans++;
-
-                        /*
-                         * If we are not short on memory and only interested
-                         * in releasing pages from a given memory range, do not
-                         * bother with other pages.
-                         */
-                        if (uvmexp.paging >= (shortage - freed) &&
-                            !uvmpd_pma_done(pma) &&
-                            !uvmpd_match_constraint(p, &pma->pm_constraint))
-                                continue;
+                /*
+                 * move referenced pages back to active queue
+                 * and skip to next page.
+                 */
+                if (pmap_is_referenced(p)) {
+                        uvm_unlock_pageq();
+                        uvm_pageactivate(p);
+                        rw_exit(slock);
+                        uvm_lock_pageq();
+                        uvmexp.pdreact++;
+                        continue;
+                }
 
-                        anon = p->uanon;
-                        uobj = p->uobject;
+                if (p->pg_flags & PG_BUSY) {
+                        rw_exit(slock);
+                        uvmexp.pdbusy++;
+                        continue;
+                }
 
-                        /*
-                         * first we attempt to lock the object that this page
-                         * belongs to. if our attempt fails we skip on to
-                         * the next page (no harm done). it is important to
-                         * "try" locking the object as we are locking in the
-                         * wrong order (pageq -> object) and we don't want to
-                         * deadlock.
-                         */
-                        slock = uvmpd_trylockowner(p);
-                        if (slock == NULL) {
-                                continue;
-                        }
+                /* does the page belong to an object? */
+                if (uobj != NULL) {
+                        uvmexp.pdobscan++;
+                } else {
+                        KASSERT(anon != NULL);
+                        uvmexp.pdanscan++;
+                }
 
-                        /*
-                         * move referenced pages back to active queue
-                         * and skip to next page.
-                         */
-                        if (pmap_is_referenced(p)) {
-                                uvm_unlock_pageq();
-                                uvm_pageactivate(p);
-                                rw_exit(slock);
-                                uvm_lock_pageq();
-                                uvmexp.pdreact++;
-                                continue;
+                /*
+                 * we now have the page queues locked.
+                 * the page is not busy. if the page is clean we
+                 * can free it now and continue.
+                 */
+                if (p->pg_flags & PG_CLEAN) {
+                        if (p->pg_flags & PQ_SWAPBACKED) {
+                                /* this page now lives only in swap */
+                                atomic_inc_int(&uvmexp.swpgonly);
                         }
-                        if (p->pg_flags & PG_BUSY) {
-                                rw_exit(slock);
-                                uvmexp.pdbusy++;
-                                continue;
-                        }
+                        /* zap all mappings with pmap_page_protect... */
+                        pmap_page_protect(p, PROT_NONE);
+                        /* dequeue first to prevent lock recursion */
+                        if (p->pg_flags & (PQ_ACTIVE|PQ_INACTIVE))
+                                uvm_pagedequeue(p);
+                        uvm_pagefree(p);
+                        freed++;
+
+                        if (anon) {
+
+                                /*
+                                 * an anonymous page can only be clean
+                                 * if it has backing store assigned.
+                                 */
 
-                        /* does the page belong to an object? */
-                        if (uobj != NULL) {
-                                uvmexp.pdobscan++;
-                        } else {
-                                KASSERT(anon != NULL);
-                                uvmexp.pdanscan++;
-                        }
+                                KASSERT(anon->an_swslot != 0);
 
-                        /*
-                         * we now have the page queues locked.
-                         * the page is not busy. if the page is clean we
-                         * can free it now and continue.
-                         */
-                        if (p->pg_flags & PG_CLEAN) {
-                                if (p->pg_flags & PQ_SWAPBACKED) {
-                                        /* this page now lives only in swap */
-                                        atomic_inc_int(&uvmexp.swpgonly);
-                                }
-
-                                /* zap all mappings with pmap_page_protect... */
-                                pmap_page_protect(p, PROT_NONE);
-                                /* dequeue first to prevent lock recursion */
-                                if (p->pg_flags & (PQ_ACTIVE|PQ_INACTIVE))
-                                        uvm_pagedequeue(p);
-                                uvm_pagefree(p);
-                                freed++;
-
-                                if (anon) {
-
-                                        /*
-                                         * an anonymous page can only be clean
-                                         * if it has backing store assigned.
-                                         */
-
-                                        KASSERT(anon->an_swslot != 0);
-
-                                        /* remove from object */
-                                        anon->an_page = NULL;
-                                }
-                                rw_exit(slock);
-                                continue;
+                                /* remove from object */
+                                anon->an_page = NULL;
                         }
+                        rw_exit(slock);
+                        continue;
+                }
 
-                        /*
-                         * this page is dirty, skip it if we'll have met our
-                         * free target when all the current pageouts complete.
-                         */
-                        if (uvmpd_pma_done(pma) &&
-                            (uvmexp.paging > (shortage - freed))) {
+                /*
+                 * this page is dirty, skip it if we'll have met our
+                 * free target when all the current pageouts complete.
+                 */
+                if (uvmpd_pma_done(pma) &&
+                    (uvmexp.paging > (shortage - freed))) {
+                        rw_exit(slock);
+                        continue;
+                }
+
+                /*
+                 * this page is dirty, but we can't page it out
+                 * since all pages in swap are only in swap.
+                 * reactivate it so that we eventually cycle
+                 * all pages thru the inactive queue.
+                 */
+                if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
+                        dirtyreacts++;
+                        uvm_unlock_pageq();
+                        uvm_pageactivate(p);
+                        rw_exit(slock);
+                        uvm_lock_pageq();
+                        continue;
+                }
+
+                /*
+                 * if the page is swap-backed and dirty and swap space
+                 * is full, free any swap allocated to the page
+                 * so that other pages can be paged out.
+                 */
+                if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
+                        uvmpd_dropswap(p);
+
+                /*
+                 * the page we are looking at is dirty. we must
+                 * clean it before it can be freed. to do this we
+                 * first mark the page busy so that no one else will
+                 * touch the page. we write protect all the mappings
+                 * of the page so that no one touches it while it is
+                 * in I/O.
+                 */
+                swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
+                atomic_setbits_int(&p->pg_flags, PG_BUSY);
+                UVM_PAGE_OWN(p, "scan_inactive");
+                pmap_page_protect(p, PROT_READ);
+                uvmexp.pgswapout++;
+
+                /*
+                 * for swap-backed pages we need to (re)allocate
+                 * swap space.
+                 */
+                if (swap_backed) {
+                        /* free old swap slot (if any) */
+                        uvmpd_dropswap(p);
+
+                        /* start new cluster (if necessary) */
+                        if (swapcluster_allocslots(&swc)) {
+                                atomic_clearbits_int(&p->pg_flags, PG_BUSY);
+                                UVM_PAGE_OWN(p, NULL);
+                                dirtyreacts++;
+                                uvm_unlock_pageq();
+                                uvm_pageactivate(p);
                                 rw_exit(slock);
+                                uvm_lock_pageq();
                                 continue;
                         }
 
-                        /*
-                         * this page is dirty, but we can't page it out
-                         * since all pages in swap are only in swap.
-                         * reactivate it so that we eventually cycle
-                         * all pages thru the inactive queue.
-                         */
-                        if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
+                        /* add block to cluster */
+                        if (swapcluster_add(&swc, p)) {
+                                atomic_clearbits_int(&p->pg_flags, PG_BUSY);
+                                UVM_PAGE_OWN(p, NULL);
                                 dirtyreacts++;
                                 uvm_unlock_pageq();
                                 uvm_pageactivate(p);
@@ -775,72 +814,11 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
                                 uvm_lock_pageq();
                                 continue;
                         }
+                        rw_exit(slock);
 
-                        /*
-                         * if the page is swap-backed and dirty and swap space
-                         * is full, free any swap allocated to the page
-                         * so that other pages can be paged out.
-                         */
-                        if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
-                                uvmpd_dropswap(p);
-
-                        /*
-                         * the page we are looking at is dirty. we must
-                         * clean it before it can be freed. to do this we
-                         * first mark the page busy so that no one else will
-                         * touch the page. we write protect all the mappings
-                         * of the page so that no one touches it while it is
-                         * in I/O.
-                         */
-
-                        swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
-                        atomic_setbits_int(&p->pg_flags, PG_BUSY);
-                        UVM_PAGE_OWN(p, "scan_inactive");
-                        pmap_page_protect(p, PROT_READ);
-                        uvmexp.pgswapout++;
-
-                        /*
-                         * for swap-backed pages we need to (re)allocate
-                         * swap space.
-                         */
-                        if (swap_backed) {
-                                /* free old swap slot (if any) */
-                                uvmpd_dropswap(p);
-
-                                /* start new cluster (if necessary) */
-                                if (swapcluster_allocslots(&swc)) {
-                                        atomic_clearbits_int(&p->pg_flags,
-                                            PG_BUSY);
-                                        UVM_PAGE_OWN(p, NULL);
-                                        dirtyreacts++;
-                                        uvm_unlock_pageq();
-                                        uvm_pageactivate(p);
-                                        rw_exit(slock);
-                                        uvm_lock_pageq();
-                                        continue;
-                                }
-
-                                /* add block to cluster */
-                                if (swapcluster_add(&swc, p)) {
-                                        atomic_clearbits_int(&p->pg_flags,
-                                            PG_BUSY);
-                                        UVM_PAGE_OWN(p, NULL);
-                                        dirtyreacts++;
-                                        uvm_unlock_pageq();
-                                        uvm_pageactivate(p);
-                                        rw_exit(slock);
-                                        uvm_lock_pageq();
-                                        continue;
-                                }
-                                rw_exit(slock);
-
-                                /* cluster not full yet? */
-                                if (swc.swc_nused < swc.swc_nallocated)
-                                        continue;
-                        }
-                } else {
-                        /* if p == NULL we must be doing a last swap i/o */
-                        swap_backed = TRUE;
+                        /* cluster not full yet? */
+                        if (swc.swc_nused < swc.swc_nallocated)
+                                continue;
                 }
 
                 /*
@@ -884,6 +862,18 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
                 }
         }
         TAILQ_REMOVE(pglst, &iter, pageq);
+
+        /* final swap-backed clustered pageout */
+        if (swc.swc_slot > 0) {
+                uvm_unlock_pageq();
+                npages = swc.swc_nused;
+                result = swapcluster_flush(&swc);
+                uvm_lock_pageq();
+                if (result == VM_PAGER_PEND) {
+                        atomic_add_int(&uvmexp.paging, npages);
+                        uvmexp.pdpending++;
+                }
+        }
 
         return freed;
 }
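For readers who want the shape of the change without the UVM context, here is a
minimal, self-contained sketch of the same control-flow transformation.  All
names below (struct cluster, cluster_add(), cluster_flush()) are made up for
the illustration and are not the kernel API: instead of re-entering the loop
with a NULL sentinel to trigger one last flush, the batch is simply flushed
once after the loop.

/*
 * Standalone sketch (not the UVM code): the batching "cluster" is
 * flushed once after the loop instead of re-entering the loop with a
 * NULL sentinel.  All names here are hypothetical.
 */
#include <stdio.h>

#define CLUSTER_SIZE 4

struct cluster {
        int slots[CLUSTER_SIZE];
        int nused;
};

static void
cluster_flush(struct cluster *c)
{
        if (c->nused == 0)
                return;
        printf("flushing %d item(s)\n", c->nused);
        c->nused = 0;
}

static int
cluster_add(struct cluster *c, int item)
{
        if (c->nused == CLUSTER_SIZE)
                return -1;              /* full: caller must flush first */
        c->slots[c->nused++] = item;
        return 0;
}

int
main(void)
{
        struct cluster c = { .nused = 0 };
        int items[] = { 1, 2, 3, 4, 5, 6 };
        int i, n = sizeof(items) / sizeof(items[0]);

        /*
         * The loop only ever handles a real item; there is no sentinel
         * iteration and no "is this the final pass?" check inside it.
         */
        for (i = 0; i < n; i++) {
                if (cluster_add(&c, items[i]) == -1) {
                        cluster_flush(&c);
                        cluster_add(&c, items[i]);
                }
        }

        /* Equivalent of the final swap-backed pageout: flush once here. */
        cluster_flush(&c);
        return 0;
}

The kernel diff does the same thing by calling swapcluster_flush() once after
TAILQ_REMOVE(): the loop body only ever sees a real page, so the p == NULL
special case disappears.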