From: Alexander Bluhm
Subject: Re: Make pdaemon understandable
To: tech@openbsd.org
Date: Mon, 22 Dec 2025 22:53:03 +0100

On Mon, Dec 22, 2025 at 12:11:47PM +0100, Martin Pieuchot wrote:
> Diff below has been generated with "-b" to be easily reviewable.
> Attached is the one I intend to commit which mostly includes indent
> changes.
> 
> It moves the final pageout of swap cluster outside of the loop.  This
> allows to remove the NULL checks and make the body of the loop readable.
> 
> I intentionally didn't change anything *inside* the loop even if we now
> see incoherency.
> 
> ok?

this diff passed regress with witness on i386

bluhm

> Index: uvm/uvm_pdaemon.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> diff -u -p -b -r1.143 uvm_pdaemon.c
> --- uvm/uvm_pdaemon.c	18 Dec 2025 16:50:42 -0000	1.143
> +++ uvm/uvm_pdaemon.c	22 Dec 2025 11:03:02 -0000
> @@ -484,7 +484,7 @@ swapcluster_flush(struct swapcluster *sw
>  	int result;
> 
>  	if (swc->swc_slot == 0)
> -		return 0; // XXX
> +		return 0;
>  	KASSERT(swc->swc_nused <= swc->swc_nallocated);
> 
>  	slot = swc->swc_slot;
> @@ -633,14 +633,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> 
>  	/* Insert iterator. */
>  	TAILQ_INSERT_AFTER(pglst, p, &iter, pageq);
> -	for (; p != NULL || swc.swc_slot != 0; p = uvmpd_iterator(pglst, p, &iter)) {
> -		/*
> -		 * note that p can be NULL iff we have traversed the whole
> -		 * list and need to do one final swap-backed clustered pageout.
> -		 */
> -		uobj = NULL;
> -		anon = NULL;
> -		if (p) {
> +	while ((p = uvmpd_iterator(pglst, p, &iter)) != NULL) {
>  		/*
>  		 * see if we've met our target
>  		 */
> @@ -648,16 +641,8 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>  		    (uvmexp.paging + swapcluster_nused(&swc)
>  		    >= (shortage - freed))) ||
>  		    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
> -				if (swc.swc_slot == 0) {
> -					/* exit now if no swap-i/o pending */
>  			break;
>  		}
> -
> -				/* set p to null to signal final swap i/o */
> -				p = NULL;
> -			}
> -		}
> -		if (p) {	/* if (we have a new page to consider) */
>  		/*
>  		 * we are below target and have a new page to consider.
>  		 */
> @@ -792,7 +777,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>  		 * of the page so that no one touches it while it is
>  		 * in I/O.
>  		 */
> -
>  		swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
>  		atomic_setbits_int(&p->pg_flags, PG_BUSY);
>  		UVM_PAGE_OWN(p, "scan_inactive");
> @@ -809,8 +793,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> 
>  			/* start new cluster (if necessary) */
>  			if (swapcluster_allocslots(&swc)) {
> -					atomic_clearbits_int(&p->pg_flags,
> -					    PG_BUSY);
> +				atomic_clearbits_int(&p->pg_flags, PG_BUSY);
>  				UVM_PAGE_OWN(p, NULL);
>  				dirtyreacts++;
>  				uvm_unlock_pageq();
> @@ -822,8 +805,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> 
>  			/* add block to cluster */
>  			if (swapcluster_add(&swc, p)) {
> -					atomic_clearbits_int(&p->pg_flags,
> -					    PG_BUSY);
> +				atomic_clearbits_int(&p->pg_flags, PG_BUSY);
>  				UVM_PAGE_OWN(p, NULL);
>  				dirtyreacts++;
>  				uvm_unlock_pageq();
> @@ -838,10 +820,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>  			if (swc.swc_nused < swc.swc_nallocated)
>  				continue;
>  		}
> -		} else {
> -			/* if p == NULL we must be doing a last swap i/o */
> -			swap_backed = TRUE;
> -		}
> 
>  		/*
>  		 * now consider doing the pageout.
> @@ -884,6 +862,18 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>  		}
>  	}
>  	TAILQ_REMOVE(pglst, &iter, pageq);
> +
> +	/* final swap-backed clustered pageout */
> +	if (swc.swc_slot > 0) {
> +		uvm_unlock_pageq();
> +		npages = swc.swc_nused;
> +		result = swapcluster_flush(&swc);
> +		uvm_lock_pageq();
> +		if (result == VM_PAGER_PEND) {
> +			atomic_add_int(&uvmexp.paging, npages);
> +			uvmexp.pdpending++;
> +		}
> +	}
> 
>  	return freed;
>  }
> Index: uvm/uvm_pdaemon.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> diff -u -p -r1.143 uvm_pdaemon.c
> --- uvm/uvm_pdaemon.c	18 Dec 2025 16:50:42 -0000	1.143
> +++ uvm/uvm_pdaemon.c	22 Dec 2025 11:03:02 -0000
> @@ -484,7 +484,7 @@ swapcluster_flush(struct swapcluster *sw
>  	int result;
> 
>  	if (swc->swc_slot == 0)
> -		return 0; // XXX
> +		return 0;
>  	KASSERT(swc->swc_nused <= swc->swc_nallocated);
> 
>  	slot = swc->swc_slot;
> @@ -633,141 +633,180 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> 
>  	/* Insert iterator. */
>  	TAILQ_INSERT_AFTER(pglst, p, &iter, pageq);
> -	for (; p != NULL || swc.swc_slot != 0; p = uvmpd_iterator(pglst, p, &iter)) {
> +	while ((p = uvmpd_iterator(pglst, p, &iter)) != NULL) {
>  		/*
> -		 * note that p can be NULL iff we have traversed the whole
> -		 * list and need to do one final swap-backed clustered pageout.
> +		 * see if we've met our target
>  		 */
> -		uobj = NULL;
> -		anon = NULL;
> -		if (p) {
> -			/*
> -			 * see if we've met our target
> -			 */
> -			if ((uvmpd_pma_done(pma) &&
> -			    (uvmexp.paging + swapcluster_nused(&swc)
> -			    >= (shortage - freed))) ||
> -			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
> -				if (swc.swc_slot == 0) {
> -					/* exit now if no swap-i/o pending */
> -					break;
> -				}
> +		if ((uvmpd_pma_done(pma) &&
> +		    (uvmexp.paging + swapcluster_nused(&swc)
> +		    >= (shortage - freed))) ||
> +		    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
> +			break;
> +		}
> +		/*
> +		 * we are below target and have a new page to consider.
> +		 */
> +		uvmexp.pdscans++;
> 
> -				/* set p to null to signal final swap i/o */
> -				p = NULL;
> -			}
> +		/*
> +		 * If we are not short on memory and only interested
> +		 * in releasing pages from a given memory range, do not
> +		 * bother with other pages.
> +		 */
> +		if (uvmexp.paging >= (shortage - freed) &&
> +		    !uvmpd_pma_done(pma) &&
> +		    !uvmpd_match_constraint(p, &pma->pm_constraint))
> +			continue;
> +
> +		anon = p->uanon;
> +		uobj = p->uobject;
> +
> +		/*
> +		 * first we attempt to lock the object that this page
> +		 * belongs to.  if our attempt fails we skip on to
> +		 * the next page (no harm done).  it is important to
> +		 * "try" locking the object as we are locking in the
> +		 * wrong order (pageq -> object) and we don't want to
> +		 * deadlock.
> +		 */
> +		slock = uvmpd_trylockowner(p);
> +		if (slock == NULL) {
> +			continue;
>  		}
> -		if (p) {	/* if (we have a new page to consider) */
> -			/*
> -			 * we are below target and have a new page to consider.
> -			 */
> -			uvmexp.pdscans++;
> 
> -			/*
> -			 * If we are not short on memory and only interested
> -			 * in releasing pages from a given memory range, do not
> -			 * bother with other pages.
> -			 */
> -			if (uvmexp.paging >= (shortage - freed) &&
> -			    !uvmpd_pma_done(pma) &&
> -			    !uvmpd_match_constraint(p, &pma->pm_constraint))
> -				continue;
> +		/*
> +		 * move referenced pages back to active queue
> +		 * and skip to next page.
> +		 */
> +		if (pmap_is_referenced(p)) {
> +			uvm_unlock_pageq();
> +			uvm_pageactivate(p);
> +			rw_exit(slock);
> +			uvm_lock_pageq();
> +			uvmexp.pdreact++;
> +			continue;
> +		}
> 
> -			anon = p->uanon;
> -			uobj = p->uobject;
> +		if (p->pg_flags & PG_BUSY) {
> +			rw_exit(slock);
> +			uvmexp.pdbusy++;
> +			continue;
> +		}
> 
> -			/*
> -			 * first we attempt to lock the object that this page
> -			 * belongs to.  if our attempt fails we skip on to
> -			 * the next page (no harm done).  it is important to
> -			 * "try" locking the object as we are locking in the
> -			 * wrong order (pageq -> object) and we don't want to
> -			 * deadlock.
> -			 */
> -			slock = uvmpd_trylockowner(p);
> -			if (slock == NULL) {
> -				continue;
> -			}
> +		/* does the page belong to an object? */
> +		if (uobj != NULL) {
> +			uvmexp.pdobscan++;
> +		} else {
> +			KASSERT(anon != NULL);
> +			uvmexp.pdanscan++;
> +		}
> 
> -			/*
> -			 * move referenced pages back to active queue
> -			 * and skip to next page.
> -			 */
> -			if (pmap_is_referenced(p)) {
> -				uvm_unlock_pageq();
> -				uvm_pageactivate(p);
> -				rw_exit(slock);
> -				uvm_lock_pageq();
> -				uvmexp.pdreact++;
> -				continue;
> +		/*
> +		 * we now have the page queues locked.
> +		 * the page is not busy.  if the page is clean we
> +		 * can free it now and continue.
> +		 */
> +		if (p->pg_flags & PG_CLEAN) {
> +			if (p->pg_flags & PQ_SWAPBACKED) {
> +				/* this page now lives only in swap */
> +				atomic_inc_int(&uvmexp.swpgonly);
> +			}
> 
> -			if (p->pg_flags & PG_BUSY) {
> -				rw_exit(slock);
> -				uvmexp.pdbusy++;
> -				continue;
> -			}
> +			/* zap all mappings with pmap_page_protect... */
> +			pmap_page_protect(p, PROT_NONE);
> +			/* dequeue first to prevent lock recursion */
> +			if (p->pg_flags & (PQ_ACTIVE|PQ_INACTIVE))
> +				uvm_pagedequeue(p);
> +			uvm_pagefree(p);
> +			freed++;
> +
> +			if (anon) {
> +
> +				/*
> +				 * an anonymous page can only be clean
> +				 * if it has backing store assigned.
> +				 */
> 
> -			/* does the page belong to an object? */
> -			if (uobj != NULL) {
> -				uvmexp.pdobscan++;
> -			} else {
> -				KASSERT(anon != NULL);
> -				uvmexp.pdanscan++;
> -			}
> +				KASSERT(anon->an_swslot != 0);
> 
> -			/*
> -			 * we now have the page queues locked.
> -			 * the page is not busy.  if the page is clean we
> -			 * can free it now and continue.
> -			 */
> -			if (p->pg_flags & PG_CLEAN) {
> -				if (p->pg_flags & PQ_SWAPBACKED) {
> -					/* this page now lives only in swap */
> -					atomic_inc_int(&uvmexp.swpgonly);
> -				}
> -
> -				/* zap all mappings with pmap_page_protect... */
> -				pmap_page_protect(p, PROT_NONE);
> -				/* dequeue first to prevent lock recursion */
> -				if (p->pg_flags & (PQ_ACTIVE|PQ_INACTIVE))
> -					uvm_pagedequeue(p);
> -				uvm_pagefree(p);
> -				freed++;
> -
> -				if (anon) {
> -
> -					/*
> -					 * an anonymous page can only be clean
> -					 * if it has backing store assigned.
> -					 */
> -
> -					KASSERT(anon->an_swslot != 0);
> -
> -					/* remove from object */
> -					anon->an_page = NULL;
> -				}
> -				rw_exit(slock);
> -				continue;
> +				/* remove from object */
> +				anon->an_page = NULL;
>  			}
> +			rw_exit(slock);
> +			continue;
> +		}
> 
> -			/*
> -			 * this page is dirty, skip it if we'll have met our
> -			 * free target when all the current pageouts complete.
> -			 */
> -			if (uvmpd_pma_done(pma) &&
> -			    (uvmexp.paging > (shortage - freed))) {
> +		/*
> +		 * this page is dirty, skip it if we'll have met our
> +		 * free target when all the current pageouts complete.
> +		 */
> +		if (uvmpd_pma_done(pma) &&
> +		    (uvmexp.paging > (shortage - freed))) {
> +			rw_exit(slock);
> +			continue;
> +		}
> +
> +		/*
> +		 * this page is dirty, but we can't page it out
> +		 * since all pages in swap are only in swap.
> +		 * reactivate it so that we eventually cycle
> +		 * all pages thru the inactive queue.
> +		 */
> +		if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
> +			dirtyreacts++;
> +			uvm_unlock_pageq();
> +			uvm_pageactivate(p);
> +			rw_exit(slock);
> +			uvm_lock_pageq();
> +			continue;
> +		}
> +
> +		/*
> +		 * if the page is swap-backed and dirty and swap space
> +		 * is full, free any swap allocated to the page
> +		 * so that other pages can be paged out.
> +		 */
> +		if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
> +			uvmpd_dropswap(p);
> +
> +		/*
> +		 * the page we are looking at is dirty.  we must
> +		 * clean it before it can be freed.  to do this we
> +		 * first mark the page busy so that no one else will
> +		 * touch the page.  we write protect all the mappings
> +		 * of the page so that no one touches it while it is
> +		 * in I/O.
> +		 */
> +		swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
> +		atomic_setbits_int(&p->pg_flags, PG_BUSY);
> +		UVM_PAGE_OWN(p, "scan_inactive");
> +		pmap_page_protect(p, PROT_READ);
> +		uvmexp.pgswapout++;
> +
> +		/*
> +		 * for swap-backed pages we need to (re)allocate
> +		 * swap space.
> +		 */
> +		if (swap_backed) {
> +			/* free old swap slot (if any) */
> +			uvmpd_dropswap(p);
> +
> +			/* start new cluster (if necessary) */
> +			if (swapcluster_allocslots(&swc)) {
> +				atomic_clearbits_int(&p->pg_flags, PG_BUSY);
> +				UVM_PAGE_OWN(p, NULL);
> +				dirtyreacts++;
> +				uvm_unlock_pageq();
> +				uvm_pageactivate(p);
>  				rw_exit(slock);
> +				uvm_lock_pageq();
>  				continue;
>  			}
> 
> -			/*
> -			 * this page is dirty, but we can't page it out
> -			 * since all pages in swap are only in swap.
> -			 * reactivate it so that we eventually cycle
> -			 * all pages thru the inactive queue.
> -			 */
> -			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
> +			/* add block to cluster */
> +			if (swapcluster_add(&swc, p)) {
> +				atomic_clearbits_int(&p->pg_flags, PG_BUSY);
> +				UVM_PAGE_OWN(p, NULL);
>  				dirtyreacts++;
>  				uvm_unlock_pageq();
>  				uvm_pageactivate(p);
> @@ -775,72 +814,11 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>  				uvm_lock_pageq();
>  				continue;
>  			}
> +			rw_exit(slock);
> 
> -			/*
> -			 * if the page is swap-backed and dirty and swap space
> -			 * is full, free any swap allocated to the page
> -			 * so that other pages can be paged out.
> -			 */
> -			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfilled())
> -				uvmpd_dropswap(p);
> -
> -			/*
> -			 * the page we are looking at is dirty.  we must
> -			 * clean it before it can be freed.  to do this we
> -			 * first mark the page busy so that no one else will
> -			 * touch the page.  we write protect all the mappings
> -			 * of the page so that no one touches it while it is
> -			 * in I/O.
> -			 */
> -
> -			swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
> -			atomic_setbits_int(&p->pg_flags, PG_BUSY);
> -			UVM_PAGE_OWN(p, "scan_inactive");
> -			pmap_page_protect(p, PROT_READ);
> -			uvmexp.pgswapout++;
> -
> -			/*
> -			 * for swap-backed pages we need to (re)allocate
> -			 * swap space.
> -			 */
> -			if (swap_backed) {
> -				/* free old swap slot (if any) */
> -				uvmpd_dropswap(p);
> -
> -				/* start new cluster (if necessary) */
> -				if (swapcluster_allocslots(&swc)) {
> -					atomic_clearbits_int(&p->pg_flags,
> -					    PG_BUSY);
> -					UVM_PAGE_OWN(p, NULL);
> -					dirtyreacts++;
> -					uvm_unlock_pageq();
> -					uvm_pageactivate(p);
> -					rw_exit(slock);
> -					uvm_lock_pageq();
> -					continue;
> -				}
> -
> -				/* add block to cluster */
> -				if (swapcluster_add(&swc, p)) {
> -					atomic_clearbits_int(&p->pg_flags,
> -					    PG_BUSY);
> -					UVM_PAGE_OWN(p, NULL);
> -					dirtyreacts++;
> -					uvm_unlock_pageq();
> -					uvm_pageactivate(p);
> -					rw_exit(slock);
> -					uvm_lock_pageq();
> -					continue;
> -				}
> -				rw_exit(slock);
> -
> -				/* cluster not full yet? */
> -				if (swc.swc_nused < swc.swc_nallocated)
> -					continue;
> -			}
> -		} else {
> -			/* if p == NULL we must be doing a last swap i/o */
> -			swap_backed = TRUE;
> +			/* cluster not full yet? */
> +			if (swc.swc_nused < swc.swc_nallocated)
> +				continue;
>  		}
> 
>  		/*
> @@ -884,6 +862,18 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>  		}
>  	}
>  	TAILQ_REMOVE(pglst, &iter, pageq);
> +
> +	/* final swap-backed clustered pageout */
> +	if (swc.swc_slot > 0) {
> +		uvm_unlock_pageq();
> +		npages = swc.swc_nused;
> +		result = swapcluster_flush(&swc);
> +		uvm_lock_pageq();
> +		if (result == VM_PAGER_PEND) {
> +			atomic_add_int(&uvmexp.paging, npages);
> +			uvmexp.pdpending++;
> +		}
> +	}
> 
>  	return freed;
>  }