From: Martin Pieuchot
Subject: uvmpd_iterator
To: tech@openbsd.org
Date: Wed, 26 Feb 2025 15:09:50 +0100

Currently the pdaemon does funny dances when iterating the inactive
list in order to grab the next page on the list.

Diff below simplifies those dances by inserting a marker (iterator) on
the list.  This is necessary to reduce the scope of the uvm_pageq_lock.
A small standalone sketch of the marker technique follows the diff.

ok?

Index: uvm/uvm_page.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_page.h,v
diff -u -p -r1.72 uvm_page.h
--- uvm/uvm_page.h	19 Feb 2025 11:07:47 -0000	1.72
+++ uvm/uvm_page.h	26 Feb 2025 14:00:08 -0000
@@ -147,13 +147,12 @@ struct vm_page {
 #define	PG_RDONLY	0x00000080	/* page must be mapped read-only */
 #define	PG_ZERO		0x00000100	/* page is pre-zero'd */
 #define	PG_DEV		0x00000200	/* page is in device space, lay off */
-
-#define	PG_PAGER1	0x00001000	/* pager-specific flag */
 #define	PG_MASK		0x0000ffff
 
 #define	PQ_FREE		0x00010000	/* page is on free list */
 #define	PQ_INACTIVE	0x00020000	/* page is in inactive list */
 #define	PQ_ACTIVE	0x00040000	/* page is in active list */
+#define	PQ_ITER		0x00080000	/* page is an iterator marker */
 #define	PQ_ANON		0x00100000	/* page is part of an anon, rather
					   than an uvm_object */
 #define	PQ_AOBJ		0x00200000	/* page is part of an anonymous
Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -r1.134 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c	25 Jan 2025 08:55:52 -0000	1.134
+++ uvm/uvm_pdaemon.c	26 Feb 2025 14:02:39 -0000
@@ -453,6 +453,29 @@ uvmpd_match_constraint(struct vm_page *p
 	return 0;
 }
 
+struct vm_page *
+uvmpd_iterator(struct pglist *pglst, struct vm_page *p, struct vm_page *iter)
+{
+	struct vm_page *nextpg = NULL;
+
+	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
+
+	/* p is null to signal final swap i/o. */
+	if (p == NULL)
+		return NULL;
+
+	do {
+		nextpg = TAILQ_NEXT(iter, pageq);
+	} while (nextpg && (nextpg->pg_flags & PQ_ITER));
+
+	if (nextpg) {
+		TAILQ_REMOVE(pglst, iter, pageq);
+		TAILQ_INSERT_AFTER(pglst, nextpg, iter, pageq);
+	}
+
+	return nextpg;
+}
+
 /*
  * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
  *
@@ -467,7 +490,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 {
 	struct pglist *pglst = &uvm.page_inactive;
 	int result, freed = 0;
-	struct vm_page *p, *nextpg;
+	struct vm_page *p, iter = { .pg_flags = PQ_ITER };
 	struct uvm_object *uobj;
 	struct vm_page *pps[SWCLUSTPAGES], **ppsp;
 	int npages;
@@ -501,7 +524,12 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 			break;
 	}
 
-	for (; p != NULL || swslot != 0; p = nextpg) {
+	if (p == NULL)
+		return 0;
+
+	/* Insert iterator. */
+	TAILQ_INSERT_AFTER(pglst, p, &iter, pageq);
+	for (; p != NULL || swslot != 0; p = uvmpd_iterator(pglst, p, &iter)) {
 		/*
 		 * note that p can be NULL iff we have traversed the whole
 		 * list and need to do one final swap-backed clustered pageout.
@@ -522,7 +550,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 
 				/* set p to null to signal final swap i/o */
 				p = NULL;
-				nextpg = NULL;
 			}
 		}
 		if (p) {	/* if (we have a new page to consider) */
@@ -530,7 +557,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 			 * we are below target and have a new page to consider.
 			 */
 			uvmexp.pdscans++;
-			nextpg = TAILQ_NEXT(p, pageq);
 
 			/*
 			 * If we are not short on memory and only interested
@@ -772,26 +798,11 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us. in this case we move on to the
		 * next page.
-		 *
-		 * there is a very remote chance that the pending async i/o can
-		 * finish _before_ we get here.  if that happens, our page "p"
-		 * may no longer be on the inactive queue.  so we verify this
-		 * when determining the next page (starting over at the head if
-		 * we've lost our inactive page).
 		 */
-
 		if (result == VM_PAGER_PEND) {
 			atomic_add_int(&uvmexp.paging, npages);
 			uvm_lock_pageq();
 			uvmexp.pdpending++;
-			if (p) {
-				if (p->pg_flags & PQ_INACTIVE)
-					nextpg = TAILQ_NEXT(p, pageq);
-				else
-					nextpg = TAILQ_FIRST(pglst);
-			} else {
-				nextpg = NULL;
-			}
 			continue;
 		}
 
@@ -852,12 +863,10 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 			pmap_page_protect(p, PROT_NONE);
 			anon = NULL;
 			uvm_lock_pageq();
-			nextpg = TAILQ_NEXT(p, pageq);
 			/* free released page */
 			uvm_pagefree(p);
 		} else {	/* page was not released during I/O */
 			uvm_lock_pageq();
-			nextpg = TAILQ_NEXT(p, pageq);
 			if (result != VM_PAGER_OK) {
 				/* pageout was a failure... */
 				if (result != VM_PAGER_AGAIN)
@@ -871,33 +880,16 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 					    PG_CLEAN);
 				}
 			}
-
-			/*
-			 * drop object lock (if there is an object left).  do
-			 * a safety check of nextpg to make sure it is on the
-			 * inactive queue (it should be since PG_BUSY pages on
-			 * the inactive queue can't be re-queued [note: not
-			 * true for active queue]).
-			 */
 			rw_exit(slock);
-
-			if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
-				nextpg = TAILQ_FIRST(pglst);	/* reload! */
-			}
 		} else {
 			/*
-			 * if p is null in this loop, make sure it stays null
-			 * in the next loop.
-			 */
-			nextpg = NULL;
-
-			/*
 			 * lock page queues here just so they're always locked
 			 * at the end of the loop.
 			 */
 			uvm_lock_pageq();
 		}
 	}
+	TAILQ_REMOVE(pglst, &iter, pageq);
 	return freed;
 }
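
For readers unfamiliar with the pattern, here is a minimal userland
sketch of the marker technique using <sys/queue.h>, independent of UVM.
The names (node, NF_ITER, iter_next) are made up for illustration; only
the TAILQ surgery mirrors what uvmpd_iterator() does.  The marker is a
list entry that nobody else will free or re-queue, so it records the
scan position even if the current element is pulled off the list while
we aren't looking:

	#include <sys/queue.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define NF_ITER		0x1	/* entry is a marker, not real data */

	struct node {
		TAILQ_ENTRY(node) entry;
		int flags;
		int value;
	};
	TAILQ_HEAD(nodelist, node);

	/*
	 * Return the next real node after the marker and advance the
	 * marker past it.  Skipping NF_ITER entries lets several
	 * markers share one list.
	 */
	struct node *
	iter_next(struct nodelist *list, struct node *iter)
	{
		struct node *next = TAILQ_NEXT(iter, entry);

		while (next != NULL && (next->flags & NF_ITER))
			next = TAILQ_NEXT(next, entry);

		if (next != NULL) {
			TAILQ_REMOVE(list, iter, entry);
			TAILQ_INSERT_AFTER(list, next, iter, entry);
		}
		return next;
	}

	int
	main(void)
	{
		struct nodelist list = TAILQ_HEAD_INITIALIZER(list);
		struct node iter = { .flags = NF_ITER };
		struct node *n, *p;
		int i;

		for (i = 0; i < 5; i++) {
			n = calloc(1, sizeof(*n));
			n->value = i;
			TAILQ_INSERT_TAIL(&list, n, entry);
		}

		/* Insert the marker after the first node, then walk. */
		p = TAILQ_FIRST(&list);
		TAILQ_INSERT_AFTER(&list, p, &iter, entry);
		while (p != NULL) {
			printf("visiting %d\n", p->value);
			if (p->value == 2) {
				/* Safe: the marker, not p, holds our spot. */
				TAILQ_REMOVE(&list, p, entry);
				free(p);
			}
			p = iter_next(&list, &iter);
		}
		TAILQ_REMOVE(&list, &iter, entry);

		while ((n = TAILQ_FIRST(&list)) != NULL) {
			TAILQ_REMOVE(&list, n, entry);
			free(n);
		}
		return 0;
	}

The kernel version additionally asserts the page queue lock and keeps
the NULL-p convention for the final swap-backed pageout; the list
manipulation is otherwise the same, which is why the pdaemon no longer
has to re-check PQ_INACTIVE or restart from the head of the list after
an async i/o.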