From: mpi@grenadille.net
Subject: Improve uvm_pageout() logic for segmented memory space
To: tech@openbsd.org
Date: Wed, 06 Nov 2024 15:58:01 +0100

The diff below greatly improves the responsiveness of the page daemon on
64-bit archs with a low/high memory split.  The improvement comes from a
more precise calculation of how many low pages have to be freed.

As a result, the number of pages written to swap is decreased by ~50% in
my tests and my arm64 machine becomes responsive during heavy swapping.

The diff includes:

- Use a global "struct uvm_pmalloc" to notify failed nowait allocations
  in order to look at the managed lists.  The current algorithm does not
  call uvmpd_scan() if there have been only nowait allocations.

- Skip calling the shrinkers and grabbing some locks if the page daemon
  is awoken only to rebalance the active/inactive lists.

- Do not bother releasing high pages if all we are interested in are low
  pages.

- Try to deactivate low pages first, but only if we are not short on swap
  slots.

(A small standalone sketch of the new constraint gating follows the diff,
for illustration.)

ok?

Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -r1.122 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c	6 Nov 2024 10:45:51 -0000	1.122
+++ uvm/uvm_pdaemon.c	6 Nov 2024 14:19:18 -0000
@@ -102,10 +102,9 @@ extern unsigned long drmbackoff(long);
  */
 
 struct rwlock	*uvmpd_trylockowner(struct vm_page *);
-void		uvmpd_scan(struct uvm_pmalloc *, int, int,
-		    struct uvm_constraint_range *);
-int		uvmpd_scan_inactive(struct uvm_pmalloc *, int,
-		    struct uvm_constraint_range *);
+void		uvmpd_scan(struct uvm_pmalloc *, int, int);
+int		uvmpd_scan_inactive(struct uvm_pmalloc *, int);
+void		uvmpd_scan_active(struct uvm_pmalloc *, int, int);
 void		uvmpd_tune(void);
 void		uvmpd_drop(struct pglist *);
 int		uvmpd_dropswap(struct vm_page *);
@@ -196,7 +195,15 @@ uvmpd_tune(void)
  * recover at least some memory in the most restricted region (assumed
  * to be dma_constraint).
  */
-volatile int uvm_nowait_failed;
+struct uvm_pmalloc nowait_pma;
+
+static inline int
+uvmpd_pma_done(struct uvm_pmalloc *pma)
+{
+	if (pma == NULL || (pma->pm_flags & UVM_PMA_FREED))
+		return 1;
+	return 0;
+}
 /*
  * uvm_pageout: the main loop for the pagedaemon
  */
@@ -213,11 +220,19 @@ uvm_pageout(void *arg)
 	(void) spl0();
 	uvmpd_tune();
 
+	/*
+	 * XXX realistically, this is what our nowait callers probably
+	 * care about
+	 */
+	nowait_pma.pm_constraint = dma_constraint;
+	nowait_pma.pm_size = (16 << PAGE_SHIFT);
+	nowait_pma.pm_flags = 0;
+
 	for (;;) {
 		long size;
 
 		uvm_lock_fpageq();
-		if (!uvm_nowait_failed && TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
+		if (TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
 			msleep_nsec(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
 			    "pgdaemon", INFSLP);
 			uvmexp.pdwoke++;
@@ -227,16 +242,9 @@
 			pma->pm_flags |= UVM_PMA_BUSY;
 			constraint = pma->pm_constraint;
 		} else {
-			if (uvm_nowait_failed) {
-				/*
-				 * XXX realistically, this is what our
-				 * nowait callers probably care about
-				 */
-				constraint = dma_constraint;
-				uvm_nowait_failed = 0;
-			} else
-				constraint = no_constraint;
+			constraint = no_constraint;
 		}
+		/* How many pages do we need to free during this round? */
 		shortage = uvmexp.freetarg - uvmexp.free + BUFPAGES_DEFICIT;
 		uvm_unlock_fpageq();
 
@@ -253,15 +261,27 @@
 		    uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;
 		uvm_unlock_pageq();
 
-		/* Reclaim pages from the buffer cache if possible. */
 		size = 0;
 		if (pma != NULL)
 			size += pma->pm_size >> PAGE_SHIFT;
 		if (shortage > 0)
 			size += shortage;
 
-		if (size == 0)
-			size = 16; /* XXX */
+		if (size == 0) {
+			if (inactive_shortage) {
+				uvm_lock_pageq();
+				uvmpd_scan_active(NULL, 0, inactive_shortage);
+				uvm_unlock_pageq();
+#ifdef DIAGNOSTIC
+			} else {
+				printf("%s: nothing to free & balance\n",
+				    __func__);
+#endif
+			}
+			continue;
+		}
+
+		/* Reclaim pages from the buffer cache if possible. */
 		shortage -= bufbackoff(&constraint, size * 2);
 #if NDRM > 0
 		shortage -= drmbackoff(size * 2);
@@ -273,9 +293,9 @@ uvm_pageout(void *arg)
 		 * scan if needed
 		 */
 		uvm_lock_pageq();
-		if (pma != NULL || (shortage > 0) || (inactive_shortage > 0)) {
-			uvmpd_scan(pma, shortage, inactive_shortage,
-			    &constraint);
+		if (!uvmpd_pma_done(pma) ||
+		    (shortage > 0) || (inactive_shortage > 0)) {
+			uvmpd_scan(pma, shortage, inactive_shortage);
 		}
 
 		/*
@@ -297,8 +317,7 @@ uvm_pageout(void *arg)
 			pma->pm_flags &= ~UVM_PMA_BUSY;
 			if (pma->pm_flags & UVM_PMA_FREED) {
 				pma->pm_flags &= ~UVM_PMA_LINKED;
-				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
-				    pmq);
+				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma, pmq);
 				wakeup(pma);
 			}
 		}
@@ -414,6 +433,23 @@ uvmpd_dropswap(struct vm_page *pg)
 }
 
 /*
+ * Return 1 if the page `p' belongs to the memory range described by
+ * 'constraint', 0 otherwise.
+ */
+static inline int
+uvmpd_match_constraint(struct vm_page *p,
+    struct uvm_constraint_range *constraint)
+{
+	paddr_t paddr;
+
+	paddr = atop(VM_PAGE_TO_PHYS(p));
+	if (paddr >= constraint->ucr_low && paddr < constraint->ucr_high)
+		return 1;
+
+	return 0;
+}
+
+/*
  * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
  *
  * => called with page queues locked
@@ -423,8 +459,7 @@ uvmpd_dropswap(struct vm_page *pg)
  * => we return TRUE if we are exiting because we met our target
  */
 int
-uvmpd_scan_inactive(struct uvm_pmalloc *pma, int shortage,
-    struct uvm_constraint_range *constraint)
+uvmpd_scan_inactive(struct uvm_pmalloc *pma, int shortage)
 {
 	struct pglist *pglst = &uvm.page_inactive;
 	int result, freed = 0;
@@ -440,7 +475,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 	boolean_t swap_backed;
 	vaddr_t start;
 	int dirtyreacts;
-	paddr_t paddr;
 
 	/*
 	 * swslot is non-zero if we are building a swap cluster. we want
@@ -452,11 +486,14 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 	dirtyreacts = 0;
 	p = NULL;
 
-	/* Start with the first page on the list that fit in `constraint' */
+	/*
+	 * If a thread is waiting for us to release memory from a specific
+	 * range start with the first page on the list that fits in
+	 * `constraint'
+	 */
 	TAILQ_FOREACH(p, pglst, pageq) {
-		paddr = atop(VM_PAGE_TO_PHYS(p));
-		if (paddr >= constraint->ucr_low &&
-		    paddr < constraint->ucr_high)
+		if (uvmpd_pma_done(pma) ||
+		    uvmpd_match_constraint(p, &pma->pm_constraint))
 			break;
 	}
 
@@ -471,7 +508,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 		/*
 		 * see if we've met our target
 		 */
-		if (((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
+		if ((uvmpd_pma_done(pma) &&
 		    (uvmexp.paging >= (shortage - freed))) ||
 		    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
 			if (swslot == 0) {
@@ -491,6 +528,16 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 		uvmexp.pdscans++;
 		nextpg = TAILQ_NEXT(p, pageq);
 
+		/*
+		 * If we are not short on memory and only interested
+		 * in releasing pages from a given memory range don't
+		 * bother with other pages.
+		 */
+		if (uvmexp.paging >= (shortage - freed) &&
+		    !uvmpd_pma_done(pma) &&
+		    !uvmpd_match_constraint(p, &pma->pm_constraint))
+			continue;
+
 		anon = p->uanon;
 		uobj = p->uobject;
 
@@ -861,13 +908,9 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
  */
 
 void
-uvmpd_scan(struct uvm_pmalloc *pma, int shortage, int inactive_shortage,
-    struct uvm_constraint_range *constraint)
+uvmpd_scan(struct uvm_pmalloc *pma, int shortage, int inactive_shortage)
 {
 	int swap_shortage, pages_freed;
-	struct vm_page *p, *nextpg;
-	struct rwlock *slock;
-	paddr_t paddr;
 
 	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
 
@@ -893,7 +936,7 @@ uvmpd_scan(struct uvm_pmalloc *pma, int
 	 * we work on meeting our inactive target by converting active pages
 	 * to inactive ones.
 	 */
-	pages_freed = uvmpd_scan_inactive(pma, shortage, constraint);
+	pages_freed = uvmpd_scan_inactive(pma, shortage);
 	uvmexp.pdfreed += pages_freed;
 	shortage -= pages_freed;
 
@@ -910,6 +953,18 @@ uvmpd_scan(struct uvm_pmalloc *pma, int
 		swap_shortage = shortage;
 	}
 
+	uvmpd_scan_active(pma, swap_shortage, inactive_shortage);
+}
+
+void
+uvmpd_scan_active(struct uvm_pmalloc *pma, int swap_shortage,
+    int inactive_shortage)
+{
+	struct vm_page *p, *nextpg;
+	struct rwlock *slock;
+
+	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
+
 	for (p = TAILQ_FIRST(&uvm.page_active);
 	    p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
 	    p = nextpg) {
@@ -919,11 +974,16 @@ uvmpd_scan(struct uvm_pmalloc *pma, int
 		}
 
 		/*
-		 * skip this page if it doesn't match the constraint.
-		 */
-		paddr = atop(VM_PAGE_TO_PHYS(p));
-		if (paddr < constraint->ucr_low &&
-		    paddr >= constraint->ucr_high)
+		 * If we couldn't release enough pages from a given memory
+		 * range try to deactivate them first...
+		 *
+		 * ...unless we are low on swap slots, in such case we are
+		 * probably OOM and want to release swap resources as fast
+		 * as possible.
+		 */
+		if (inactive_shortage > 0 && swap_shortage == 0 &&
+		    !uvmpd_pma_done(pma) &&
+		    !uvmpd_match_constraint(p, &pma->pm_constraint))
 			continue;
 
 		/*
Index: uvm/uvm_pmemrange.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pmemrange.c,v
diff -u -p -r1.73 uvm_pmemrange.c
--- uvm/uvm_pmemrange.c	6 Nov 2024 10:41:12 -0000	1.73
+++ uvm/uvm_pmemrange.c	6 Nov 2024 13:39:13 -0000
@@ -841,7 +841,7 @@ uvm_pmr_extract_range(struct uvm_pmemran
  * recover at least some memory in the most restricted region (assumed
  * to be dma_constraint).
  */
-extern volatile int uvm_nowait_failed;
+extern struct uvm_pmalloc nowait_pma;
 
 /*
  * Acquire a number of pages.
@@ -1190,9 +1190,12 @@ fail:
 		    flags & UVM_PLA_FAILOK) == 0)
 			goto retry;
 		KASSERT(flags & UVM_PLA_FAILOK);
-	} else {
-		if (!(flags & UVM_PLA_NOWAKE)) {
-			uvm_nowait_failed = 1;
+	} else if (!(flags & UVM_PLA_NOWAKE)) {
+		struct uvm_pmalloc *pma = &nowait_pma;
+
+		if (!(nowait_pma.pm_flags & UVM_PMA_LINKED)) {
+			nowait_pma.pm_flags = UVM_PMA_LINKED;
+			TAILQ_INSERT_TAIL(&uvm.pmr_control.allocs, pma, pmq);
 			wakeup(&uvm.pagedaemon);
 		}
 	}
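
For illustration only, here is a small standalone sketch (a userland model,
not part of the diff) of the gating that uvmpd_pma_done() and
uvmpd_match_constraint() implement above.  The struct layout, flag value and
page frame numbers are simplified stand-ins; the real definitions live under
sys/uvm.

#include <stdio.h>

#define PMA_FREED	0x01	/* stand-in for UVM_PMA_FREED */

struct constraint_range {
	unsigned long	low;	/* first page frame number in the range */
	unsigned long	high;	/* first page frame number past the range */
};

struct pmalloc {
	int			flags;
	struct constraint_range	range;
};

/* Nobody is waiting, or the waiter has already been satisfied. */
static int
pma_done(const struct pmalloc *pma)
{
	return (pma == NULL || (pma->flags & PMA_FREED));
}

/* Does the page frame number fall inside the waiter's range? */
static int
match_constraint(unsigned long pfn, const struct constraint_range *r)
{
	return (pfn >= r->low && pfn < r->high);
}

int
main(void)
{
	/* A waiter stuck on a low-memory (e.g. DMA-reachable) allocation. */
	struct pmalloc waiter = { 0, { 0x0, 0x100000 } };
	unsigned long low_pfn = 0x42, high_pfn = 0x200000;

	/*
	 * A page is only worth scanning when no waiter is pending or when
	 * the page can actually satisfy the waiter.
	 */
	printf("consider low page:  %d\n",
	    pma_done(&waiter) || match_constraint(low_pfn, &waiter.range));
	printf("consider high page: %d\n",
	    pma_done(&waiter) || match_constraint(high_pfn, &waiter.range));

	return (0);
}

With a waiter pending on the low range, only pages whose frame number falls
in that range are considered; once the waiter is satisfied or absent, every
page is fair game again.  This mirrors the conditions the new skip tests in
uvmpd_scan_inactive() and uvmpd_scan_active() express.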