From: mpi@grenadille.net <mpi@grenadille.net>
Subject: Improve uvm_pageout() logic for segmented memory space
To: tech@openbsd.org
Date: Wed, 06 Nov 2024 15:58:01 +0100

The diff below greatly improves the responsiveness of the page daemon
on 64-bit archs with a low/high memory split.  The improvement comes
from a more precise calculation of how many low pages have to be
freed.  As a result the number of pages written to swap is reduced by
~50% in my tests and my arm64 machine stays responsive during heavy
swapping.
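
To make the new logic easier to follow, here is a minimal userland
sketch of the per-round computation done by the patched uvm_pageout()
(see the first hunks below).  The pma_model struct and all numbers are
made-up stand-ins, not the kernel types; only the arithmetic mirrors
the diff:

#include <stdio.h>

#define PAGE_SHIFT	12

struct pma_model {		/* made-up stand-in for struct uvm_pmalloc */
	long	pm_size;	/* bytes wanted in the constrained range */
};

int
main(void)
{
	int freetarg = 512, freepages = 400, bufpages_deficit = 0;
	int shortage, inactive_shortage = 32;
	struct pma_model dma_req = { 16 << PAGE_SHIFT };
	struct pma_model *pma = &dma_req;	/* NULL if nobody is waiting */
	long size = 0;

	/* How many pages do we need to free during this round? */
	shortage = freetarg - freepages + bufpages_deficit;

	/* How much should the shrinkers (buffer cache, drm) give back? */
	if (pma != NULL)
		size += pma->pm_size >> PAGE_SHIFT;
	if (shortage > 0)
		size += shortage;

	if (size == 0) {
		/* Nothing to free: only rebalance active/inactive lists. */
		printf("rebalance only, inactive shortage %d\n",
		    inactive_shortage);
		return 0;
	}

	/* Otherwise call the shrinkers, then scan the page queues. */
	printf("ask shrinkers for up to %ld pages, then scan (shortage %d)\n",
	    size * 2, shortage);
	return 0;
}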

The diff includes:

- Use a global "struct uvm_pmalloc" to signal failed nowait allocations
  so that the page daemon looks at the managed lists (see the sketch
  after this list).  The current algorithm does not call uvmpd_scan()
  if there have been only nowait allocations.

- Skip calling the shrinkers and grabbing some locks if the page daemon
  is awoken only to rebalance the active/inactive lists.

- Do not bother releasing high pages if all we are interested in are
  low pages.

- Try to deactivate low pages first, but only if we are not short on
  swap slots.
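
Regarding the first item, here is a minimal userland sketch of the
nowait notification (pma_model and the flag values are made-up
stand-ins, not the kernel structures; the real change is in the
uvm_pmemrange.c hunk at the end of the diff):

#include <stdio.h>

#define UVM_PMA_LINKED	0x01	/* stand-in flag values */
#define UVM_PMA_FREED	0x02

struct pma_model {
	int	pm_flags;
};

/* Single global request reused for every failed nowait allocation. */
static struct pma_model nowait_pma;

/* Mirrors uvmpd_pma_done(): no request, or request already satisfied. */
static int
pma_done(struct pma_model *pma)
{
	return (pma == NULL || (pma->pm_flags & UVM_PMA_FREED));
}

/* Failed nowait allocation: link the global request only once. */
static void
nowait_failed(void)
{
	if (!(nowait_pma.pm_flags & UVM_PMA_LINKED)) {
		nowait_pma.pm_flags = UVM_PMA_LINKED;
		/* kernel: TAILQ_INSERT_TAIL(&uvm.pmr_control.allocs, ...) */
		printf("page daemon woken up\n");
	}
}

int
main(void)
{
	nowait_failed();
	nowait_failed();	/* a second failure does not link twice */

	/* The scan keeps honoring the DMA constraint until it is met. */
	printf("constraint honored: %s\n",
	    pma_done(&nowait_pma) ? "no" : "yes");

	nowait_pma.pm_flags |= UVM_PMA_FREED;	/* enough low pages freed */
	printf("constraint honored: %s\n",
	    pma_done(&nowait_pma) ? "no" : "yes");
	return 0;
}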

ok?

Index: uvm/uvm_pdaemon.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
diff -u -p -r1.122 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c	6 Nov 2024 10:45:51 -0000	1.122
+++ uvm/uvm_pdaemon.c	6 Nov 2024 14:19:18 -0000
@@ -102,10 +102,9 @@ extern unsigned long drmbackoff(long);
  */
 
 struct rwlock	*uvmpd_trylockowner(struct vm_page *);
-void		uvmpd_scan(struct uvm_pmalloc *, int, int,
-		    struct uvm_constraint_range *);
-int		uvmpd_scan_inactive(struct uvm_pmalloc *, int,
-		    struct uvm_constraint_range *);
+void		uvmpd_scan(struct uvm_pmalloc *, int, int);
+int		uvmpd_scan_inactive(struct uvm_pmalloc *, int);
+void		uvmpd_scan_active(struct uvm_pmalloc *, int, int);
 void		uvmpd_tune(void);
 void		uvmpd_drop(struct pglist *);
 int		uvmpd_dropswap(struct vm_page *);
@@ -196,7 +195,15 @@ uvmpd_tune(void)
  * recover at least some memory in the most restricted region (assumed
  * to be dma_constraint).
  */
-volatile int uvm_nowait_failed;
+struct uvm_pmalloc nowait_pma;
+
+static inline int
+uvmpd_pma_done(struct uvm_pmalloc *pma)
+{
+	if (pma == NULL || (pma->pm_flags & UVM_PMA_FREED))
+		return 1;
+	return 0;
+}
 
 /*
  * uvm_pageout: the main loop for the pagedaemon
@@ -213,11 +220,19 @@ uvm_pageout(void *arg)
 	(void) spl0();
 	uvmpd_tune();
 
+	/*
+	 * XXX realistically, this is what our nowait callers probably
+	 * care about
+	 */
+	nowait_pma.pm_constraint = dma_constraint;
+	nowait_pma.pm_size = (16 << PAGE_SHIFT);
+	nowait_pma.pm_flags = 0;
+
 	for (;;) {
 		long size;
 
 		uvm_lock_fpageq();
-		if (!uvm_nowait_failed && TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
+		if (TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
 			msleep_nsec(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
 			    "pgdaemon", INFSLP);
 			uvmexp.pdwoke++;
@@ -227,16 +242,9 @@ uvm_pageout(void *arg)
 			pma->pm_flags |= UVM_PMA_BUSY;
 			constraint = pma->pm_constraint;
 		} else {
-			if (uvm_nowait_failed) {
-				/*
-				 * XXX realistically, this is what our
-				 * nowait callers probably care about
-				 */
-				constraint = dma_constraint;
-				uvm_nowait_failed = 0;
-			} else
-				constraint = no_constraint;
+			constraint = no_constraint;
 		}
+
 		/* How many pages do we need to free during this round? */
 		shortage = uvmexp.freetarg - uvmexp.free + BUFPAGES_DEFICIT;
 		uvm_unlock_fpageq();
@@ -253,15 +261,27 @@ uvm_pageout(void *arg)
 			uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;
 		uvm_unlock_pageq();
 
-		/* Reclaim pages from the buffer cache if possible. */
 		size = 0;
 		if (pma != NULL)
 			size += pma->pm_size >> PAGE_SHIFT;
 		if (shortage > 0)
 			size += shortage;
-		if (size == 0)
-			size = 16; /* XXX */
 
+		if (size == 0) {
+			if (inactive_shortage) {
+				uvm_lock_pageq();
+				uvmpd_scan_active(NULL, 0, inactive_shortage);
+				uvm_unlock_pageq();
+#ifdef DIAGNOSTIC
+			} else {
+				printf("%s: nothing to free & balance\n",
+				    __func__);
+#endif
+			}
+			continue;
+		}
+
+		/* Reclaim pages from the buffer cache if possible. */
 		shortage -= bufbackoff(&constraint, size * 2);
 #if NDRM > 0
 		shortage -= drmbackoff(size * 2);
@@ -273,9 +293,9 @@ uvm_pageout(void *arg)
 		 * scan if needed
 		 */
 		uvm_lock_pageq();
-		if (pma != NULL || (shortage > 0) || (inactive_shortage > 0)) {
-			uvmpd_scan(pma, shortage, inactive_shortage,
-			    &constraint);
+		if (!uvmpd_pma_done(pma) ||
+		    (shortage > 0) || (inactive_shortage > 0)) {
+			uvmpd_scan(pma, shortage, inactive_shortage);
 		}
 
 		/*
@@ -297,8 +317,7 @@ uvm_pageout(void *arg)
 			pma->pm_flags &= ~UVM_PMA_BUSY;
 			if (pma->pm_flags & UVM_PMA_FREED) {
 				pma->pm_flags &= ~UVM_PMA_LINKED;
-				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
-				    pmq);
+				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma, pmq);
 				wakeup(pma);
 			}
 		}
@@ -414,6 +433,23 @@ uvmpd_dropswap(struct vm_page *pg)
 }
 
 /*
+ * Return 1 if the page `p' belongs to the memory range described by
+ * `constraint', 0 otherwise.
+ */
+static inline int
+uvmpd_match_constraint(struct vm_page *p,
+    struct uvm_constraint_range *constraint)
+{
+	paddr_t paddr;
+
+	paddr = atop(VM_PAGE_TO_PHYS(p));
+	if (paddr >= constraint->ucr_low && paddr < constraint->ucr_high)
+		return 1;
+
+	return 0;
+}
+
+/*
  * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
  *
  * => called with page queues locked
@@ -423,8 +459,7 @@ uvmpd_dropswap(struct vm_page *pg)
  * => we return TRUE if we are exiting because we met our target
  */
 int
-uvmpd_scan_inactive(struct uvm_pmalloc *pma, int shortage,
-    struct uvm_constraint_range *constraint)
+uvmpd_scan_inactive(struct uvm_pmalloc *pma, int shortage)
 {
 	struct pglist *pglst = &uvm.page_inactive;
 	int result, freed = 0;
@@ -440,7 +475,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 	boolean_t swap_backed;
 	vaddr_t start;
 	int dirtyreacts;
-	paddr_t paddr;
 
 	/*
 	 * swslot is non-zero if we are building a swap cluster.  we want
@@ -452,11 +486,14 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 	dirtyreacts = 0;
 	p = NULL;
 
-	/* Start with the first page on the list that fit in `constraint' */
+	/*
+	 * If a thread is waiting for us to release memory from a specific
+	 * range, start with the first page on the list that fits in
+	 * `constraint'.
+	 */
 	TAILQ_FOREACH(p, pglst, pageq) {
-		paddr = atop(VM_PAGE_TO_PHYS(p));
-		if (paddr >= constraint->ucr_low &&
-		    paddr < constraint->ucr_high)
+		if (uvmpd_pma_done(pma) ||
+		    uvmpd_match_constraint(p, &pma->pm_constraint))
 			break;
 	}
 
@@ -471,7 +508,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 			/*
 			 * see if we've met our target
 			 */
-			if (((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
+			if ((uvmpd_pma_done(pma) &&
 			    (uvmexp.paging >= (shortage - freed))) ||
 			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
 				if (swslot == 0) {
@@ -491,6 +528,16 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 			uvmexp.pdscans++;
 			nextpg = TAILQ_NEXT(p, pageq);
 
+			/*
+			 * If we are not short on memory and only interested
+			 * in releasing pages from a given memory range, don't
+			 * bother with other pages.
+			 */
+			if (uvmexp.paging >= (shortage - freed) &&
+			    !uvmpd_pma_done(pma) &&
+			    !uvmpd_match_constraint(p, &pma->pm_constraint))
+				continue;
+
 			anon = p->uanon;
 			uobj = p->uobject;
 
@@ -861,13 +908,9 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
  */
 
 void
-uvmpd_scan(struct uvm_pmalloc *pma, int shortage, int inactive_shortage,
-    struct uvm_constraint_range *constraint)
+uvmpd_scan(struct uvm_pmalloc *pma, int shortage, int inactive_shortage)
 {
 	int swap_shortage, pages_freed;
-	struct vm_page *p, *nextpg;
-	struct rwlock *slock;
-	paddr_t paddr;
 
 	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
 
@@ -893,7 +936,7 @@ uvmpd_scan(struct uvm_pmalloc *pma, int 
 	 * we work on meeting our inactive target by converting active pages
 	 * to inactive ones.
 	 */
-	pages_freed = uvmpd_scan_inactive(pma, shortage, constraint);
+	pages_freed = uvmpd_scan_inactive(pma, shortage);
 	uvmexp.pdfreed += pages_freed;
 	shortage -= pages_freed;
 
@@ -910,6 +953,18 @@ uvmpd_scan(struct uvm_pmalloc *pma, int 
 		swap_shortage = shortage;
 	}
 
+	uvmpd_scan_active(pma, swap_shortage, inactive_shortage);
+}
+
+void
+uvmpd_scan_active(struct uvm_pmalloc *pma, int swap_shortage,
+    int inactive_shortage)
+{
+	struct vm_page *p, *nextpg;
+	struct rwlock *slock;
+
+	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);
+
 	for (p = TAILQ_FIRST(&uvm.page_active);
 	     p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
 	     p = nextpg) {
@@ -919,11 +974,16 @@ uvmpd_scan(struct uvm_pmalloc *pma, int 
 		}
 
 		/*
-		 * skip this page if it doesn't match the constraint.
-		 */
-		paddr = atop(VM_PAGE_TO_PHYS(p));
-		if (paddr < constraint->ucr_low &&
-		    paddr >= constraint->ucr_high)
+		 * If we couldn't release enough pages from a given memory
+		 * range, try to deactivate them first...
+		 *
+		 * ...unless we are low on swap slots, in which case we are
+		 * probably OOM and want to release swap resources as fast
+		 * as possible.
+		 */
+		if (inactive_shortage > 0 && swap_shortage == 0 &&
+		    !uvmpd_pma_done(pma) &&
+		    !uvmpd_match_constraint(p, &pma->pm_constraint))
 			continue;
 
 		/*
Index: uvm/uvm_pmemrange.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_pmemrange.c,v
diff -u -p -r1.73 uvm_pmemrange.c
--- uvm/uvm_pmemrange.c	6 Nov 2024 10:41:12 -0000	1.73
+++ uvm/uvm_pmemrange.c	6 Nov 2024 13:39:13 -0000
@@ -841,7 +841,7 @@ uvm_pmr_extract_range(struct uvm_pmemran
  * recover at least some memory in the most restricted region (assumed
  * to be dma_constraint).
  */
-extern volatile int uvm_nowait_failed;
+extern struct uvm_pmalloc nowait_pma;
 
 /*
  * Acquire a number of pages.
@@ -1190,9 +1190,12 @@ fail:
 		    flags & UVM_PLA_FAILOK) == 0)
 			goto retry;
 		KASSERT(flags & UVM_PLA_FAILOK);
-	} else {
-		if (!(flags & UVM_PLA_NOWAKE)) {
-			uvm_nowait_failed = 1;
+	} else if (!(flags & UVM_PLA_NOWAKE)) {
+		struct uvm_pmalloc *pma = &nowait_pma;
+
+		if (!(nowait_pma.pm_flags & UVM_PMA_LINKED)) {
+			nowait_pma.pm_flags = UVM_PMA_LINKED;
+			TAILQ_INSERT_TAIL(&uvm.pmr_control.allocs, pma, pmq);
 			wakeup(&uvm.pagedaemon);
 		}
 	}