From: Theo Buehler
Subject: Re: Swap retries & OOM
To: tech@openbsd.org
Date: Sat, 23 Nov 2024 21:43:09 +0100

On Fri, Nov 08, 2024 at 11:27:18AM +0100, Martin Pieuchot wrote:
> The page daemon always tries to build a 64K cluster of pages to
> write to the swap partition.  When one of the many uvm_swap_io()
> allocations fails and returns VM_PAGER_AGAIN, the pager layer
> retries with a single page.
> 
> Since r2k22 there has been a pre-allocated 64K cluster of pages in
> the swap layer.  If it is already used, the page daemon will wait
> for it to be released by the aiodoned thread.
> 
> In fact this pre-allocated cluster and the sleep associated with it
> are what make the page daemon release the KERNEL_LOCK() and allow
> the aiodoned thread to release pages written to disk.  Without this
> the page daemon enters an infinite loop holding the KERNEL_LOCK()
> and never allows the aiodoned thread to release pages.
> 
> So I'd like to kill this dead code.  This is a step towards cleaning
> up the uvm_pager_* layer, a requirement for cleaning up the uvnode
> layer.
> 
> ok?

This diff and your explanations in this and the following mails make
sense to me.

ok tb

> 
> Index: uvm/uvm_pager.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_pager.c,v
> diff -u -p -r1.92 uvm_pager.c
> --- uvm/uvm_pager.c	24 Jul 2024 12:18:10 -0000	1.92
> +++ uvm/uvm_pager.c	8 Nov 2024 10:11:25 -0000
> @@ -520,7 +520,6 @@ uvm_pager_put(struct uvm_object *uobj, s
>  	 * now attempt the I/O.  if we have a failure and we are
>  	 * clustered, we will drop the cluster and try again.
>  	 */
> -ReTry:
>  	if (uobj) {
>  		result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags);
>  	} else {
> @@ -564,48 +563,34 @@ ReTry:
>  		 * "swblk" (for transient errors, so we can retry),
>  		 * or 0 (for hard errors).
>  		 */
> -		if (uobj == NULL && pg != NULL) {
> -			/* XXX daddr_t -> int */
> -			int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0;
> -			if (pg->pg_flags & PQ_ANON) {
> -				rw_enter(pg->uanon->an_lock, RW_WRITE);
> -				pg->uanon->an_swslot = nswblk;
> -				rw_exit(pg->uanon->an_lock);
> -			} else {
> -				rw_enter(pg->uobject->vmobjlock, RW_WRITE);
> -				uao_set_swslot(pg->uobject,
> -				    pg->offset >> PAGE_SHIFT,
> -				    nswblk);
> -				rw_exit(pg->uobject->vmobjlock);
> -			}
> -		}
> -		if (result == VM_PAGER_AGAIN) {
> -			/*
> -			 * for transient failures, free all the swslots that
> -			 * we're not going to retry with.
> -			 */
> -			if (uobj == NULL) {
> -				if (pg) {
> -					/* XXX daddr_t -> int */
> -					uvm_swap_free(swblk + 1, *npages - 1);
> +		if (uobj == NULL) {
> +			if (pg != NULL) {
> +				if (pg->pg_flags & PQ_ANON) {
> +					rw_enter(pg->uanon->an_lock, RW_WRITE);
> +					pg->uanon->an_swslot = 0;
> +					rw_exit(pg->uanon->an_lock);
>  				} else {
> -					/* XXX daddr_t -> int */
> -					uvm_swap_free(swblk, *npages);
> +					rw_enter(pg->uobject->vmobjlock, RW_WRITE);
> +					uao_set_swslot(pg->uobject,
> +					    pg->offset >> PAGE_SHIFT, 0);
> +					rw_exit(pg->uobject->vmobjlock);
>  				}
>  			}
> -			if (pg) {
> -				ppsp[0] = pg;
> -				*npages = 1;
> -				goto ReTry;
> -			}
> -		} else if (uobj == NULL) {
>  			/*
> -			 * for hard errors on swap-backed pageouts,
> -			 * mark the swslots as bad.  note that we do not
> -			 * free swslots that we mark bad.
> +			 * for transient failures, free all the swslots
>  			 */
> -			/* XXX daddr_t -> int */
> -			uvm_swap_markbad(swblk, *npages);
> +			if (result == VM_PAGER_AGAIN) {
> +				/* XXX daddr_t -> int */
> +				uvm_swap_free(swblk, *npages);
> +			} else {
> +				/*
> +				 * for hard errors on swap-backed pageouts,
> +				 * mark the swslots as bad.  note that we do not
> +				 * free swslots that we mark bad.
> +				 */
> +				/* XXX daddr_t -> int */
> +				uvm_swap_markbad(swblk, *npages);
> +			}
>  		}
>  	}
>  
> @@ -614,7 +599,6 @@ ReTry:
>  	 * was one).  give up! the caller only has one page ("pg")
>  	 * to worry about.
>  	 */
> -
>  	return result;
>  }
>  
> Index: uvm/uvm_pdaemon.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> diff -u -p -r1.129 uvm_pdaemon.c
> --- uvm/uvm_pdaemon.c	7 Nov 2024 10:46:52 -0000	1.129
> +++ uvm/uvm_pdaemon.c	8 Nov 2024 09:34:31 -0000
> @@ -859,8 +859,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>  			if (result != VM_PAGER_AGAIN)
>  				uvm_pageactivate(p);
>  			pmap_clear_reference(p);
> -			/* XXXCDC: if (swap_backed) FREE p's
> -			 * swap block? */
>  		} else {
>  			/* pageout was a success... */
>  			pmap_clear_reference(p);
> 
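To make the removed behaviour easier to follow, here is a small
standalone sketch of the old control flow.  This is not the kernel
code: try_write_cluster() and swap_free() are hypothetical stand-ins
for uvm_swap_put() and uvm_swap_free(), and only the retry pattern
(free the swap slots we will not retry with, then redo the pageout
with a single page) mirrors what the diff deletes from
uvm_pager_put().

/*
 * Simplified, hypothetical model of the retry path removed from
 * uvm_pager_put(): on a transient failure the old code freed the
 * swap slots it would not retry with and tried the write again with
 * a single page.  The defines and helpers below are stand-ins, not
 * the real UVM interfaces.
 */
#include <stdio.h>

#define VM_PAGER_OK	0
#define VM_PAGER_AGAIN	1	/* stand-in value, not the kernel's */

static int
try_write_cluster(int npages)
{
	/* pretend that large clusters hit a transient failure */
	return (npages > 1 ? VM_PAGER_AGAIN : VM_PAGER_OK);
}

static void
swap_free(int swblk, int npages)
{
	printf("freeing %d slot(s) starting at %d\n", npages, swblk);
}

int
main(void)
{
	int swblk = 1000, npages = 16, result;

ReTry:
	result = try_write_cluster(npages);
	if (result == VM_PAGER_AGAIN && npages > 1) {
		/* keep the first slot for the retry, free the rest */
		swap_free(swblk + 1, npages - 1);
		npages = 1;
		goto ReTry;
	}
	printf("final result %d with %d page(s)\n", result, npages);
	return (0);
}

With the diff applied there is no such loop anymore: a failed cluster
write frees (or marks bad) all of its swap slots and the error is
simply returned to the page daemon.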