Download raw body.
Swap retries & OOM
I've been through a few mkr and bulks on i386 with this.
The problem was seen initially, but Martin suggested also trying the
"Fix swapping for MP archs" diff. The first build there resulted
in the i386 uvm fix; it has been stable since then.
On 2024/11/20 10:17, Martin Pieuchot wrote:
> On 09/11/24(Sat) 11:37, Martin Pieuchot wrote:
> > On 08/11/24(Fri) 11:27, Martin Pieuchot wrote:
> > > The page daemon always tries to build a 64K cluster of pages to
> > > write to the swap partition. When one of the many uvm_swap_io()
> > > allocations fails and returns VM_PAGER_AGAIN, the pager layer
> > > re-tries with a single page.
> > >
> > > Since r2k22 there is a pre-allocated 64K cluster of pages in swap
> > > layer. If it is already used, the page daemon will wait for it
> > > to be released by the aiodoned thread.
> > >
> > > In fact this pre-allocated cluster, and the sleep associated with
> > > it, are what make the page daemon release the KERNEL_LOCK() and allow
> > > the aiodoned thread to release pages written to disk. Without this
> > > the page daemon enters an infinite loop holding the KERNEL_LOCK()
> > > and never allows the aiodoned to release pages.
> > >
> > > So I'd like to kill this dead code. This is a step towards cleaning
> > > up the uvm_pager_* layer, a requirement for cleaning up the uvnode layer.
> >
> > I've been asked off-list why this is dead code. Here is why:
> >
> > VM_PAGER_AGAIN is returned by uvm_swap_put() when an allocation failed.
> > There are at most 3 allocations in uvm_swap_io():
> >
> > - buffer descriptor for the VFS
> > - kva mapping for the pages
> > - (optionally) cluster-size pages for bouncing
> >
> > In case bouncing is necessary, the page daemon is using, or waiting for,
> > a pre-allocated 64K buffer in uvm_swap_allocpages(). So the remaining
> > allocations are independent from the size of the cluster being written
> > to disk. That's why retrying with a smaller cluster isn't more likely
> > to succeed.
>
> Anyone?
>
> >
> > > Index: uvm/uvm_pager.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/uvm/uvm_pager.c,v
> > > diff -u -p -r1.92 uvm_pager.c
> > > --- uvm/uvm_pager.c 24 Jul 2024 12:18:10 -0000 1.92
> > > +++ uvm/uvm_pager.c 8 Nov 2024 10:11:25 -0000
> > > @@ -520,7 +520,6 @@ uvm_pager_put(struct uvm_object *uobj, s
> > > * now attempt the I/O. if we have a failure and we are
> > > * clustered, we will drop the cluster and try again.
> > > */
> > > -ReTry:
> > > if (uobj) {
> > > result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags);
> > > } else {
> > > @@ -564,48 +563,34 @@ ReTry:
> > > * "swblk" (for transient errors, so we can retry),
> > > * or 0 (for hard errors).
> > > */
> > > - if (uobj == NULL && pg != NULL) {
> > > - /* XXX daddr_t -> int */
> > > - int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0;
> > > - if (pg->pg_flags & PQ_ANON) {
> > > - rw_enter(pg->uanon->an_lock, RW_WRITE);
> > > - pg->uanon->an_swslot = nswblk;
> > > - rw_exit(pg->uanon->an_lock);
> > > - } else {
> > > - rw_enter(pg->uobject->vmobjlock, RW_WRITE);
> > > - uao_set_swslot(pg->uobject,
> > > - pg->offset >> PAGE_SHIFT,
> > > - nswblk);
> > > - rw_exit(pg->uobject->vmobjlock);
> > > - }
> > > - }
> > > - if (result == VM_PAGER_AGAIN) {
> > > - /*
> > > - * for transient failures, free all the swslots that
> > > - * we're not going to retry with.
> > > - */
> > > - if (uobj == NULL) {
> > > - if (pg) {
> > > - /* XXX daddr_t -> int */
> > > - uvm_swap_free(swblk + 1, *npages - 1);
> > > + if (uobj == NULL) {
> > > + if (pg != NULL) {
> > > + if (pg->pg_flags & PQ_ANON) {
> > > + rw_enter(pg->uanon->an_lock, RW_WRITE);
> > > + pg->uanon->an_swslot = 0;
> > > + rw_exit(pg->uanon->an_lock);
> > > } else {
> > > - /* XXX daddr_t -> int */
> > > - uvm_swap_free(swblk, *npages);
> > > + rw_enter(pg->uobject->vmobjlock, RW_WRITE);
> > > + uao_set_swslot(pg->uobject,
> > > + pg->offset >> PAGE_SHIFT, 0);
> > > + rw_exit(pg->uobject->vmobjlock);
> > > }
> > > }
> > > - if (pg) {
> > > - ppsp[0] = pg;
> > > - *npages = 1;
> > > - goto ReTry;
> > > - }
> > > - } else if (uobj == NULL) {
> > > /*
> > > - * for hard errors on swap-backed pageouts,
> > > - * mark the swslots as bad. note that we do not
> > > - * free swslots that we mark bad.
> > > + * for transient failures, free all the swslots
> > > */
> > > - /* XXX daddr_t -> int */
> > > - uvm_swap_markbad(swblk, *npages);
> > > + if (result == VM_PAGER_AGAIN) {
> > > + /* XXX daddr_t -> int */
> > > + uvm_swap_free(swblk, *npages);
> > > + } else {
> > > + /*
> > > + * for hard errors on swap-backed pageouts,
> > > + * mark the swslots as bad. note that we do not
> > > + * free swslots that we mark bad.
> > > + */
> > > + /* XXX daddr_t -> int */
> > > + uvm_swap_markbad(swblk, *npages);
> > > + }
> > > }
> > > }
> > >
> > > @@ -614,7 +599,6 @@ ReTry:
> > > * was one). give up! the caller only has one page ("pg")
> > > * to worry about.
> > > */
> > > -
> > > return result;
> > > }
> > >
> > > Index: uvm/uvm_pdaemon.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> > > diff -u -p -r1.129 uvm_pdaemon.c
> > > --- uvm/uvm_pdaemon.c 7 Nov 2024 10:46:52 -0000 1.129
> > > +++ uvm/uvm_pdaemon.c 8 Nov 2024 09:34:31 -0000
> > > @@ -859,8 +859,6 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
> > > if (result != VM_PAGER_AGAIN)
> > > uvm_pageactivate(p);
> > > pmap_clear_reference(p);
> > > - /* XXXCDC: if (swap_backed) FREE p's
> > > - * swap block? */
> > > } else {
> > > /* pageout was a success... */
> > > pmap_clear_reference(p);
> > >
> >
>
>
Swap retries & OOM