Download raw body.
One reaper per CPU
On Wed, Jul 10, 2024 at 05:36:02PM +0000, Christian Ludwig wrote:
> Hi,
>
> On Wed, 2024-07-10 at 17:42 +0200, Mark Kettenis wrote:
> > > Date: Wed, 10 Jul 2024 17:21:35 +0200
> > > From: Christian Ludwig <cludwig@genua.de>
> >
> > Hi Christian,
> >
> > > Hi,
> > >
> > > This implements one reaper process per CPU. The benefit is that we
> > > can
> > > switch to a safe stack (the reaper) on process teardown without the
> > > need
> > > to lock data structures between sched_exit() and the local reaper.
> > > That
> > > means the global deadproc mutex goes away. And there is also no
> > > need to
> > > go through idle anymore, because we know that the local reaper is
> > > not
> > > currently running when the dying proc enlists itself.
> > >
> > > I have tested it lightly. It does not fall apart immediately. I do
> > > not
> > > see any performance regressions in my test case, which is
> > > recompiling
> > > the kernel over and over again, but your mileage my vary. So please
> > > give
> > > this some serious testing on your boxes.
> > >
> > > So long,
> > >
> > > - Christian
> > >
> > > ---
> > > sys/kern/init_main.c | 5 ----
> > > sys/kern/kern_exit.c | 59 ++++++++++++---------------------------
> > > ----
> > > sys/kern/kern_sched.c | 43 ++++++++++++++++++++-----------
> > > sys/sys/proc.h | 1 -
> > > 4 files changed, 44 insertions(+), 64 deletions(-)
> > >
> > > diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
> > > index d571b0855cff..83a03a14a042 100644
> > > --- a/sys/kern/init_main.c
> > > +++ b/sys/kern/init_main.c
> > > @@ -117,7 +117,6 @@ struct plimit limit0;
> > > struct vmspace vmspace0;
> > > struct sigacts sigacts0;
> > > struct process *initprocess;
> > > -struct proc *reaperproc;
> > >
> > > extern struct user *proc0paddr;
> > >
> > > @@ -495,10 +494,6 @@ main(void *framep)
> > > if (kthread_create(uvm_pageout, NULL, NULL, "pagedaemon"))
> > > panic("fork pagedaemon");
> > >
> > > - /* Create the reaper daemon kernel thread. */
> > > - if (kthread_create(reaper, NULL, &reaperproc, "reaper"))
> > > - panic("fork reaper");
> > > -
> > > /* Create the cleaner daemon kernel thread. */
> > > if (kthread_create(buf_daemon, NULL, &cleanerproc,
> > > "cleaner"))
> > > panic("fork cleaner");
> > > diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
> > > index ba87dcd42cd0..a5cc37ac0f74 100644
> > > --- a/sys/kern/kern_exit.c
> > > +++ b/sys/kern/kern_exit.c
> > > @@ -378,11 +378,10 @@ exit1(struct proc *p, int xexit, int xsig,
> > > int flags)
> > > */
> > >
> > > /*
> > > - * Finally, call machine-dependent code to switch to a new
> > > - * context (possibly the idle context). Once we are no
> > > longer
> > > - * using the dead process's vmspace and stack, exit2() will
> > > be
> > > - * called to schedule those resources to be released by the
> > > - * reaper thread.
> > > + * Finally, call machine-dependent code to switch away.
> > > Then,
> > > + * when we are no longer using the dead process's vmspace
> > > and
> > > + * stack, the current CPU's reaper thread releases those
> > > + * resources.
> > > *
> > > * Note that cpu_exit() will end with a call equivalent to
> > > * cpu_switch(), finishing our execution (pun intended).
> > > @@ -392,38 +391,6 @@ exit1(struct proc *p, int xexit, int xsig, int
> > > flags)
> > > panic("cpu_exit returned");
> > > }
> > >
> > > -/*
> > > - * Locking of this proclist is special; it's accessed in a
> > > - * critical section of process exit, and thus locking it can't
> > > - * modify interrupt state. We use a simple spin lock for this
> > > - * proclist. We use the p_hash member to linkup to deadproc.
> > > - */
> > > -struct mutex deadproc_mutex =
> > > - MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS);
> > > -struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc);
> > > -
> > > -/*
> > > - * We are called from sched_idle() once it is safe to schedule the
> > > - * dead process's resources to be freed. So this is not allowed to
> > > sleep.
> > > - *
> > > - * We lock the deadproc list, place the proc on that list (using
> > > - * the p_hash member), and wake up the reaper.
> > > - */
> > > -void
> > > -exit2(struct proc *p)
> > > -{
> > > - /* account the remainder of time spent in exit1() */
> > > - mtx_enter(&p->p_p->ps_mtx);
> > > - tuagg_add_process(p->p_p, p);
> > > - mtx_leave(&p->p_p->ps_mtx);
> > > -
> > > - mtx_enter(&deadproc_mutex);
> > > - LIST_INSERT_HEAD(&deadproc, p, p_hash);
> > > - mtx_leave(&deadproc_mutex);
> > > -
> > > - wakeup(&deadproc);
> > > -}
> > > -
> > > void
> > > proc_free(struct proc *p)
> > > {
> > > @@ -441,20 +408,26 @@ void
> > > reaper(void *arg)
> > > {
> > > struct proc *p;
> > > + struct cpu_info *ci = arg;
> > > + struct schedstate_percpu *spc;
> > >
> > > KERNEL_UNLOCK();
> > > -
> > > SCHED_ASSERT_UNLOCKED();
> > >
> > > + sched_peg_curproc(ci);
> > > + spc = &ci->ci_schedstate;
> > > +
> > > + KASSERT(ci == curcpu());
> > > +
> > > for (;;) {
> > > - mtx_enter(&deadproc_mutex);
> > > - while ((p = LIST_FIRST(&deadproc)) == NULL)
> > > - msleep_nsec(&deadproc, &deadproc_mutex,
> > > PVM, "reaper",
> > > - INFSLP);
> > > + while ((p = LIST_FIRST(&spc->spc_deadproc)) ==
> > > NULL) {
> > > + KERNEL_LOCK();
> > > + tsleep_nsec(&spc->spc_deadproc, PVM,
> > > "reaper", INFSLP);
> > > + KERNEL_UNLOCK();
> > > + }
> >
> > I don't think going back to taking the kernel lock here makes sense.
> >
> > But if you now have a reaper per-CPU, the spc_deadproc list is only
> > ever tocuhed by the CPU that owns it isn't it? So there would be no
> > need to do any locking...
>
> Correct! I put it in, because tsleep() does not know about that case.
> It has the following assertion that triggers if the kernel lock is not
> held.
>
> KASSERT(ident == &nowake || timo || _kernel_lock_held());
>
> We can unroll tsleep() here without holding the kernel lock. I tested
> it, and it works.
>
You actually don't need a tsleep at all. This code needs no wakeup_one()
either. You need to think this like the idleproc. Its constant state is
SSLEEP and then by calling sched_toproc(&spc->...) it goes SONPROC runs
does its work and just mi_switch() at the end.
--
:wq Claudio
One reaper per CPU