Download raw body.
One reaper per CPU
Hi,
On Wed, 2024-07-10 at 17:42 +0200, Mark Kettenis wrote:
> > Date: Wed, 10 Jul 2024 17:21:35 +0200
> > From: Christian Ludwig <cludwig@genua.de>
>
> Hi Christian,
>
> > Hi,
> >
> > This implements one reaper process per CPU. The benefit is that we
> > can
> > switch to a safe stack (the reaper) on process teardown without the
> > need
> > to lock data structures between sched_exit() and the local reaper.
> > That
> > means the global deadproc mutex goes away. And there is also no
> > need to
> > go through idle anymore, because we know that the local reaper is
> > not
> > currently running when the dying proc enlists itself.
> >
> > I have tested it lightly. It does not fall apart immediately. I do
> > not
> > see any performance regressions in my test case, which is
> > recompiling
> > the kernel over and over again, but your mileage may vary. So please
> > give
> > this some serious testing on your boxes.
> >
> > So long,
> >
> > - Christian
> >
> > ---
> > sys/kern/init_main.c | 5 ----
> > sys/kern/kern_exit.c | 59 ++++++++++++---------------------------
> > ----
> > sys/kern/kern_sched.c | 43 ++++++++++++++++++++-----------
> > sys/sys/proc.h | 1 -
> > 4 files changed, 44 insertions(+), 64 deletions(-)
> >
> > diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
> > index d571b0855cff..83a03a14a042 100644
> > --- a/sys/kern/init_main.c
> > +++ b/sys/kern/init_main.c
> > @@ -117,7 +117,6 @@ struct plimit limit0;
> > struct vmspace vmspace0;
> > struct sigacts sigacts0;
> > struct process *initprocess;
> > -struct proc *reaperproc;
> >
> > extern struct user *proc0paddr;
> >
> > @@ -495,10 +494,6 @@ main(void *framep)
> > if (kthread_create(uvm_pageout, NULL, NULL, "pagedaemon"))
> > panic("fork pagedaemon");
> >
> > - /* Create the reaper daemon kernel thread. */
> > - if (kthread_create(reaper, NULL, &reaperproc, "reaper"))
> > - panic("fork reaper");
> > -
> > /* Create the cleaner daemon kernel thread. */
> > if (kthread_create(buf_daemon, NULL, &cleanerproc,
> > "cleaner"))
> > panic("fork cleaner");
> > diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
> > index ba87dcd42cd0..a5cc37ac0f74 100644
> > --- a/sys/kern/kern_exit.c
> > +++ b/sys/kern/kern_exit.c
> > @@ -378,11 +378,10 @@ exit1(struct proc *p, int xexit, int xsig,
> > int flags)
> > */
> >
> > /*
> > - * Finally, call machine-dependent code to switch to a new
> > - * context (possibly the idle context). Once we are no
> > longer
> > - * using the dead process's vmspace and stack, exit2() will
> > be
> > - * called to schedule those resources to be released by the
> > - * reaper thread.
> > + * Finally, call machine-dependent code to switch away.
> > Then,
> > + * when we are no longer using the dead process's vmspace
> > and
> > + * stack, the current CPU's reaper thread releases those
> > + * resources.
> > *
> > * Note that cpu_exit() will end with a call equivalent to
> > * cpu_switch(), finishing our execution (pun intended).
> > @@ -392,38 +391,6 @@ exit1(struct proc *p, int xexit, int xsig, int
> > flags)
> > panic("cpu_exit returned");
> > }
> >
> > -/*
> > - * Locking of this proclist is special; it's accessed in a
> > - * critical section of process exit, and thus locking it can't
> > - * modify interrupt state. We use a simple spin lock for this
> > - * proclist. We use the p_hash member to linkup to deadproc.
> > - */
> > -struct mutex deadproc_mutex =
> > - MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS);
> > -struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc);
> > -
> > -/*
> > - * We are called from sched_idle() once it is safe to schedule the
> > - * dead process's resources to be freed. So this is not allowed to
> > sleep.
> > - *
> > - * We lock the deadproc list, place the proc on that list (using
> > - * the p_hash member), and wake up the reaper.
> > - */
> > -void
> > -exit2(struct proc *p)
> > -{
> > - /* account the remainder of time spent in exit1() */
> > - mtx_enter(&p->p_p->ps_mtx);
> > - tuagg_add_process(p->p_p, p);
> > - mtx_leave(&p->p_p->ps_mtx);
> > -
> > - mtx_enter(&deadproc_mutex);
> > - LIST_INSERT_HEAD(&deadproc, p, p_hash);
> > - mtx_leave(&deadproc_mutex);
> > -
> > - wakeup(&deadproc);
> > -}
> > -
> > void
> > proc_free(struct proc *p)
> > {
> > @@ -441,20 +408,26 @@ void
> > reaper(void *arg)
> > {
> > struct proc *p;
> > + struct cpu_info *ci = arg;
> > + struct schedstate_percpu *spc;
> >
> > KERNEL_UNLOCK();
> > -
> > SCHED_ASSERT_UNLOCKED();
> >
> > + sched_peg_curproc(ci);
> > + spc = &ci->ci_schedstate;
> > +
> > + KASSERT(ci == curcpu());
> > +
> > for (;;) {
> > - mtx_enter(&deadproc_mutex);
> > - while ((p = LIST_FIRST(&deadproc)) == NULL)
> > - msleep_nsec(&deadproc, &deadproc_mutex,
> > PVM, "reaper",
> > - INFSLP);
> > + while ((p = LIST_FIRST(&spc->spc_deadproc)) ==
> > NULL) {
> > + KERNEL_LOCK();
> > + tsleep_nsec(&spc->spc_deadproc, PVM,
> > "reaper", INFSLP);
> > + KERNEL_UNLOCK();
> > + }
>
> I don't think going back to taking the kernel lock here makes sense.
>
> But if you now have a reaper per-CPU, the spc_deadproc list is only
> ever touched by the CPU that owns it, isn't it? So there would be no
> need to do any locking...
Correct! I put it in because tsleep() does not know about that case.
It has the following assertion that triggers if the kernel lock is not
held.
KASSERT(ident == &nowake || timo || _kernel_lock_held());
We can unroll tsleep() here without holding the kernel lock. I tested
it, and it works.
- Christian
>
> > /* Remove us from the deadproc list. */
> > LIST_REMOVE(p, p_hash);
> > - mtx_leave(&deadproc_mutex);
> >
> > WITNESS_THREAD_EXIT(p);
> >
> > diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
> > index 0ee965183323..c3e3d4a1c14b 100644
> > --- a/sys/kern/kern_sched.c
> > +++ b/sys/kern/kern_sched.c
> > @@ -32,6 +32,7 @@
> >
> > void sched_kthreads_create(void *);
> >
> > +void sched_toproc(struct proc *);
> > int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
> > struct proc *sched_steal_proc(struct cpu_info *);
> >
> > @@ -116,6 +117,7 @@ sched_kthreads_create(void *v)
> > {
> > struct cpu_info *ci = v;
> > struct schedstate_percpu *spc = &ci->ci_schedstate;
> > + char comm[_MAXCOMLEN];
> > static int num;
> >
> > if (fork1(&proc0,
> > FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
> > @@ -128,6 +130,11 @@ sched_kthreads_create(void *v)
> > sizeof(spc->spc_idleproc->p_p->ps_comm),
> > "idle%d", num);
> >
> > + /* Create the reaper daemon kernel thread. */
> > + snprintf(comm, sizeof(comm), "reaper%d", num);
> > + if (kthread_create(reaper, ci, NULL, comm))
> > + panic("fork reaper");
> > +
> > num++;
> > }
> >
> > @@ -160,17 +167,10 @@ sched_idle(void *v)
> >
> > while (1) {
> > while (!cpu_is_idle(curcpu())) {
> > - struct proc *dead;
> > -
> > SCHED_LOCK();
> > p->p_stat = SSLEEP;
> > mi_switch();
> > SCHED_UNLOCK();
> > -
> > - while ((dead = LIST_FIRST(&spc-
> > >spc_deadproc))) {
> > - LIST_REMOVE(dead, p_hash);
> > - exit2(dead);
> > - }
> > }
> >
> > splassert(IPL_NONE);
> > @@ -230,14 +230,17 @@ sched_exit(struct proc *p)
> > tu_leave(&p->p_tu);
> >
> > KERNEL_ASSERT_LOCKED();
> > - sched_toidle();
> > + KERNEL_UNLOCK();
> > +
> > + wakeup_one(&spc->spc_deadproc);
> > + sched_toproc(NULL);
> > + /* NOTREACHED */
> > }
> >
> > void
> > -sched_toidle(void)
> > +sched_toproc(struct proc *nextproc)
> > {
> > struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
> > - struct proc *idle;
> >
> > #ifdef MULTIPROCESSOR
> > /* This process no longer needs to hold the kernel lock. */
> > @@ -257,17 +260,27 @@ sched_toidle(void)
> > atomic_clearbits_int(&spc->spc_schedflags,
> > SPCF_SWITCHCLEAR);
> >
> > SCHED_LOCK();
> > - idle = spc->spc_idleproc;
> > - idle->p_stat = SRUN;
> > +
> > + if (nextproc == NULL)
> > + nextproc = sched_chooseproc();
> > + else
> > + nextproc->p_stat = SRUN;
> >
> > uvmexp.swtch++;
> > if (curproc != NULL)
> > - TRACEPOINT(sched, off__cpu, idle->p_tid +
> > THREAD_PID_OFFSET,
> > - idle->p_p->ps_pid);
> > - cpu_switchto(NULL, idle);
> > + TRACEPOINT(sched, off__cpu, nextproc->p_tid +
> > THREAD_PID_OFFSET,
> > + nextproc->p_p->ps_pid);
> > + cpu_switchto(NULL, nextproc);
> > panic("cpu_switchto returned");
> > }
> >
> > +void
> > +sched_toidle(void)
> > +{
> > + struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
> > + sched_toproc(spc->spc_idleproc);
> > +}
> > +
> > /*
> > * Run queue management.
> > */
> > diff --git a/sys/sys/proc.h b/sys/sys/proc.h
> > index 50b708954bff..6139f25c3640 100644
> > --- a/sys/sys/proc.h
> > +++ b/sys/sys/proc.h
> > @@ -562,7 +562,6 @@ int wakeup_proc(struct proc *, int);
> > void unsleep(struct proc *);
> > void reaper(void *);
> > __dead void exit1(struct proc *, int, int, int);
> > -void exit2(struct proc *);
> > void cpu_fork(struct proc *_curp, struct proc *_child, void
> > *_stack,
> > void *_tcb, void (*_func)(void *), void *_arg);
> > void cpu_exit(struct proc *);
> > --
> > 2.34.1
> >
> >
> > [2:application/pkcs7-signature Show Save:smime.p7s (5kB)]
> >
One reaper per CPU