Download raw body.
One reaper per CPU
> Date: Wed, 10 Jul 2024 17:21:35 +0200
> From: Christian Ludwig <cludwig@genua.de>
Hi Christian,
> Hi,
>
> This implements one reaper process per CPU. The benefit is that we can
> switch to a safe stack (the reaper) on process teardown without the need
> to lock data structures between sched_exit() and the local reaper. That
> means the global deadproc mutex goes away. And there is also no need to
> go through idle anymore, because we know that the local reaper is not
> currently running when the dying proc enlists itself.
>
> I have tested it lightly. It does not fall apart immediately. I do not
> see any performance regressions in my test case, which is recompiling
> the kernel over and over again, but your mileage may vary. So please give
> this some serious testing on your boxes.
>
> So long,
>
> - Christian
>
> ---
> sys/kern/init_main.c | 5 ----
> sys/kern/kern_exit.c | 59 ++++++++++++-------------------------------
> sys/kern/kern_sched.c | 43 ++++++++++++++++++++-----------
> sys/sys/proc.h | 1 -
> 4 files changed, 44 insertions(+), 64 deletions(-)
>
> diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
> index d571b0855cff..83a03a14a042 100644
> --- a/sys/kern/init_main.c
> +++ b/sys/kern/init_main.c
> @@ -117,7 +117,6 @@ struct plimit limit0;
> struct vmspace vmspace0;
> struct sigacts sigacts0;
> struct process *initprocess;
> -struct proc *reaperproc;
>
> extern struct user *proc0paddr;
>
> @@ -495,10 +494,6 @@ main(void *framep)
> if (kthread_create(uvm_pageout, NULL, NULL, "pagedaemon"))
> panic("fork pagedaemon");
>
> - /* Create the reaper daemon kernel thread. */
> - if (kthread_create(reaper, NULL, &reaperproc, "reaper"))
> - panic("fork reaper");
> -
> /* Create the cleaner daemon kernel thread. */
> if (kthread_create(buf_daemon, NULL, &cleanerproc, "cleaner"))
> panic("fork cleaner");
> diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
> index ba87dcd42cd0..a5cc37ac0f74 100644
> --- a/sys/kern/kern_exit.c
> +++ b/sys/kern/kern_exit.c
> @@ -378,11 +378,10 @@ exit1(struct proc *p, int xexit, int xsig, int flags)
> */
>
> /*
> - * Finally, call machine-dependent code to switch to a new
> - * context (possibly the idle context). Once we are no longer
> - * using the dead process's vmspace and stack, exit2() will be
> - * called to schedule those resources to be released by the
> - * reaper thread.
> + * Finally, call machine-dependent code to switch away. Then,
> + * when we are no longer using the dead process's vmspace and
> + * stack, the current CPU's reaper thread releases those
> + * resources.
> *
> * Note that cpu_exit() will end with a call equivalent to
> * cpu_switch(), finishing our execution (pun intended).
> @@ -392,38 +391,6 @@ exit1(struct proc *p, int xexit, int xsig, int flags)
> panic("cpu_exit returned");
> }
>
> -/*
> - * Locking of this proclist is special; it's accessed in a
> - * critical section of process exit, and thus locking it can't
> - * modify interrupt state. We use a simple spin lock for this
> - * proclist. We use the p_hash member to linkup to deadproc.
> - */
> -struct mutex deadproc_mutex =
> - MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS);
> -struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc);
> -
> -/*
> - * We are called from sched_idle() once it is safe to schedule the
> - * dead process's resources to be freed. So this is not allowed to sleep.
> - *
> - * We lock the deadproc list, place the proc on that list (using
> - * the p_hash member), and wake up the reaper.
> - */
> -void
> -exit2(struct proc *p)
> -{
> - /* account the remainder of time spent in exit1() */
> - mtx_enter(&p->p_p->ps_mtx);
> - tuagg_add_process(p->p_p, p);
> - mtx_leave(&p->p_p->ps_mtx);
> -
> - mtx_enter(&deadproc_mutex);
> - LIST_INSERT_HEAD(&deadproc, p, p_hash);
> - mtx_leave(&deadproc_mutex);
> -
> - wakeup(&deadproc);
> -}
> -
> void
> proc_free(struct proc *p)
> {
> @@ -441,20 +408,26 @@ void
> reaper(void *arg)
> {
> struct proc *p;
> + struct cpu_info *ci = arg;
> + struct schedstate_percpu *spc;
>
> KERNEL_UNLOCK();
> -
> SCHED_ASSERT_UNLOCKED();
>
> + sched_peg_curproc(ci);
> + spc = &ci->ci_schedstate;
> +
> + KASSERT(ci == curcpu());
> +
> for (;;) {
> - mtx_enter(&deadproc_mutex);
> - while ((p = LIST_FIRST(&deadproc)) == NULL)
> - msleep_nsec(&deadproc, &deadproc_mutex, PVM, "reaper",
> - INFSLP);
> + while ((p = LIST_FIRST(&spc->spc_deadproc)) == NULL) {
> + KERNEL_LOCK();
> + tsleep_nsec(&spc->spc_deadproc, PVM, "reaper", INFSLP);
> + KERNEL_UNLOCK();
> + }
I don't think going back to taking the kernel lock here makes sense.
But if you now have a reaper per-CPU, the spc_deadproc list is only
ever touched by the CPU that owns it, isn't it? So there would be no
need to do any locking...
> /* Remove us from the deadproc list. */
> LIST_REMOVE(p, p_hash);
> - mtx_leave(&deadproc_mutex);
>
> WITNESS_THREAD_EXIT(p);
>
> diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
> index 0ee965183323..c3e3d4a1c14b 100644
> --- a/sys/kern/kern_sched.c
> +++ b/sys/kern/kern_sched.c
> @@ -32,6 +32,7 @@
>
> void sched_kthreads_create(void *);
>
> +void sched_toproc(struct proc *);
> int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
> struct proc *sched_steal_proc(struct cpu_info *);
>
> @@ -116,6 +117,7 @@ sched_kthreads_create(void *v)
> {
> struct cpu_info *ci = v;
> struct schedstate_percpu *spc = &ci->ci_schedstate;
> + char comm[_MAXCOMLEN];
> static int num;
>
> if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
> @@ -128,6 +130,11 @@ sched_kthreads_create(void *v)
> sizeof(spc->spc_idleproc->p_p->ps_comm),
> "idle%d", num);
>
> + /* Create the reaper daemon kernel thread. */
> + snprintf(comm, sizeof(comm), "reaper%d", num);
> + if (kthread_create(reaper, ci, NULL, comm))
> + panic("fork reaper");
> +
> num++;
> }
>
> @@ -160,17 +167,10 @@ sched_idle(void *v)
>
> while (1) {
> while (!cpu_is_idle(curcpu())) {
> - struct proc *dead;
> -
> SCHED_LOCK();
> p->p_stat = SSLEEP;
> mi_switch();
> SCHED_UNLOCK();
> -
> - while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
> - LIST_REMOVE(dead, p_hash);
> - exit2(dead);
> - }
> }
>
> splassert(IPL_NONE);
> @@ -230,14 +230,17 @@ sched_exit(struct proc *p)
> tu_leave(&p->p_tu);
>
> KERNEL_ASSERT_LOCKED();
> - sched_toidle();
> + KERNEL_UNLOCK();
> +
> + wakeup_one(&spc->spc_deadproc);
> + sched_toproc(NULL);
> + /* NOTREACHED */
> }
>
> void
> -sched_toidle(void)
> +sched_toproc(struct proc *nextproc)
> {
> struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
> - struct proc *idle;
>
> #ifdef MULTIPROCESSOR
> /* This process no longer needs to hold the kernel lock. */
> @@ -257,17 +260,27 @@ sched_toidle(void)
> atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);
>
> SCHED_LOCK();
> - idle = spc->spc_idleproc;
> - idle->p_stat = SRUN;
> +
> + if (nextproc == NULL)
> + nextproc = sched_chooseproc();
> + else
> + nextproc->p_stat = SRUN;
>
> uvmexp.swtch++;
> if (curproc != NULL)
> - TRACEPOINT(sched, off__cpu, idle->p_tid + THREAD_PID_OFFSET,
> - idle->p_p->ps_pid);
> - cpu_switchto(NULL, idle);
> + TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET,
> + nextproc->p_p->ps_pid);
> + cpu_switchto(NULL, nextproc);
> panic("cpu_switchto returned");
> }
>
> +void
> +sched_toidle(void)
> +{
> + struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
> + sched_toproc(spc->spc_idleproc);
> +}
> +
> /*
> * Run queue management.
> */
> diff --git a/sys/sys/proc.h b/sys/sys/proc.h
> index 50b708954bff..6139f25c3640 100644
> --- a/sys/sys/proc.h
> +++ b/sys/sys/proc.h
> @@ -562,7 +562,6 @@ int wakeup_proc(struct proc *, int);
> void unsleep(struct proc *);
> void reaper(void *);
> __dead void exit1(struct proc *, int, int, int);
> -void exit2(struct proc *);
> void cpu_fork(struct proc *_curp, struct proc *_child, void *_stack,
> void *_tcb, void (*_func)(void *), void *_arg);
> void cpu_exit(struct proc *);
> --
> 2.34.1
>
>
> [2:application/pkcs7-signature Show Save:smime.p7s (5kB)]
>
One reaper per CPU