From: Christian Ludwig Subject: One reaper per CPU To: Date: Wed, 10 Jul 2024 17:21:35 +0200 Hi, This implements one reaper process per CPU. The benefit is that we can switch to a safe stack (the reaper) on process teardown without the need to lock data structures between sched_exit() and the local reaper. That means the global deadproc mutex goes away. And there is also no need to go through idle anymore, because we know that the local reaper is not currently running when the dying proc enlists itself. I have tested it lightly. It does not fall apart immediately. I do not see any performance regressions in my test case, which is recompiling the kernel over and over again, but your mileage may vary. So please give this some serious testing on your boxes. So long, - Christian --- sys/kern/init_main.c | 5 ---- sys/kern/kern_exit.c | 59 ++++++++++++------------------------------- sys/kern/kern_sched.c | 43 ++++++++++++++++++++----------- sys/sys/proc.h | 1 - 4 files changed, 44 insertions(+), 64 deletions(-) diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index d571b0855cff..83a03a14a042 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -117,7 +117,6 @@ struct plimit limit0; struct vmspace vmspace0; struct sigacts sigacts0; struct process *initprocess; -struct proc *reaperproc; extern struct user *proc0paddr; @@ -495,10 +494,6 @@ main(void *framep) if (kthread_create(uvm_pageout, NULL, NULL, "pagedaemon")) panic("fork pagedaemon"); - /* Create the reaper daemon kernel thread. */ - if (kthread_create(reaper, NULL, &reaperproc, "reaper")) - panic("fork reaper"); - /* Create the cleaner daemon kernel thread. 
*/ if (kthread_create(buf_daemon, NULL, &cleanerproc, "cleaner")) panic("fork cleaner"); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index ba87dcd42cd0..a5cc37ac0f74 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -378,11 +378,10 @@ exit1(struct proc *p, int xexit, int xsig, int flags) */ /* - * Finally, call machine-dependent code to switch to a new - * context (possibly the idle context). Once we are no longer - * using the dead process's vmspace and stack, exit2() will be - * called to schedule those resources to be released by the - * reaper thread. + * Finally, call machine-dependent code to switch away. Then, + * when we are no longer using the dead process's vmspace and + * stack, the current CPU's reaper thread releases those + * resources. * * Note that cpu_exit() will end with a call equivalent to * cpu_switch(), finishing our execution (pun intended). @@ -392,38 +391,6 @@ exit1(struct proc *p, int xexit, int xsig, int flags) panic("cpu_exit returned"); } -/* - * Locking of this proclist is special; it's accessed in a - * critical section of process exit, and thus locking it can't - * modify interrupt state. We use a simple spin lock for this - * proclist. We use the p_hash member to linkup to deadproc. - */ -struct mutex deadproc_mutex = - MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS); -struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc); - -/* - * We are called from sched_idle() once it is safe to schedule the - * dead process's resources to be freed. So this is not allowed to sleep. - * - * We lock the deadproc list, place the proc on that list (using - * the p_hash member), and wake up the reaper. 
- */ -void -exit2(struct proc *p) -{ - /* account the remainder of time spent in exit1() */ - mtx_enter(&p->p_p->ps_mtx); - tuagg_add_process(p->p_p, p); - mtx_leave(&p->p_p->ps_mtx); - - mtx_enter(&deadproc_mutex); - LIST_INSERT_HEAD(&deadproc, p, p_hash); - mtx_leave(&deadproc_mutex); - - wakeup(&deadproc); -} - void proc_free(struct proc *p) { @@ -441,20 +408,26 @@ void reaper(void *arg) { struct proc *p; + struct cpu_info *ci = arg; + struct schedstate_percpu *spc; KERNEL_UNLOCK(); - SCHED_ASSERT_UNLOCKED(); + sched_peg_curproc(ci); + spc = &ci->ci_schedstate; + + KASSERT(ci == curcpu()); + for (;;) { - mtx_enter(&deadproc_mutex); - while ((p = LIST_FIRST(&deadproc)) == NULL) - msleep_nsec(&deadproc, &deadproc_mutex, PVM, "reaper", - INFSLP); + while ((p = LIST_FIRST(&spc->spc_deadproc)) == NULL) { + KERNEL_LOCK(); + tsleep_nsec(&spc->spc_deadproc, PVM, "reaper", INFSLP); + KERNEL_UNLOCK(); + } /* Remove us from the deadproc list. */ LIST_REMOVE(p, p_hash); - mtx_leave(&deadproc_mutex); WITNESS_THREAD_EXIT(p); diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c index 0ee965183323..c3e3d4a1c14b 100644 --- a/sys/kern/kern_sched.c +++ b/sys/kern/kern_sched.c @@ -32,6 +32,7 @@ void sched_kthreads_create(void *); +void sched_toproc(struct proc *); int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p); struct proc *sched_steal_proc(struct cpu_info *); @@ -116,6 +117,7 @@ sched_kthreads_create(void *v) { struct cpu_info *ci = v; struct schedstate_percpu *spc = &ci->ci_schedstate; + char comm[_MAXCOMLEN]; static int num; if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE| @@ -128,6 +130,11 @@ sched_kthreads_create(void *v) sizeof(spc->spc_idleproc->p_p->ps_comm), "idle%d", num); + /* Create the reaper daemon kernel thread. 
*/ + snprintf(comm, sizeof(comm), "reaper%d", num); + if (kthread_create(reaper, ci, NULL, comm)) + panic("fork reaper"); + num++; } @@ -160,17 +167,10 @@ sched_idle(void *v) while (1) { while (!cpu_is_idle(curcpu())) { - struct proc *dead; - SCHED_LOCK(); p->p_stat = SSLEEP; mi_switch(); SCHED_UNLOCK(); - - while ((dead = LIST_FIRST(&spc->spc_deadproc))) { - LIST_REMOVE(dead, p_hash); - exit2(dead); - } } splassert(IPL_NONE); @@ -230,14 +230,17 @@ sched_exit(struct proc *p) tu_leave(&p->p_tu); KERNEL_ASSERT_LOCKED(); - sched_toidle(); + KERNEL_UNLOCK(); + + wakeup_one(&spc->spc_deadproc); + sched_toproc(NULL); + /* NOTREACHED */ } void -sched_toidle(void) +sched_toproc(struct proc *nextproc) { struct schedstate_percpu *spc = &curcpu()->ci_schedstate; - struct proc *idle; #ifdef MULTIPROCESSOR /* This process no longer needs to hold the kernel lock. */ @@ -257,17 +260,27 @@ sched_toidle(void) atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR); SCHED_LOCK(); - idle = spc->spc_idleproc; - idle->p_stat = SRUN; + + if (nextproc == NULL) + nextproc = sched_chooseproc(); + else + nextproc->p_stat = SRUN; uvmexp.swtch++; if (curproc != NULL) - TRACEPOINT(sched, off__cpu, idle->p_tid + THREAD_PID_OFFSET, - idle->p_p->ps_pid); - cpu_switchto(NULL, idle); + TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET, + nextproc->p_p->ps_pid); + cpu_switchto(NULL, nextproc); panic("cpu_switchto returned"); } +void +sched_toidle(void) +{ + struct schedstate_percpu *spc = &curcpu()->ci_schedstate; + sched_toproc(spc->spc_idleproc); +} + /* * Run queue management. 
*/ diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 50b708954bff..6139f25c3640 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -562,7 +562,6 @@ int wakeup_proc(struct proc *, int); void unsleep(struct proc *); void reaper(void *); __dead void exit1(struct proc *, int, int, int); -void exit2(struct proc *); void cpu_fork(struct proc *_curp, struct proc *_child, void *_stack, void *_tcb, void (*_func)(void *), void *_arg); void cpu_exit(struct proc *); -- 2.34.1