From: Mark Kettenis Subject: Re: One reaper per CPU To: Christian Ludwig Cc: tech@openbsd.org Date: Wed, 10 Jul 2024 17:42:45 +0200 > Date: Wed, 10 Jul 2024 17:21:35 +0200 > From: Christian Ludwig Hi Christian, > Hi, > > This implements one reaper process per CPU. The benefit is that we can > switch to a safe stack (the reaper) on process teardown without the need > to lock data structures between sched_exit() and the local reaper. That > means the global deadproc mutex goes away. And there is also no need to > go through idle anymore, because we know that the local reaper is not > currently running when the dying proc enlists itself. > > I have tested it lightly. It does not fall apart immediately. I do not > see any performance regressions in my test case, which is recompiling > the kernel over and over again, but your mileage may vary. So please give > this some serious testing on your boxes. > > So long, > > - Christian > > --- > sys/kern/init_main.c | 5 ---- > sys/kern/kern_exit.c | 59 ++++++++++++------------------------------- > sys/kern/kern_sched.c | 43 ++++++++++++++++++++----------- > sys/sys/proc.h | 1 - > 4 files changed, 44 insertions(+), 64 deletions(-) > > diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c > index d571b0855cff..83a03a14a042 100644 > --- a/sys/kern/init_main.c > +++ b/sys/kern/init_main.c > @@ -117,7 +117,6 @@ struct plimit limit0; > struct vmspace vmspace0; > struct sigacts sigacts0; > struct process *initprocess; > -struct proc *reaperproc; > > extern struct user *proc0paddr; > > @@ -495,10 +494,6 @@ main(void *framep) > if (kthread_create(uvm_pageout, NULL, NULL, "pagedaemon")) > panic("fork pagedaemon"); > > - /* Create the reaper daemon kernel thread. */ > - if (kthread_create(reaper, NULL, &reaperproc, "reaper")) > - panic("fork reaper"); > - > /* Create the cleaner daemon kernel thread. 
*/ > if (kthread_create(buf_daemon, NULL, &cleanerproc, "cleaner")) > panic("fork cleaner"); > diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c > index ba87dcd42cd0..a5cc37ac0f74 100644 > --- a/sys/kern/kern_exit.c > +++ b/sys/kern/kern_exit.c > @@ -378,11 +378,10 @@ exit1(struct proc *p, int xexit, int xsig, int flags) > */ > > /* > - * Finally, call machine-dependent code to switch to a new > - * context (possibly the idle context). Once we are no longer > - * using the dead process's vmspace and stack, exit2() will be > - * called to schedule those resources to be released by the > - * reaper thread. > + * Finally, call machine-dependent code to switch away. Then, > + * when we are no longer using the dead process's vmspace and > + * stack, the current CPU's reaper thread releases those > + * resources. > * > * Note that cpu_exit() will end with a call equivalent to > * cpu_switch(), finishing our execution (pun intended). > @@ -392,38 +391,6 @@ exit1(struct proc *p, int xexit, int xsig, int flags) > panic("cpu_exit returned"); > } > > -/* > - * Locking of this proclist is special; it's accessed in a > - * critical section of process exit, and thus locking it can't > - * modify interrupt state. We use a simple spin lock for this > - * proclist. We use the p_hash member to linkup to deadproc. > - */ > -struct mutex deadproc_mutex = > - MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS); > -struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc); > - > -/* > - * We are called from sched_idle() once it is safe to schedule the > - * dead process's resources to be freed. So this is not allowed to sleep. > - * > - * We lock the deadproc list, place the proc on that list (using > - * the p_hash member), and wake up the reaper. 
> - */ > -void > -exit2(struct proc *p) > -{ > - /* account the remainder of time spent in exit1() */ > - mtx_enter(&p->p_p->ps_mtx); > - tuagg_add_process(p->p_p, p); > - mtx_leave(&p->p_p->ps_mtx); > - > - mtx_enter(&deadproc_mutex); > - LIST_INSERT_HEAD(&deadproc, p, p_hash); > - mtx_leave(&deadproc_mutex); > - > - wakeup(&deadproc); > -} > - > void > proc_free(struct proc *p) > { > @@ -441,20 +408,26 @@ void > reaper(void *arg) > { > struct proc *p; > + struct cpu_info *ci = arg; > + struct schedstate_percpu *spc; > > KERNEL_UNLOCK(); > - > SCHED_ASSERT_UNLOCKED(); > > + sched_peg_curproc(ci); > + spc = &ci->ci_schedstate; > + > + KASSERT(ci == curcpu()); > + > for (;;) { > - mtx_enter(&deadproc_mutex); > - while ((p = LIST_FIRST(&deadproc)) == NULL) > - msleep_nsec(&deadproc, &deadproc_mutex, PVM, "reaper", > - INFSLP); > + while ((p = LIST_FIRST(&spc->spc_deadproc)) == NULL) { > + KERNEL_LOCK(); > + tsleep_nsec(&spc->spc_deadproc, PVM, "reaper", INFSLP); > + KERNEL_UNLOCK(); > + } I don't think going back to taking the kernel lock here makes sense. But if you now have a reaper per-CPU, the spc_deadproc list is only ever touched by the CPU that owns it, isn't it? So there would be no need to do any locking... > /* Remove us from the deadproc list. 
*/ > LIST_REMOVE(p, p_hash); > - mtx_leave(&deadproc_mutex); > > WITNESS_THREAD_EXIT(p); > > diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c > index 0ee965183323..c3e3d4a1c14b 100644 > --- a/sys/kern/kern_sched.c > +++ b/sys/kern/kern_sched.c > @@ -32,6 +32,7 @@ > > void sched_kthreads_create(void *); > > +void sched_toproc(struct proc *); > int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p); > struct proc *sched_steal_proc(struct cpu_info *); > > @@ -116,6 +117,7 @@ sched_kthreads_create(void *v) > { > struct cpu_info *ci = v; > struct schedstate_percpu *spc = &ci->ci_schedstate; > + char comm[_MAXCOMLEN]; > static int num; > > if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE| > @@ -128,6 +130,11 @@ sched_kthreads_create(void *v) > sizeof(spc->spc_idleproc->p_p->ps_comm), > "idle%d", num); > > + /* Create the reaper daemon kernel thread. */ > + snprintf(comm, sizeof(comm), "reaper%d", num); > + if (kthread_create(reaper, ci, NULL, comm)) > + panic("fork reaper"); > + > num++; > } > > @@ -160,17 +167,10 @@ sched_idle(void *v) > > while (1) { > while (!cpu_is_idle(curcpu())) { > - struct proc *dead; > - > SCHED_LOCK(); > p->p_stat = SSLEEP; > mi_switch(); > SCHED_UNLOCK(); > - > - while ((dead = LIST_FIRST(&spc->spc_deadproc))) { > - LIST_REMOVE(dead, p_hash); > - exit2(dead); > - } > } > > splassert(IPL_NONE); > @@ -230,14 +230,17 @@ sched_exit(struct proc *p) > tu_leave(&p->p_tu); > > KERNEL_ASSERT_LOCKED(); > - sched_toidle(); > + KERNEL_UNLOCK(); > + > + wakeup_one(&spc->spc_deadproc); > + sched_toproc(NULL); > + /* NOTREACHED */ > } > > void > -sched_toidle(void) > +sched_toproc(struct proc *nextproc) > { > struct schedstate_percpu *spc = &curcpu()->ci_schedstate; > - struct proc *idle; > > #ifdef MULTIPROCESSOR > /* This process no longer needs to hold the kernel lock. 
*/ > @@ -257,17 +260,27 @@ sched_toidle(void) > atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR); > > SCHED_LOCK(); > - idle = spc->spc_idleproc; > - idle->p_stat = SRUN; > + > + if (nextproc == NULL) > + nextproc = sched_chooseproc(); > + else > + nextproc->p_stat = SRUN; > > uvmexp.swtch++; > if (curproc != NULL) > - TRACEPOINT(sched, off__cpu, idle->p_tid + THREAD_PID_OFFSET, > - idle->p_p->ps_pid); > - cpu_switchto(NULL, idle); > + TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET, > + nextproc->p_p->ps_pid); > + cpu_switchto(NULL, nextproc); > panic("cpu_switchto returned"); > } > > +void > +sched_toidle(void) > +{ > + struct schedstate_percpu *spc = &curcpu()->ci_schedstate; > + sched_toproc(spc->spc_idleproc); > +} > + > /* > * Run queue management. > */ > diff --git a/sys/sys/proc.h b/sys/sys/proc.h > index 50b708954bff..6139f25c3640 100644 > --- a/sys/sys/proc.h > +++ b/sys/sys/proc.h > @@ -562,7 +562,6 @@ int wakeup_proc(struct proc *, int); > void unsleep(struct proc *); > void reaper(void *); > __dead void exit1(struct proc *, int, int, int); > -void exit2(struct proc *); > void cpu_fork(struct proc *_curp, struct proc *_child, void *_stack, > void *_tcb, void (*_func)(void *), void *_arg); > void cpu_exit(struct proc *); > -- > 2.34.1 > > > [2:application/pkcs7-signature Show Save:smime.p7s (5kB)] >