From: Claudio Jeker Subject: Re: Use a FIFO for the reaper To: tech@openbsd.org Date: Wed, 14 May 2025 21:01:09 +0200 On Wed, May 14, 2025 at 06:54:57PM +0200, Martin Pieuchot wrote: > Instead of adding dead threads at the head of the deadproc list insert > them at the tail. > > This improve latency and reduce variance when building with 24 CPUs. > > While here remove a comment talking about per-CPU reapers. > > ok? Looks good to me. Will try tomorrow. > Index: kern/kern_exit.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_exit.c,v > diff -u -p -r1.245 kern_exit.c > --- kern/kern_exit.c 2 May 2025 05:04:38 -0000 1.245 > +++ kern/kern_exit.c 14 May 2025 16:12:52 -0000 > @@ -257,7 +257,7 @@ exit1(struct proc *p, int xexit, int xsi > /* > * Remove proc from pidhash chain and allproc so looking > * it up won't work. We will put the proc on the > - * deadproc list later (using the p_hash member), and > + * deadproc list later (using the p_runq member), and > * wake up the reaper when we do. If this is the last > * thread of a process that isn't PS_NOZOMBIE, we'll put > * the process on the zombprocess list below. > @@ -396,21 +396,21 @@ exit1(struct proc *p, int xexit, int xsi > } > > /* > - * Locking of this proclist is special; it's accessed in a > + * Locking of this prochead is special; it's accessed in a > * critical section of process exit, and thus locking it can't > * modify interrupt state. We use a simple spin lock for this > - * proclist. We use the p_hash member to linkup to deadproc. > + * prochead. We use the p_runq member to linkup to deadproc. > */ > struct mutex deadproc_mutex = > MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS); > -struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc); > +struct prochead deadproc = TAILQ_HEAD_INITIALIZER(deadproc); > > /* > * We are called from sched_idle() once it is safe to schedule the > * dead process's resources to be freed. So this is not allowed to sleep. > * > * We lock the deadproc list, place the proc on that list (using > - * the p_hash member), and wake up the reaper. > + * the p_runq member), and wake up the reaper. > */ > void > exit2(struct proc *p) > @@ -421,7 +421,7 @@ exit2(struct proc *p) > mtx_leave(&p->p_p->ps_mtx); > > mtx_enter(&deadproc_mutex); > - LIST_INSERT_HEAD(&deadproc, p, p_hash); > + TAILQ_INSERT_TAIL(&deadproc, p, p_runq); > mtx_leave(&deadproc_mutex); > > wakeup(&deadproc); > @@ -451,12 +451,12 @@ reaper(void *arg) > > for (;;) { > mtx_enter(&deadproc_mutex); > - while ((p = LIST_FIRST(&deadproc)) == NULL) > + while ((p = TAILQ_FIRST(&deadproc)) == NULL) > msleep_nsec(&deadproc, &deadproc_mutex, PVM, "reaper", > INFSLP); > > /* Remove us from the deadproc list. */ > - LIST_REMOVE(p, p_hash); > + TAILQ_REMOVE(&deadproc, p, p_runq); > mtx_leave(&deadproc_mutex); > > WITNESS_THREAD_EXIT(p); > Index: kern/kern_sched.c > =================================================================== > RCS file: /cvs/src/sys/kern/kern_sched.c,v > diff -u -p -r1.104 kern_sched.c > --- kern/kern_sched.c 10 Mar 2025 09:28:56 -0000 1.104 > +++ kern/kern_sched.c 14 May 2025 16:12:52 -0000 > @@ -95,7 +95,7 @@ sched_init_cpu(struct cpu_info *ci) > > kthread_create_deferred(sched_kthreads_create, ci); > > - LIST_INIT(&spc->spc_deadproc); > + TAILQ_INIT(&spc->spc_deadproc); > SIMPLEQ_INIT(&spc->spc_deferred); > > /* > @@ -167,8 +167,8 @@ sched_idle(void *v) > mi_switch(); > SCHED_UNLOCK(); > > - while ((dead = LIST_FIRST(&spc->spc_deadproc))) { > - LIST_REMOVE(dead, p_hash); > + while ((dead = TAILQ_FIRST(&spc->spc_deadproc))) { > + TAILQ_REMOVE(&spc->spc_deadproc, dead, p_runq); > exit2(dead); > } > } > @@ -206,15 +206,13 @@ sched_idle(void *v) > * stack torn from under us before we manage to switch to another proc. > * Therefore we have a per-cpu list of dead processes where we put this > * proc and have idle clean up that list and move it to the reaper list. > - * All this will be unnecessary once we can bind the reaper this cpu > - * and not risk having it switch to another in case it sleeps. > */ > void > sched_exit(struct proc *p) > { > struct schedstate_percpu *spc = &curcpu()->ci_schedstate; > > - LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash); > + TAILQ_INSERT_TAIL(&spc->spc_deadproc, p, p_runq); > > tuagg_add_runtime(); > > Index: sys/sched.h > =================================================================== > RCS file: /cvs/src/sys/sys/sched.h,v > diff -u -p -r1.73 sched.h > --- sys/sched.h 8 Jul 2024 14:46:47 -0000 1.73 > +++ sys/sched.h 14 May 2025 16:12:52 -0000 > @@ -108,7 +108,7 @@ struct smr_entry; > struct schedstate_percpu { > struct proc *spc_idleproc; /* idle proc for this cpu */ > TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS]; > - LIST_HEAD(,proc) spc_deadproc; > + TAILQ_HEAD(,proc) spc_deadproc; > struct timespec spc_runtime; /* time curproc started running */ > volatile int spc_schedflags; /* flags; see below */ > u_int spc_schedticks; /* ticks for schedclock() */ > > -- :wq Claudio