From: Christian Ludwig Subject: Re: One reaper per CPU To: "mark.kettenis@xs4all.nl" Cc: "tech@openbsd.org" Date: Wed, 10 Jul 2024 17:36:02 +0000 Hi, On Wed, 2024-07-10 at 17:42 +0200, Mark Kettenis wrote: > > Date: Wed, 10 Jul 2024 17:21:35 +0200 > > From: Christian Ludwig > > Hi Christian, > > > Hi, > > > > This implements one reaper process per CPU. The benefit is that we > > can > > switch to a safe stack (the reaper) on process teardown without the > > need > > to lock data structures between sched_exit() and the local reaper. > > That > > means the global deadproc mutex goes away. And there is also no > > need to > > go through idle anymore, because we know that the local reaper is > > not > > currently running when the dying proc enlists itself. > > > > I have tested it lightly. It does not fall apart immediately. I do > > not > > see any performance regressions in my test case, which is > > recompiling > > the kernel over and over again, but your mileage my vary. So please > > give > > this some serious testing on your boxes. > > > > So long, > > > >  - Christian > > > > --- > >  sys/kern/init_main.c  |  5 ---- > >  sys/kern/kern_exit.c  | 59 ++++++++++++--------------------------- > > ---- > >  sys/kern/kern_sched.c | 43 ++++++++++++++++++++----------- > >  sys/sys/proc.h        |  1 - > >  4 files changed, 44 insertions(+), 64 deletions(-) > > > > diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c > > index d571b0855cff..83a03a14a042 100644 > > --- a/sys/kern/init_main.c > > +++ b/sys/kern/init_main.c > > @@ -117,7 +117,6 @@ struct      plimit limit0; > >  struct vmspace vmspace0; > >  struct sigacts sigacts0; > >  struct process *initprocess; > > -struct proc *reaperproc; > >   > >  extern struct user *proc0paddr; > >   > > @@ -495,10 +494,6 @@ main(void *framep) > >         if (kthread_create(uvm_pageout, NULL, NULL, "pagedaemon")) > >                 panic("fork pagedaemon"); > >   > > -       /* Create the reaper daemon kernel thread. */ > > -       if (kthread_create(reaper, NULL, &reaperproc, "reaper")) > > -               panic("fork reaper"); > > - > >         /* Create the cleaner daemon kernel thread. */ > >         if (kthread_create(buf_daemon, NULL, &cleanerproc, > > "cleaner")) > >                 panic("fork cleaner"); > > diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c > > index ba87dcd42cd0..a5cc37ac0f74 100644 > > --- a/sys/kern/kern_exit.c > > +++ b/sys/kern/kern_exit.c > > @@ -378,11 +378,10 @@ exit1(struct proc *p, int xexit, int xsig, > > int flags) > >          */ > >   > >         /* > > -        * Finally, call machine-dependent code to switch to a new > > -        * context (possibly the idle context).  Once we are no > > longer > > -        * using the dead process's vmspace and stack, exit2() will > > be > > -        * called to schedule those resources to be released by the > > -        * reaper thread. > > +        * Finally, call machine-dependent code to switch away.  > > Then, > > +        * when we are no longer using the dead process's vmspace > > and > > +        * stack, the current CPU's reaper thread releases those > > +        * resources. > >          * > >          * Note that cpu_exit() will end with a call equivalent to > >          * cpu_switch(), finishing our execution (pun intended). > > @@ -392,38 +391,6 @@ exit1(struct proc *p, int xexit, int xsig, int > > flags) > >         panic("cpu_exit returned"); > >  } > >   > > -/* > > - * Locking of this proclist is special; it's accessed in a > > - * critical section of process exit, and thus locking it can't > > - * modify interrupt state.  We use a simple spin lock for this > > - * proclist.  We use the p_hash member to linkup to deadproc. > > - */ > > -struct mutex deadproc_mutex = > > -    MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS); > > -struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc); > > - > > -/* > > - * We are called from sched_idle() once it is safe to schedule the > > - * dead process's resources to be freed. So this is not allowed to > > sleep. > > - * > > - * We lock the deadproc list, place the proc on that list (using > > - * the p_hash member), and wake up the reaper. > > - */ > > -void > > -exit2(struct proc *p) > > -{ > > -       /* account the remainder of time spent in exit1() */ > > -       mtx_enter(&p->p_p->ps_mtx); > > -       tuagg_add_process(p->p_p, p); > > -       mtx_leave(&p->p_p->ps_mtx); > > - > > -       mtx_enter(&deadproc_mutex); > > -       LIST_INSERT_HEAD(&deadproc, p, p_hash); > > -       mtx_leave(&deadproc_mutex); > > - > > -       wakeup(&deadproc); > > -} > > - > >  void > >  proc_free(struct proc *p) > >  { > > @@ -441,20 +408,26 @@ void > >  reaper(void *arg) > >  { > >         struct proc *p; > > +       struct cpu_info *ci = arg; > > +       struct schedstate_percpu *spc; > >   > >         KERNEL_UNLOCK(); > > - > >         SCHED_ASSERT_UNLOCKED(); > >   > > +       sched_peg_curproc(ci); > > +       spc = &ci->ci_schedstate; > > + > > +       KASSERT(ci == curcpu()); > > + > >         for (;;) { > > -               mtx_enter(&deadproc_mutex); > > -               while ((p = LIST_FIRST(&deadproc)) == NULL) > > -                       msleep_nsec(&deadproc, &deadproc_mutex, > > PVM, "reaper", > > -                           INFSLP); > > +               while ((p = LIST_FIRST(&spc->spc_deadproc)) == > > NULL) { > > +                       KERNEL_LOCK(); > > +                       tsleep_nsec(&spc->spc_deadproc, PVM, > > "reaper", INFSLP); > > +                       KERNEL_UNLOCK(); > > +               } > > I don't think going back to taking the kernel lock here makes sense. > > But if you now have a reaper per-CPU, the spc_deadproc list is only > ever tocuhed by the CPU that owns it isn't it?  So there would be no > need to do any locking... Correct! I put it in, because tsleep() does not know about that case. It has the following assertion that triggers if the kernel lock is not held. KASSERT(ident == &nowake || timo || _kernel_lock_held()); We can unroll tsleep() here without holding the kernel lock. I tested it, and it works. - Christian > > >                 /* Remove us from the deadproc list. */ > >                 LIST_REMOVE(p, p_hash); > > -               mtx_leave(&deadproc_mutex); > >   > >                 WITNESS_THREAD_EXIT(p); > >   > > diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c > > index 0ee965183323..c3e3d4a1c14b 100644 > > --- a/sys/kern/kern_sched.c > > +++ b/sys/kern/kern_sched.c > > @@ -32,6 +32,7 @@ > >   > >  void sched_kthreads_create(void *); > >   > > +void sched_toproc(struct proc *); > >  int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p); > >  struct proc *sched_steal_proc(struct cpu_info *); > >   > > @@ -116,6 +117,7 @@ sched_kthreads_create(void *v) > >  { > >         struct cpu_info *ci = v; > >         struct schedstate_percpu *spc = &ci->ci_schedstate; > > +       char comm[_MAXCOMLEN]; > >         static int num; > >   > >         if (fork1(&proc0, > > FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE| > > @@ -128,6 +130,11 @@ sched_kthreads_create(void *v) > >             sizeof(spc->spc_idleproc->p_p->ps_comm), > >             "idle%d", num); > >   > > +       /* Create the reaper daemon kernel thread. */ > > +       snprintf(comm, sizeof(comm), "reaper%d", num); > > +       if (kthread_create(reaper, ci, NULL, comm)) > > +               panic("fork reaper"); > > + > >         num++; > >  } > >   > > @@ -160,17 +167,10 @@ sched_idle(void *v) > >   > >         while (1) { > >                 while (!cpu_is_idle(curcpu())) { > > -                       struct proc *dead; > > - > >                         SCHED_LOCK(); > >                         p->p_stat = SSLEEP; > >                         mi_switch(); > >                         SCHED_UNLOCK(); > > - > > -                       while ((dead = LIST_FIRST(&spc- > > >spc_deadproc))) { > > -                               LIST_REMOVE(dead, p_hash); > > -                               exit2(dead); > > -                       } > >                 } > >   > >                 splassert(IPL_NONE); > > @@ -230,14 +230,17 @@ sched_exit(struct proc *p) > >         tu_leave(&p->p_tu); > >   > >         KERNEL_ASSERT_LOCKED(); > > -       sched_toidle(); > > +       KERNEL_UNLOCK(); > > + > > +       wakeup_one(&spc->spc_deadproc); > > +       sched_toproc(NULL); > > +       /* NOTREACHED */ > >  } > >   > >  void > > -sched_toidle(void) > > +sched_toproc(struct proc *nextproc) > >  { > >         struct schedstate_percpu *spc = &curcpu()->ci_schedstate; > > -       struct proc *idle; > >   > >  #ifdef MULTIPROCESSOR > >         /* This process no longer needs to hold the kernel lock. */ > > @@ -257,17 +260,27 @@ sched_toidle(void) > >         atomic_clearbits_int(&spc->spc_schedflags, > > SPCF_SWITCHCLEAR); > >   > >         SCHED_LOCK(); > > -       idle = spc->spc_idleproc; > > -       idle->p_stat = SRUN; > > + > > +       if (nextproc == NULL) > > +               nextproc = sched_chooseproc(); > > +       else > > +               nextproc->p_stat = SRUN; > >   > >         uvmexp.swtch++; > >         if (curproc != NULL) > > -               TRACEPOINT(sched, off__cpu, idle->p_tid + > > THREAD_PID_OFFSET, > > -                   idle->p_p->ps_pid); > > -       cpu_switchto(NULL, idle); > > +               TRACEPOINT(sched, off__cpu, nextproc->p_tid + > > THREAD_PID_OFFSET, > > +                   nextproc->p_p->ps_pid); > > +       cpu_switchto(NULL, nextproc); > >         panic("cpu_switchto returned"); > >  } > >   > > +void > > +sched_toidle(void) > > +{ > > +       struct schedstate_percpu *spc = &curcpu()->ci_schedstate; > > +       sched_toproc(spc->spc_idleproc); > > +} > > + > >  /* > >   * Run queue management. > >   */ > > diff --git a/sys/sys/proc.h b/sys/sys/proc.h > > index 50b708954bff..6139f25c3640 100644 > > --- a/sys/sys/proc.h > > +++ b/sys/sys/proc.h > > @@ -562,7 +562,6 @@ int wakeup_proc(struct proc *, int); > >  void   unsleep(struct proc *); > >  void   reaper(void *); > >  __dead void exit1(struct proc *, int, int, int); > > -void   exit2(struct proc *); > >  void   cpu_fork(struct proc *_curp, struct proc *_child, void > > *_stack, > >             void *_tcb, void (*_func)(void *), void *_arg); > >  void   cpu_exit(struct proc *); > > -- > > 2.34.1 > > > > > > [2:application/pkcs7-signature Show Save:smime.p7s (5kB)] > >