From: Claudio Jeker Subject: Re: One reaper per CPU To: Christian Ludwig Cc: "mark.kettenis@xs4all.nl" , "tech@openbsd.org" Date: Wed, 10 Jul 2024 20:25:00 +0200 On Wed, Jul 10, 2024 at 05:36:02PM +0000, Christian Ludwig wrote: > Hi, > > On Wed, 2024-07-10 at 17:42 +0200, Mark Kettenis wrote: > > > Date: Wed, 10 Jul 2024 17:21:35 +0200 > > > From: Christian Ludwig > > > > Hi Christian, > > > > > Hi, > > > > > > This implements one reaper process per CPU. The benefit is that we > > > can > > > switch to a safe stack (the reaper) on process teardown without the > > > need > > > to lock data structures between sched_exit() and the local reaper. > > > That > > > means the global deadproc mutex goes away. And there is also no > > > need to > > > go through idle anymore, because we know that the local reaper is > > > not > > > currently running when the dying proc enlists itself. > > > > > > I have tested it lightly. It does not fall apart immediately. I do > > > not > > > see any performance regressions in my test case, which is > > > recompiling > > > the kernel over and over again, but your mileage may vary. So please > > > give > > > this some serious testing on your boxes. 
> > > > > > So long, > > > > > >  - Christian > > > > > > --- > > >  sys/kern/init_main.c  |  5 ---- > > >  sys/kern/kern_exit.c  | 59 ++++++++++++--------------------------- > > > ---- > > >  sys/kern/kern_sched.c | 43 ++++++++++++++++++++----------- > > >  sys/sys/proc.h        |  1 - > > >  4 files changed, 44 insertions(+), 64 deletions(-) > > > > > > diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c > > > index d571b0855cff..83a03a14a042 100644 > > > --- a/sys/kern/init_main.c > > > +++ b/sys/kern/init_main.c > > > @@ -117,7 +117,6 @@ struct      plimit limit0; > > >  struct vmspace vmspace0; > > >  struct sigacts sigacts0; > > >  struct process *initprocess; > > > -struct proc *reaperproc; > > >   > > >  extern struct user *proc0paddr; > > >   > > > @@ -495,10 +494,6 @@ main(void *framep) > > >         if (kthread_create(uvm_pageout, NULL, NULL, "pagedaemon")) > > >                 panic("fork pagedaemon"); > > >   > > > -       /* Create the reaper daemon kernel thread. */ > > > -       if (kthread_create(reaper, NULL, &reaperproc, "reaper")) > > > -               panic("fork reaper"); > > > - > > >         /* Create the cleaner daemon kernel thread. */ > > >         if (kthread_create(buf_daemon, NULL, &cleanerproc, > > > "cleaner")) > > >                 panic("fork cleaner"); > > > diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c > > > index ba87dcd42cd0..a5cc37ac0f74 100644 > > > --- a/sys/kern/kern_exit.c > > > +++ b/sys/kern/kern_exit.c > > > @@ -378,11 +378,10 @@ exit1(struct proc *p, int xexit, int xsig, > > > int flags) > > >          */ > > >   > > >         /* > > > -        * Finally, call machine-dependent code to switch to a new > > > -        * context (possibly the idle context).  Once we are no > > > longer > > > -        * using the dead process's vmspace and stack, exit2() will > > > be > > > -        * called to schedule those resources to be released by the > > > -        * reaper thread. 
> > > +        * Finally, call machine-dependent code to switch away.  > > > Then, > > > +        * when we are no longer using the dead process's vmspace > > > and > > > +        * stack, the current CPU's reaper thread releases those > > > +        * resources. > > >          * > > >          * Note that cpu_exit() will end with a call equivalent to > > >          * cpu_switch(), finishing our execution (pun intended). > > > @@ -392,38 +391,6 @@ exit1(struct proc *p, int xexit, int xsig, int > > > flags) > > >         panic("cpu_exit returned"); > > >  } > > >   > > > -/* > > > - * Locking of this proclist is special; it's accessed in a > > > - * critical section of process exit, and thus locking it can't > > > - * modify interrupt state.  We use a simple spin lock for this > > > - * proclist.  We use the p_hash member to linkup to deadproc. > > > - */ > > > -struct mutex deadproc_mutex = > > > -    MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS); > > > -struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc); > > > - > > > -/* > > > - * We are called from sched_idle() once it is safe to schedule the > > > - * dead process's resources to be freed. So this is not allowed to > > > sleep. > > > - * > > > - * We lock the deadproc list, place the proc on that list (using > > > - * the p_hash member), and wake up the reaper. 
> > > - */ > > > -void > > > -exit2(struct proc *p) > > > -{ > > > -       /* account the remainder of time spent in exit1() */ > > > -       mtx_enter(&p->p_p->ps_mtx); > > > -       tuagg_add_process(p->p_p, p); > > > -       mtx_leave(&p->p_p->ps_mtx); > > > - > > > -       mtx_enter(&deadproc_mutex); > > > -       LIST_INSERT_HEAD(&deadproc, p, p_hash); > > > -       mtx_leave(&deadproc_mutex); > > > - > > > -       wakeup(&deadproc); > > > -} > > > - > > >  void > > >  proc_free(struct proc *p) > > >  { > > > @@ -441,20 +408,26 @@ void > > >  reaper(void *arg) > > >  { > > >         struct proc *p; > > > +       struct cpu_info *ci = arg; > > > +       struct schedstate_percpu *spc; > > >   > > >         KERNEL_UNLOCK(); > > > - > > >         SCHED_ASSERT_UNLOCKED(); > > >   > > > +       sched_peg_curproc(ci); > > > +       spc = &ci->ci_schedstate; > > > + > > > +       KASSERT(ci == curcpu()); > > > + > > >         for (;;) { > > > -               mtx_enter(&deadproc_mutex); > > > -               while ((p = LIST_FIRST(&deadproc)) == NULL) > > > -                       msleep_nsec(&deadproc, &deadproc_mutex, > > > PVM, "reaper", > > > -                           INFSLP); > > > +               while ((p = LIST_FIRST(&spc->spc_deadproc)) == > > > NULL) { > > > +                       KERNEL_LOCK(); > > > +                       tsleep_nsec(&spc->spc_deadproc, PVM, > > > "reaper", INFSLP); > > > +                       KERNEL_UNLOCK(); > > > +               } > > > > I don't think going back to taking the kernel lock here makes sense. > > > > But if you now have a reaper per-CPU, the spc_deadproc list is only > > ever touched by the CPU that owns it, isn't it?  So there would be no > > need to do any locking... > > Correct! I put it in, because tsleep() does not know about that case. > It has the following assertion that triggers if the kernel lock is not > held. 
> > KASSERT(ident == &nowake || timo || _kernel_lock_held()); > > We can unroll tsleep() here without holding the kernel lock. I tested > it, and it works. > You actually don't need a tsleep at all. This code needs no wakeup_one() either. You need to think of this like the idleproc. Its constant state is SSLEEP, and then by calling sched_toproc(&spc->...) it goes SONPROC, runs, does its work, and just calls mi_switch() at the end. -- :wq Claudio