Index | Thread | Search

From:
Christian Ludwig <christian_ludwig@genua.de>
Subject:
Re: One reaper per CPU
To:
"mark.kettenis@xs4all.nl" <mark.kettenis@xs4all.nl>
Cc:
"tech@openbsd.org" <tech@openbsd.org>
Date:
Wed, 10 Jul 2024 17:36:02 +0000

Download raw body.

Thread
Hi,

On Wed, 2024-07-10 at 17:42 +0200, Mark Kettenis wrote:
> > Date: Wed, 10 Jul 2024 17:21:35 +0200
> > From: Christian Ludwig <cludwig@genua.de>
> 
> Hi Christian,
> 
> > Hi,
> > 
> > This implements one reaper process per CPU. The benefit is that we
> > can
> > switch to a safe stack (the reaper) on process teardown without the
> > need
> > to lock data structures between sched_exit() and the local reaper.
> > That
> > means the global deadproc mutex goes away. And there is also no
> > need to
> > go through idle anymore, because we know that the local reaper is
> > not
> > currently running when the dying proc enlists itself.
> > 
> > I have tested it lightly. It does not fall apart immediately. I do
> > not
> > see any performance regressions in my test case, which is
> > recompiling
> > the kernel over and over again, but your mileage my vary. So please
> > give
> > this some serious testing on your boxes.
> > 
> > So long,
> > 
> >  - Christian
> > 
> > ---
> >  sys/kern/init_main.c  |  5 ----
> >  sys/kern/kern_exit.c  | 59 ++++++++++++---------------------------
> > ----
> >  sys/kern/kern_sched.c | 43 ++++++++++++++++++++-----------
> >  sys/sys/proc.h        |  1 -
> >  4 files changed, 44 insertions(+), 64 deletions(-)
> > 
> > diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
> > index d571b0855cff..83a03a14a042 100644
> > --- a/sys/kern/init_main.c
> > +++ b/sys/kern/init_main.c
> > @@ -117,7 +117,6 @@ struct      plimit limit0;
> >  struct vmspace vmspace0;
> >  struct sigacts sigacts0;
> >  struct process *initprocess;
> > -struct proc *reaperproc;
> >  
> >  extern struct user *proc0paddr;
> >  
> > @@ -495,10 +494,6 @@ main(void *framep)
> >         if (kthread_create(uvm_pageout, NULL, NULL, "pagedaemon"))
> >                 panic("fork pagedaemon");
> >  
> > -       /* Create the reaper daemon kernel thread. */
> > -       if (kthread_create(reaper, NULL, &reaperproc, "reaper"))
> > -               panic("fork reaper");
> > -
> >         /* Create the cleaner daemon kernel thread. */
> >         if (kthread_create(buf_daemon, NULL, &cleanerproc,
> > "cleaner"))
> >                 panic("fork cleaner");
> > diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
> > index ba87dcd42cd0..a5cc37ac0f74 100644
> > --- a/sys/kern/kern_exit.c
> > +++ b/sys/kern/kern_exit.c
> > @@ -378,11 +378,10 @@ exit1(struct proc *p, int xexit, int xsig,
> > int flags)
> >          */
> >  
> >         /*
> > -        * Finally, call machine-dependent code to switch to a new
> > -        * context (possibly the idle context).  Once we are no
> > longer
> > -        * using the dead process's vmspace and stack, exit2() will
> > be
> > -        * called to schedule those resources to be released by the
> > -        * reaper thread.
> > +        * Finally, call machine-dependent code to switch away. 
> > Then,
> > +        * when we are no longer using the dead process's vmspace
> > and
> > +        * stack, the current CPU's reaper thread releases those
> > +        * resources.
> >          *
> >          * Note that cpu_exit() will end with a call equivalent to
> >          * cpu_switch(), finishing our execution (pun intended).
> > @@ -392,38 +391,6 @@ exit1(struct proc *p, int xexit, int xsig, int
> > flags)
> >         panic("cpu_exit returned");
> >  }
> >  
> > -/*
> > - * Locking of this proclist is special; it's accessed in a
> > - * critical section of process exit, and thus locking it can't
> > - * modify interrupt state.  We use a simple spin lock for this
> > - * proclist.  We use the p_hash member to linkup to deadproc.
> > - */
> > -struct mutex deadproc_mutex =
> > -    MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS);
> > -struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc);
> > -
> > -/*
> > - * We are called from sched_idle() once it is safe to schedule the
> > - * dead process's resources to be freed. So this is not allowed to
> > sleep.
> > - *
> > - * We lock the deadproc list, place the proc on that list (using
> > - * the p_hash member), and wake up the reaper.
> > - */
> > -void
> > -exit2(struct proc *p)
> > -{
> > -       /* account the remainder of time spent in exit1() */
> > -       mtx_enter(&p->p_p->ps_mtx);
> > -       tuagg_add_process(p->p_p, p);
> > -       mtx_leave(&p->p_p->ps_mtx);
> > -
> > -       mtx_enter(&deadproc_mutex);
> > -       LIST_INSERT_HEAD(&deadproc, p, p_hash);
> > -       mtx_leave(&deadproc_mutex);
> > -
> > -       wakeup(&deadproc);
> > -}
> > -
> >  void
> >  proc_free(struct proc *p)
> >  {
> > @@ -441,20 +408,26 @@ void
> >  reaper(void *arg)
> >  {
> >         struct proc *p;
> > +       struct cpu_info *ci = arg;
> > +       struct schedstate_percpu *spc;
> >  
> >         KERNEL_UNLOCK();
> > -
> >         SCHED_ASSERT_UNLOCKED();
> >  
> > +       sched_peg_curproc(ci);
> > +       spc = &ci->ci_schedstate;
> > +
> > +       KASSERT(ci == curcpu());
> > +
> >         for (;;) {
> > -               mtx_enter(&deadproc_mutex);
> > -               while ((p = LIST_FIRST(&deadproc)) == NULL)
> > -                       msleep_nsec(&deadproc, &deadproc_mutex,
> > PVM, "reaper",
> > -                           INFSLP);
> > +               while ((p = LIST_FIRST(&spc->spc_deadproc)) ==
> > NULL) {
> > +                       KERNEL_LOCK();
> > +                       tsleep_nsec(&spc->spc_deadproc, PVM,
> > "reaper", INFSLP);
> > +                       KERNEL_UNLOCK();
> > +               }
> 
> I don't think going back to taking the kernel lock here makes sense.
> 
> But if you now have a reaper per-CPU, the spc_deadproc list is only
> ever tocuhed by the CPU that owns it isn't it?  So there would be no
> need to do any locking...

Correct! I put it in, because tsleep() does not know about that case.
It has the following assertion that triggers if the kernel lock is not
held.

    KASSERT(ident == &nowake || timo || _kernel_lock_held());

We can unroll tsleep() here without holding the kernel lock. I tested
it, and it works.


 - Christian

> 
> >                 /* Remove us from the deadproc list. */
> >                 LIST_REMOVE(p, p_hash);
> > -               mtx_leave(&deadproc_mutex);
> >  
> >                 WITNESS_THREAD_EXIT(p);
> >  
> > diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
> > index 0ee965183323..c3e3d4a1c14b 100644
> > --- a/sys/kern/kern_sched.c
> > +++ b/sys/kern/kern_sched.c
> > @@ -32,6 +32,7 @@
> >  
> >  void sched_kthreads_create(void *);
> >  
> > +void sched_toproc(struct proc *);
> >  int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
> >  struct proc *sched_steal_proc(struct cpu_info *);
> >  
> > @@ -116,6 +117,7 @@ sched_kthreads_create(void *v)
> >  {
> >         struct cpu_info *ci = v;
> >         struct schedstate_percpu *spc = &ci->ci_schedstate;
> > +       char comm[_MAXCOMLEN];
> >         static int num;
> >  
> >         if (fork1(&proc0,
> > FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
> > @@ -128,6 +130,11 @@ sched_kthreads_create(void *v)
> >             sizeof(spc->spc_idleproc->p_p->ps_comm),
> >             "idle%d", num);
> >  
> > +       /* Create the reaper daemon kernel thread. */
> > +       snprintf(comm, sizeof(comm), "reaper%d", num);
> > +       if (kthread_create(reaper, ci, NULL, comm))
> > +               panic("fork reaper");
> > +
> >         num++;
> >  }
> >  
> > @@ -160,17 +167,10 @@ sched_idle(void *v)
> >  
> >         while (1) {
> >                 while (!cpu_is_idle(curcpu())) {
> > -                       struct proc *dead;
> > -
> >                         SCHED_LOCK();
> >                         p->p_stat = SSLEEP;
> >                         mi_switch();
> >                         SCHED_UNLOCK();
> > -
> > -                       while ((dead = LIST_FIRST(&spc-
> > >spc_deadproc))) {
> > -                               LIST_REMOVE(dead, p_hash);
> > -                               exit2(dead);
> > -                       }
> >                 }
> >  
> >                 splassert(IPL_NONE);
> > @@ -230,14 +230,17 @@ sched_exit(struct proc *p)
> >         tu_leave(&p->p_tu);
> >  
> >         KERNEL_ASSERT_LOCKED();
> > -       sched_toidle();
> > +       KERNEL_UNLOCK();
> > +
> > +       wakeup_one(&spc->spc_deadproc);
> > +       sched_toproc(NULL);
> > +       /* NOTREACHED */
> >  }
> >  
> >  void
> > -sched_toidle(void)
> > +sched_toproc(struct proc *nextproc)
> >  {
> >         struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
> > -       struct proc *idle;
> >  
> >  #ifdef MULTIPROCESSOR
> >         /* This process no longer needs to hold the kernel lock. */
> > @@ -257,17 +260,27 @@ sched_toidle(void)
> >         atomic_clearbits_int(&spc->spc_schedflags,
> > SPCF_SWITCHCLEAR);
> >  
> >         SCHED_LOCK();
> > -       idle = spc->spc_idleproc;
> > -       idle->p_stat = SRUN;
> > +
> > +       if (nextproc == NULL)
> > +               nextproc = sched_chooseproc();
> > +       else
> > +               nextproc->p_stat = SRUN;
> >  
> >         uvmexp.swtch++;
> >         if (curproc != NULL)
> > -               TRACEPOINT(sched, off__cpu, idle->p_tid +
> > THREAD_PID_OFFSET,
> > -                   idle->p_p->ps_pid);
> > -       cpu_switchto(NULL, idle);
> > +               TRACEPOINT(sched, off__cpu, nextproc->p_tid +
> > THREAD_PID_OFFSET,
> > +                   nextproc->p_p->ps_pid);
> > +       cpu_switchto(NULL, nextproc);
> >         panic("cpu_switchto returned");
> >  }
> >  
> > +void
> > +sched_toidle(void)
> > +{
> > +       struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
> > +       sched_toproc(spc->spc_idleproc);
> > +}
> > +
> >  /*
> >   * Run queue management.
> >   */
> > diff --git a/sys/sys/proc.h b/sys/sys/proc.h
> > index 50b708954bff..6139f25c3640 100644
> > --- a/sys/sys/proc.h
> > +++ b/sys/sys/proc.h
> > @@ -562,7 +562,6 @@ int wakeup_proc(struct proc *, int);
> >  void   unsleep(struct proc *);
> >  void   reaper(void *);
> >  __dead void exit1(struct proc *, int, int, int);
> > -void   exit2(struct proc *);
> >  void   cpu_fork(struct proc *_curp, struct proc *_child, void
> > *_stack,
> >             void *_tcb, void (*_func)(void *), void *_arg);
> >  void   cpu_exit(struct proc *);
> > -- 
> > 2.34.1
> > 
> > 
> > [2:application/pkcs7-signature Show Save:smime.p7s (5kB)]
> >