From: Mark Kettenis Subject: Re: One reaper per CPU To: Christian Ludwig Cc: tech@openbsd.org Date: Wed, 10 Jul 2024 17:42:45 +0200 > Date: Wed, 10 Jul 2024 17:21:35 +0200 > From: Christian Ludwig Hi Christian, > Hi, > > This implements one reaper process per CPU. The benefit is that we can > switch to a safe stack (the reaper) on process teardown without the need > to lock data structures between sched_exit() and the local reaper. That > means the global deadproc mutex goes away. And there is also no need to > go through idle anymore, because we know that the local reaper is not > currently running when the dying proc enlists itself. > > I have tested it lightly. It does not fall apart immediately. I do not > see any performance regressions in my test case, which is recompiling > the kernel over and over again, but your mileage may vary. So please give > this some serious testing on your boxes. > > So long, > > - Christian > > --- > sys/kern/init_main.c | 5 ---- > sys/kern/kern_exit.c | 59 ++++++++++++------------------------------- > sys/kern/kern_sched.c | 43 ++++++++++++++++++++----------- > sys/sys/proc.h | 1 - > 4 files changed, 44 insertions(+), 64 deletions(-) > > diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c > index d571b0855cff..83a03a14a042 100644 > --- a/sys/kern/init_main.c > +++ b/sys/kern/init_main.c > @@ -117,7 +117,6 @@ struct plimit limit0; > struct vmspace vmspace0; > struct sigacts sigacts0; > struct process *initprocess; > -struct proc *reaperproc; > > extern struct user *proc0paddr; > > @@ -495,10 +494,6 @@ main(void *framep) > if (kthread_create(uvm_pageout, NULL, NULL, "pagedaemon")) > panic("fork pagedaemon"); > > - /* Create the reaper daemon kernel thread. */ > - if (kthread_create(reaper, NULL, &reaperproc, "reaper")) > - panic("fork reaper"); > - > /* Create the cleaner daemon kernel thread. 
*/ > if (kthread_create(buf_daemon, NULL, &cleanerproc, "cleaner")) > panic("fork cleaner"); > diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c > index ba87dcd42cd0..a5cc37ac0f74 100644 > --- a/sys/kern/kern_exit.c > +++ b/sys/kern/kern_exit.c > @@ -378,11 +378,10 @@ exit1(struct proc *p, int xexit, int xsig, int flags) > */ > > /* > - * Finally, call machine-dependent code to switch to a new > - * context (possibly the idle context). Once we are no longer > - * using the dead process's vmspace and stack, exit2() will be > - * called to schedule those resources to be released by the > - * reaper thread. > + * Finally, call machine-dependent code to switch away. Then, > + * when we are no longer using the dead process's vmspace and > + * stack, the current CPU's reaper thread releases those > + * resources. > * > * Note that cpu_exit() will end with a call equivalent to > * cpu_switch(), finishing our execution (pun intended). > @@ -392,38 +391,6 @@ exit1(struct proc *p, int xexit, int xsig, int flags) > panic("cpu_exit returned"); > } > > -/* > - * Locking of this proclist is special; it's accessed in a > - * critical section of process exit, and thus locking it can't > - * modify interrupt state. We use a simple spin lock for this > - * proclist. We use the p_hash member to linkup to deadproc. > - */ > -struct mutex deadproc_mutex = > - MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS); > -struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc); > - > -/* > - * We are called from sched_idle() once it is safe to schedule the > - * dead process's resources to be freed. So this is not allowed to sleep. > - * > - * We lock the deadproc list, place the proc on that list (using > - * the p_hash member), and wake up the reaper. 
> - */ > -void > -exit2(struct proc *p) > -{ > - /* account the remainder of time spent in exit1() */ > - mtx_enter(&p->p_p->ps_mtx); > - tuagg_add_process(p->p_p, p); > - mtx_leave(&p->p_p->ps_mtx); > - > - mtx_enter(&deadproc_mutex); > - LIST_INSERT_HEAD(&deadproc, p, p_hash); > - mtx_leave(&deadproc_mutex); > - > - wakeup(&deadproc); > -} > - > void > proc_free(struct proc *p) > { > @@ -441,20 +408,26 @@ void > reaper(void *arg) > { > struct proc *p; > + struct cpu_info *ci = arg; > + struct schedstate_percpu *spc; > > KERNEL_UNLOCK(); > - > SCHED_ASSERT_UNLOCKED(); > > + sched_peg_curproc(ci); > + spc = &ci->ci_schedstate; > + > + KASSERT(ci == curcpu()); > + > for (;;) { > - mtx_enter(&deadproc_mutex); > - while ((p = LIST_FIRST(&deadproc)) == NULL) > - msleep_nsec(&deadproc, &deadproc_mutex, PVM, "reaper", > - INFSLP); > + while ((p = LIST_FIRST(&spc->spc_deadproc)) == NULL) { > + KERNEL_LOCK(); > + tsleep_nsec(&spc->spc_deadproc, PVM, "reaper", INFSLP); > + KERNEL_UNLOCK(); > + } I don't think going back to taking the kernel lock here makes sense. But if you now have a reaper per-CPU, the spc_deadproc list is only ever touched by the CPU that owns it, isn't it? So there would be no need to do any locking... > /* Remove us from the deadproc list. 
*/ > LIST_REMOVE(p, p_hash); > - mtx_leave(&deadproc_mutex); > > WITNESS_THREAD_EXIT(p); > > diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c > index 0ee965183323..c3e3d4a1c14b 100644 > --- a/sys/kern/kern_sched.c > +++ b/sys/kern/kern_sched.c > @@ -32,6 +32,7 @@ > > void sched_kthreads_create(void *); > > +void sched_toproc(struct proc *); > int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p); > struct proc *sched_steal_proc(struct cpu_info *); > > @@ -116,6 +117,7 @@ sched_kthreads_create(void *v) > { > struct cpu_info *ci = v; > struct schedstate_percpu *spc = &ci->ci_schedstate; > + char comm[_MAXCOMLEN]; > static int num; > > if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE| > @@ -128,6 +130,11 @@ sched_kthreads_create(void *v) > sizeof(spc->spc_idleproc->p_p->ps_comm), > "idle%d", num); > > + /* Create the reaper daemon kernel thread. */ > + snprintf(comm, sizeof(comm), "reaper%d", num); > + if (kthread_create(reaper, ci, NULL, comm)) > + panic("fork reaper"); > + > num++; > } > > @@ -160,17 +167,10 @@ sched_idle(void *v) > > while (1) { > while (!cpu_is_idle(curcpu())) { > - struct proc *dead; > - > SCHED_LOCK(); > p->p_stat = SSLEEP; > mi_switch(); > SCHED_UNLOCK(); > - > - while ((dead = LIST_FIRST(&spc->spc_deadproc))) { > - LIST_REMOVE(dead, p_hash); > - exit2(dead); > - } > } > > splassert(IPL_NONE); > @@ -230,14 +230,17 @@ sched_exit(struct proc *p) > tu_leave(&p->p_tu); > > KERNEL_ASSERT_LOCKED(); > - sched_toidle(); > + KERNEL_UNLOCK(); > + > + wakeup_one(&spc->spc_deadproc); > + sched_toproc(NULL); > + /* NOTREACHED */ > } > > void > -sched_toidle(void) > +sched_toproc(struct proc *nextproc) > { > struct schedstate_percpu *spc = &curcpu()->ci_schedstate; > - struct proc *idle; > > #ifdef MULTIPROCESSOR > /* This process no longer needs to hold the kernel lock. 
*/ > @@ -257,17 +260,27 @@ sched_toidle(void) > atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR); > > SCHED_LOCK(); > - idle = spc->spc_idleproc; > - idle->p_stat = SRUN; > + > + if (nextproc == NULL) > + nextproc = sched_chooseproc(); > + else > + nextproc->p_stat = SRUN; > > uvmexp.swtch++; > if (curproc != NULL) > - TRACEPOINT(sched, off__cpu, idle->p_tid + THREAD_PID_OFFSET, > - idle->p_p->ps_pid); > - cpu_switchto(NULL, idle); > + TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET, > + nextproc->p_p->ps_pid); > + cpu_switchto(NULL, nextproc); > panic("cpu_switchto returned"); > } > > +void > +sched_toidle(void) > +{ > + struct schedstate_percpu *spc = &curcpu()->ci_schedstate; > + sched_toproc(spc->spc_idleproc); > +} > + > /* > * Run queue management. > */ > diff --git a/sys/sys/proc.h b/sys/sys/proc.h > index 50b708954bff..6139f25c3640 100644 > --- a/sys/sys/proc.h > +++ b/sys/sys/proc.h > @@ -562,7 +562,6 @@ int wakeup_proc(struct proc *, int); > void unsleep(struct proc *); > void reaper(void *); > __dead void exit1(struct proc *, int, int, int); > -void exit2(struct proc *); > void cpu_fork(struct proc *_curp, struct proc *_child, void *_stack, > void *_tcb, void (*_func)(void *), void *_arg); > void cpu_exit(struct proc *); > -- > 2.34.1 > > > [2:application/pkcs7-signature Show Save:smime.p7s (5kB)] >