Index | Thread | Search

From:
Christian Ludwig <cludwig@genua.de>
Subject:
One reaper per CPU
To:
<tech@openbsd.org>
Date:
Wed, 10 Jul 2024 17:21:35 +0200

Download raw body.

Thread
Hi,

This implements one reaper process per CPU. The benefit is that we can
switch to a safe stack (the reaper) on process teardown without the need
to lock data structures between sched_exit() and the local reaper. That
means the global deadproc mutex goes away. And there is also no need to
go through idle anymore, because we know that the local reaper is not
currently running when the dying proc enlists itself.

I have tested it lightly. It does not fall apart immediately. I do not
see any performance regressions in my test case, which is recompiling
the kernel over and over again, but your mileage may vary. So please give
this some serious testing on your boxes.

So long,

 - Christian

---
 sys/kern/init_main.c  |  5 ----
 sys/kern/kern_exit.c  | 59 ++++++++++++-------------------------------
 sys/kern/kern_sched.c | 43 ++++++++++++++++++++-----------
 sys/sys/proc.h        |  1 -
 4 files changed, 44 insertions(+), 64 deletions(-)

diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index d571b0855cff..83a03a14a042 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -117,7 +117,6 @@ struct	plimit limit0;
 struct	vmspace vmspace0;
 struct	sigacts sigacts0;
 struct	process *initprocess;
-struct	proc *reaperproc;
 
 extern	struct user *proc0paddr;
 
@@ -495,10 +494,6 @@ main(void *framep)
 	if (kthread_create(uvm_pageout, NULL, NULL, "pagedaemon"))
 		panic("fork pagedaemon");
 
-	/* Create the reaper daemon kernel thread. */
-	if (kthread_create(reaper, NULL, &reaperproc, "reaper"))
-		panic("fork reaper");
-
 	/* Create the cleaner daemon kernel thread. */
 	if (kthread_create(buf_daemon, NULL, &cleanerproc, "cleaner"))
 		panic("fork cleaner");
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index ba87dcd42cd0..a5cc37ac0f74 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -378,11 +378,10 @@ exit1(struct proc *p, int xexit, int xsig, int flags)
 	 */
 
 	/*
-	 * Finally, call machine-dependent code to switch to a new
-	 * context (possibly the idle context).  Once we are no longer
-	 * using the dead process's vmspace and stack, exit2() will be
-	 * called to schedule those resources to be released by the
-	 * reaper thread.
+	 * Finally, call machine-dependent code to switch away.  Then,
+	 * when we are no longer using the dead process's vmspace and
+	 * stack, the current CPU's reaper thread releases those
+	 * resources.
 	 *
 	 * Note that cpu_exit() will end with a call equivalent to
 	 * cpu_switch(), finishing our execution (pun intended).
@@ -392,38 +391,6 @@ exit1(struct proc *p, int xexit, int xsig, int flags)
 	panic("cpu_exit returned");
 }
 
-/*
- * Locking of this proclist is special; it's accessed in a
- * critical section of process exit, and thus locking it can't
- * modify interrupt state.  We use a simple spin lock for this
- * proclist.  We use the p_hash member to linkup to deadproc.
- */
-struct mutex deadproc_mutex =
-    MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS);
-struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc);
-
-/*
- * We are called from sched_idle() once it is safe to schedule the
- * dead process's resources to be freed. So this is not allowed to sleep.
- *
- * We lock the deadproc list, place the proc on that list (using
- * the p_hash member), and wake up the reaper.
- */
-void
-exit2(struct proc *p)
-{
-	/* account the remainder of time spent in exit1() */
-	mtx_enter(&p->p_p->ps_mtx);
-	tuagg_add_process(p->p_p, p);
-	mtx_leave(&p->p_p->ps_mtx);
-
-	mtx_enter(&deadproc_mutex);
-	LIST_INSERT_HEAD(&deadproc, p, p_hash);
-	mtx_leave(&deadproc_mutex);
-
-	wakeup(&deadproc);
-}
-
 void
 proc_free(struct proc *p)
 {
@@ -441,20 +408,26 @@ void
 reaper(void *arg)
 {
 	struct proc *p;
+	struct cpu_info *ci = arg;
+	struct schedstate_percpu *spc;
 
 	KERNEL_UNLOCK();
-
 	SCHED_ASSERT_UNLOCKED();
 
+	sched_peg_curproc(ci);
+	spc = &ci->ci_schedstate;
+
+	KASSERT(ci == curcpu());
+
 	for (;;) {
-		mtx_enter(&deadproc_mutex);
-		while ((p = LIST_FIRST(&deadproc)) == NULL)
-			msleep_nsec(&deadproc, &deadproc_mutex, PVM, "reaper",
-			    INFSLP);
+		while ((p = LIST_FIRST(&spc->spc_deadproc)) == NULL) {
+			KERNEL_LOCK();
+			tsleep_nsec(&spc->spc_deadproc, PVM, "reaper", INFSLP);
+			KERNEL_UNLOCK();
+		}
 
 		/* Remove us from the deadproc list. */
 		LIST_REMOVE(p, p_hash);
-		mtx_leave(&deadproc_mutex);
 
 		WITNESS_THREAD_EXIT(p);
 
diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
index 0ee965183323..c3e3d4a1c14b 100644
--- a/sys/kern/kern_sched.c
+++ b/sys/kern/kern_sched.c
@@ -32,6 +32,7 @@
 
 void sched_kthreads_create(void *);
 
+void sched_toproc(struct proc *);
 int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
 struct proc *sched_steal_proc(struct cpu_info *);
 
@@ -116,6 +117,7 @@ sched_kthreads_create(void *v)
 {
 	struct cpu_info *ci = v;
 	struct schedstate_percpu *spc = &ci->ci_schedstate;
+	char comm[_MAXCOMLEN];
 	static int num;
 
 	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
@@ -128,6 +130,11 @@ sched_kthreads_create(void *v)
 	    sizeof(spc->spc_idleproc->p_p->ps_comm),
 	    "idle%d", num);
 
+	/* Create the reaper daemon kernel thread. */
+	snprintf(comm, sizeof(comm), "reaper%d", num);
+	if (kthread_create(reaper, ci, NULL, comm))
+		panic("fork reaper");
+
 	num++;
 }
 
@@ -160,17 +167,10 @@ sched_idle(void *v)
 
 	while (1) {
 		while (!cpu_is_idle(curcpu())) {
-			struct proc *dead;
-
 			SCHED_LOCK();
 			p->p_stat = SSLEEP;
 			mi_switch();
 			SCHED_UNLOCK();
-
-			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
-				LIST_REMOVE(dead, p_hash);
-				exit2(dead);
-			}
 		}
 
 		splassert(IPL_NONE);
@@ -230,14 +230,17 @@ sched_exit(struct proc *p)
 	tu_leave(&p->p_tu);
 
 	KERNEL_ASSERT_LOCKED();
-	sched_toidle();
+	KERNEL_UNLOCK();
+
+	wakeup_one(&spc->spc_deadproc);
+	sched_toproc(NULL);
+	/* NOTREACHED */
 }
 
 void
-sched_toidle(void)
+sched_toproc(struct proc *nextproc)
 {
 	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
-	struct proc *idle;
 
 #ifdef MULTIPROCESSOR
 	/* This process no longer needs to hold the kernel lock. */
@@ -257,17 +260,27 @@ sched_toidle(void)
 	atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);
 
 	SCHED_LOCK();
-	idle = spc->spc_idleproc;
-	idle->p_stat = SRUN;
+
+	if (nextproc == NULL)
+		nextproc = sched_chooseproc();
+	else
+		nextproc->p_stat = SRUN;
 
 	uvmexp.swtch++;
 	if (curproc != NULL)
-		TRACEPOINT(sched, off__cpu, idle->p_tid + THREAD_PID_OFFSET,
-		    idle->p_p->ps_pid);
-	cpu_switchto(NULL, idle);
+		TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET,
+		    nextproc->p_p->ps_pid);
+	cpu_switchto(NULL, nextproc);
 	panic("cpu_switchto returned");
 }
 
+void
+sched_toidle(void)
+{
+	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
+	sched_toproc(spc->spc_idleproc);
+}
+
 /*
  * Run queue management.
  */
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 50b708954bff..6139f25c3640 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -562,7 +562,6 @@ int	wakeup_proc(struct proc *, int);
 void	unsleep(struct proc *);
 void	reaper(void *);
 __dead void exit1(struct proc *, int, int, int);
-void	exit2(struct proc *);
 void	cpu_fork(struct proc *_curp, struct proc *_child, void *_stack,
 	    void *_tcb, void (*_func)(void *), void *_arg);
 void	cpu_exit(struct proc *);
-- 
2.34.1