From: Tim Leslie <tleslie@protonmail.com>
Subject: Move fork CPU selection into scheduler
To: "tech@openbsd.org" <tech@openbsd.org>
Date: Mon, 13 Oct 2025 21:53:52 +0000

Tech,

The following patch consolidates fork-time CPU selection entirely within the scheduler by folding fork_thread_start into sched_choosecpu_fork.

Changes:
- Moves fork_thread_start from kern_fork.c into sched_choosecpu_fork
- Updates both callers (fork1 and thread_fork) to use the scheduler function directly
- Removes the last SCHED_LOCK calls from kern_fork.c
- Provides cleaner parent/child separation in the scheduler interface
- Removes commented-out code that has been unused for over 10 years
- Adjusts the CPU search to compute cpusets only when idle CPUs exist
- Takes the first available idle-and-unqueued CPU rather than searching that entire set (see the outline below)
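
For review at a glance, here is the resulting selection flow in rough
outline. This is simplified from the diff below: locking, the
tracepoint, and the !MULTIPROCESSOR case are elided, and
least_loaded_cpu() is only shorthand for the spc_nrun scan, not a real
function. As I read the cpuset helpers, cpuset_complement(to, a, b)
stores b & ~a.

	choice = NULL;
	if (cpuset_first(&sched_idle_cpus) != NULL) {
		/* set = idle & ~queued, limited to available CPUs */
		cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
		cpuset_intersection(&set, &set, &sched_all_cpus);
		choice = cpuset_first(&set);	/* first match wins */
	}
	if (choice == NULL)
		choice = least_loaded_cpu();	/* fewest spc_nrun, else curcpu() */
	setrunqueue(choice, p, p->p_usrpri);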

--
Tim Leslie

3 files changed, 38 insertions(+), 54 deletions(-)

diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -333,18 +333,5 @@ fork_check_maxthread(uid_t uid)
 	return 0;
 }
 
-static inline void
-fork_thread_start(struct proc *p, struct proc *parent, int flags)
-{
-	struct cpu_info *ci;
-
-	SCHED_LOCK();
-	ci = sched_choosecpu_fork(parent, flags);
-	TRACEPOINT(sched, fork, p->p_tid + THREAD_PID_OFFSET,
-	    p->p_p->ps_pid, CPU_INFO_UNIT(ci));
-	setrunqueue(ci, p, p->p_usrpri);
-	SCHED_UNLOCK();
-}
-
 int
 fork1(struct proc *curp, int flags, void (*func)(void *), void *arg,
@@ -494,7 +481,7 @@ fork1(struct proc *curp, int flags, void (*func)(void *), void *arg,
 		/* for consistency mark idle procs as pegged */
 		atomic_setbits_int(&p->p_flag, P_CPUPEG);
 	} else
-		fork_thread_start(p, curp, flags);
+		sched_choosecpu_fork(p, curp, flags);
 
 	free(newptstat, M_SUBPROC, sizeof(*newptstat));
 
@@ -617,7 +604,7 @@ thread_fork(struct proc *curp, void *stack, void *tcb, pid_t *tidptr,
 			psignal(curp, SIGSEGV);
 	}
 
-	fork_thread_start(p, curp, 0);
+	sched_choosecpu_fork(p, curp, 0);
 
 	/*
 	 * Update stats now that we know the fork was successful.
diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
--- a/sys/kern/kern_sched.c
+++ b/sys/kern/kern_sched.c
@@ -371,56 +371,53 @@ again:
 	return (p);
 }
 
-struct cpu_info *
-sched_choosecpu_fork(struct proc *parent, int flags)
+void
+sched_choosecpu_fork(struct proc *p, struct proc *parent, int flags)
 {
+	struct cpu_info *choice = NULL;
 #ifdef MULTIPROCESSOR
-	struct cpu_info *choice = NULL;
 	int run, best_run = INT_MAX;
 	struct cpu_info *ci;
 	struct cpuset set;
+#endif
 
-#if 0
-	/*
-	 * XXX
-	 * Don't do this until we have a painless way to move the cpu in exec.
-	 * Preferably when nuking the old pmap and getting a new one on a
-	 * new cpu.
-	 */
-	/*
-	 * PPWAIT forks are simple. We know that the parent will not
-	 * run until we exec and choose another cpu, so we just steal its
-	 * cpu.
-	 */
-	if (flags & FORK_PPWAIT)
-		return (parent->p_cpu);
-#endif
-
-	/*
-	 * Look at all cpus that are currently idle and have nothing queued.
-	 * If there are none, pick the one with least queued procs first,
-	 * then the one with lowest load average.
-	 */
-	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
-	cpuset_intersection(&set, &set, &sched_all_cpus);
-	if (cpuset_first(&set) == NULL)
-		cpuset_copy(&set, &sched_all_cpus);
-
-	while ((ci = cpuset_first(&set)) != NULL) {
-		cpuset_del(&set, ci);
-
-		run = ci->ci_schedstate.spc_nrun;
+	SCHED_LOCK();
+#ifdef MULTIPROCESSOR
+	/* Fast path: any idle CPUs at all? */
+	if (cpuset_first(&sched_idle_cpus) != NULL) {
+		/* Idle CPUs with nothing queued, limited to available CPUs */
+		cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
+		cpuset_intersection(&set, &set, &sched_all_cpus);
 
-		if (choice == NULL || run < best_run) {
+		/* Take the first idle-and-unqueued CPU if any */
+		if ((ci = cpuset_first(&set)) != NULL)
 			choice = ci;
-			best_run = run;
+	}
+
+	if (choice == NULL) {
+		/* Fallback: scan all CPUs for least spc_nrun */
+		cpuset_copy(&set, &sched_all_cpus);
+		while ((ci = cpuset_first(&set)) != NULL) {
+			cpuset_del(&set, ci);
+			run = ci->ci_schedstate.spc_nrun;
+			if (choice == NULL || run < best_run) {
+				choice = ci;
+				best_run = run;
+			}
 		}
 	}
 
-	if (choice != NULL)
-		return (choice);
+	/* Fall back to the current CPU if nothing else was suitable */
+	if (choice == NULL)
+		choice = curcpu();
+#else
+	choice = curcpu();
 #endif
-	return (curcpu());
+
+	TRACEPOINT(sched, fork, p->p_tid + THREAD_PID_OFFSET,
+	    p->p_p->ps_pid, CPU_INFO_UNIT(choice));
+	setrunqueue(choice, p, p->p_usrpri);
+	SCHED_UNLOCK();
 }
 
 struct cpu_info *
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -168,7 +168,7 @@ void mi_switch(void);
 void cpu_switchto(struct proc *, struct proc *);
 struct proc *sched_chooseproc(void);
 struct cpu_info *sched_choosecpu(struct proc *);
-struct cpu_info *sched_choosecpu_fork(struct proc *parent, int);
+void sched_choosecpu_fork(struct proc *p, struct proc *parent, int flags);
 void cpu_idle_enter(void);
 void cpu_idle_cycle(void);
 void cpu_idle_leave(void);