
From: Tim Leslie <tleslie@protonmail.com>
Subject: Re: Move fork CPU selection into scheduler
To: "tech@openbsd.org" <tech@openbsd.org>
Date: Wed, 26 Nov 2025 04:00:53 +0000

> > The following patch consolidates fork-time CPU selection behavior entirely within the scheduler by integrating the fork_thread_start functionality into sched_choosecpu_fork.

The updated diff now uses SMR to make the initial CPU placement decision without taking the SCHED_LOCK. Once a candidate is selected, the choice is revalidated under the lock and the process is enqueued.
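
The shape of the change, reduced to a minimal sketch (not the diff itself: pick_candidate() and candidate_ok() are placeholders for the cpuset scan and the revalidation test; smr_read_enter/leave, SCHED_LOCK/SCHED_UNLOCK, setrunqueue and curcpu are the real primitives):

	struct cpu_info *ci;

	/* Pick a candidate without SCHED_LOCK; SMR keeps the reads safe. */
	smr_read_enter();
	ci = pick_candidate();		/* placeholder for the set scan */
	smr_read_leave();

	/*
	 * The candidate can go stale between smr_read_leave() and
	 * SCHED_LOCK() (the CPU may be halting), so revalidate under
	 * the lock before enqueueing, falling back to curcpu().
	 */
	SCHED_LOCK();
	if (!candidate_ok(ci))		/* placeholder for the checks */
		ci = curcpu();
	setrunqueue(ci, p, p->p_usrpri);
	SCHED_UNLOCK();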

---

diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -333,19 +333,6 @@ fork_check_maxthread(uid_t uid)
 	return 0;
 }

-static inline void
-fork_thread_start(struct proc *p, struct proc *parent, int flags)
-{
-	struct cpu_info *ci;
-
-	SCHED_LOCK();
-	ci = sched_choosecpu_fork(parent, flags);
-	TRACEPOINT(sched, fork, p->p_tid + THREAD_PID_OFFSET,
-	    p->p_p->ps_pid, CPU_INFO_UNIT(ci));
-	setrunqueue(ci, p, p->p_usrpri);
-	SCHED_UNLOCK();
-}
-
 int
 fork1(struct proc *curp, int flags, void (*func)(void *), void *arg,
     register_t *retval, struct proc **rnewprocp)
@@ -494,7 +481,7 @@ fork1(struct proc *curp, int flags, void (*func)(void *), void *arg,
 		/* for consistency mark idle procs as pegged */
 		atomic_setbits_int(&p->p_flag, P_CPUPEG);
 	} else
-		fork_thread_start(p, curp, flags);
+		sched_choosecpu_fork(p, curp, flags);

 	free(newptstat, M_SUBPROC, sizeof(*newptstat));

@@ -617,7 +604,7 @@ thread_fork(struct proc *curp, void *stack, void *tcb, pid_t *tidptr,
 			psignal(curp, SIGSEGV);
 	}

-	fork_thread_start(p, curp, 0);
+	sched_choosecpu_fork(p, curp, 0);

 	/*
 	 * Update stats now that we know the fork was successful.
diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
--- a/sys/kern/kern_sched.c
+++ b/sys/kern/kern_sched.c
@@ -371,56 +371,64 @@ again:
 	return (p);
 }

-struct cpu_info *
-sched_choosecpu_fork(struct proc *parent, int flags)
+void
+sched_choosecpu_fork(struct proc *p, struct proc *parent, int flags)
 {
+	struct cpu_info *ci = NULL;
+
 #ifdef MULTIPROCESSOR
-	struct cpu_info *choice = NULL;
+	struct cpu_info *c;
 	int run, best_run = INT_MAX;
-	struct cpu_info *ci;
 	struct cpuset set;
+
+	/* Prefer an idle, unqueued CPU, then the least spc_nrun, else curcpu. */
+	smr_read_enter();

-#if 0
-	/*
-	 * XXX
-	 * Don't do this until we have a painless way to move the cpu in exec.
-	 * Preferably when nuking the old pmap and getting a new one on a
-	 * new cpu.
-	 */
-	/*
-	 * PPWAIT forks are simple. We know that the parent will not
-	 * run until we exec and choose another cpu, so we just steal its
-	 * cpu.
-	 */
-	if (flags & FORK_PPWAIT)
-		return (parent->p_cpu);
-#endif
+	/* Fast path: any idle CPUs at all? */
+	if (cpuset_first(&sched_idle_cpus) != NULL) {
+		/* idle, not queued, limited to schedulable CPUs */
+		cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
+		cpuset_intersection(&set, &set, &sched_all_cpus);

-	/*
-	 * Look at all cpus that are currently idle and have nothing queued.
-	 * If there are none, pick the one with least queued procs first,
-	 * then the one with lowest load average.
-	 */
-	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
-	cpuset_intersection(&set, &set, &sched_all_cpus);
-	if (cpuset_first(&set) == NULL)
+		if ((c = cpuset_first(&set)) != NULL)
+			ci = c;
+	}
+
+	if (ci == NULL) {
+		/* Fallback: scan all schedulable CPUs for the least spc_nrun. */
 		cpuset_copy(&set, &sched_all_cpus);
+		while ((c = cpuset_first(&set)) != NULL) {
+			cpuset_del(&set, c);
+			run = c->ci_schedstate.spc_nrun;
+			if (ci == NULL || run < best_run) {
+				ci = c;
+				best_run = run;
+			}
+		}
+	}
+	smr_read_leave();

-	while ((ci = cpuset_first(&set)) != NULL) {
-		cpuset_del(&set, ci);
+	if (ci == NULL)
+		ci = curcpu();

-		run = ci->ci_schedstate.spc_nrun;
+#else
+	ci = curcpu();
+#endif

-		if (choice == NULL || run < best_run) {
-			choice = ci;
-			best_run = run;
-		}
+	/* Grab the lock, revalidate, and enqueue. */
+	SCHED_LOCK();
+#ifdef MULTIPROCESSOR
+	if (!cpuset_isset(&sched_all_cpus, ci) ||
+	    (ci->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) != 0) {
+		/* Fallback if CPU went away or is halting. */
+		ci = curcpu();
 	}
-
-	if (choice != NULL)
-		return (choice);
 #endif
-	return (curcpu());
+
+	TRACEPOINT(sched, fork, p->p_tid + THREAD_PID_OFFSET,
+	    p->p_p->ps_pid, CPU_INFO_UNIT(ci));
+	setrunqueue(ci, p, p->p_usrpri);
+	SCHED_UNLOCK();
 }

 struct cpu_info *
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -168,7 +168,7 @@ void mi_switch(void);
 void cpu_switchto(struct proc *, struct proc *);
 struct proc *sched_chooseproc(void);
 struct cpu_info *sched_choosecpu(struct proc *);
-struct cpu_info *sched_choosecpu_fork(struct proc *parent, int);
+void sched_choosecpu_fork(struct proc *p, struct proc *parent, int flags);
 void cpu_idle_enter(void);
 void cpu_idle_cycle(void);
 void cpu_idle_leave(void);
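
For anyone new to the cpuset helpers: as in the code being replaced, cpuset_complement(to, a, b) keeps the members of b that are not in a, so the fast path computes "idle and not queued" and then intersects with sched_all_cpus. A worked example with invented values, treating the sets as bitmasks:

	/*
	 * Three CPUs, bit i = CPU i (example values only):
	 *   idle   = 0b101	CPU0 and CPU2 idle
	 *   queued = 0b100	CPU2 already has a proc queued
	 *   all    = 0b111	all three schedulable
	 *
	 * complement(set, queued, idle):	set = 0b101 & ~0b100 = 0b001
	 * intersection(set, set, all):		set = 0b001 & 0b111 = 0b001
	 * cpuset_first(&set) therefore returns CPU0.
	 */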