From: Tim Leslie
Subject: Move fork CPU selection into scheduler
To: "tech@openbsd.org"
Date: Mon, 13 Oct 2025 21:53:52 +0000

Tech,

The following patch consolidates fork-time CPU selection entirely within
the scheduler by folding the fork_thread_start logic into
sched_choosecpu_fork.

Changes:

- Moves fork_thread_start from kern_fork.c into sched_choosecpu_fork
- Updates both callers (fork1 and thread_fork) to use the scheduler
  function directly
- Removes the last SCHED_LOCK calls from kern_fork.c
- Provides a cleaner parent/child separation in the scheduler interface
- Removes commented-out code that has been unused for over 10 years
- Adjusts the CPU search to only compute cpusets when idle CPUs exist
- Takes the first available idle and unqueued CPU rather than searching
  all of that set

— Tim Leslie

3 files changed, 36 insertions(+), 53 deletions(-)
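If you want to sanity-check the new selection order without wading
through the cpuset internals, below is a throwaway userland model of it.
This is only a sketch: cpusets are reduced to bitmasks, spc_nrun to a
plain array, and CPU 0 stands in for curcpu(); choose(), NCPU and the
example masks are invented for illustration and exist nowhere in the
tree.

/* cc -o choose choose.c && ./choose */
#include <limits.h>
#include <stdio.h>
#include <strings.h>

#define NCPU	4

/* Model of the post-patch selection order; 0 stands in for curcpu(). */
static int
choose(unsigned int all, unsigned int idle, unsigned int queued,
    const int nrun[NCPU])
{
	unsigned int set;
	int ci, run, choice = -1, best_run = INT_MAX;

	/* Fast path: first idle CPU with nothing queued. */
	set = idle & ~queued & all;
	if (set != 0)
		return (ffs(set) - 1);

	/* Fallback: scan every available CPU for the least nrun. */
	for (ci = 0; ci < NCPU; ci++) {
		if ((all & (1U << ci)) == 0)
			continue;
		run = nrun[ci];
		if (choice == -1 || run < best_run) {
			choice = ci;
			best_run = run;
		}
	}
	return (choice != -1 ? choice : 0);
}

int
main(void)
{
	int nrun[NCPU] = { 3, 1, 2, 5 };

	/* cpu1/cpu2 idle, cpu2 has work queued: fast path picks cpu1. */
	printf("%d\n", choose(0xf, 0x6, 0x4, nrun));
	/* nothing idle: fallback picks cpu1, the least loaded. */
	printf("%d\n", choose(0xf, 0x0, 0x0, nrun));
	return (0);
}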
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -333,18 +333,6 @@ fork_check_maxthread(uid_t uid)
 	return 0;
 }
 
-static inline void
-fork_thread_start(struct proc *p, struct proc *parent, int flags)
-{
-	struct cpu_info *ci;
-
-	SCHED_LOCK();
-	ci = sched_choosecpu_fork(parent, flags);
-	TRACEPOINT(sched, fork, p->p_tid + THREAD_PID_OFFSET,
-	    p->p_p->ps_pid, CPU_INFO_UNIT(ci));
-	setrunqueue(ci, p, p->p_usrpri);
-	SCHED_UNLOCK();
-}
 
 int
 fork1(struct proc *curp, int flags, void (*func)(void *), void *arg,
@@ -494,7 +482,7 @@ fork1(struct proc *curp, int flags, void (*func)(void *), void *arg,
 		/* for consistency mark idle procs as pegged */
 		atomic_setbits_int(&p->p_flag, P_CPUPEG);
 	} else
-		fork_thread_start(p, curp, flags);
+		sched_choosecpu_fork(p, curp, flags);
 
 	free(newptstat, M_SUBPROC, sizeof(*newptstat));
@@ -617,7 +605,7 @@ thread_fork(struct proc *curp, void *stack, void *tcb, pid_t *tidptr,
 		psignal(curp, SIGSEGV);
 	}
 
-	fork_thread_start(p, curp, 0);
+	sched_choosecpu_fork(p, curp, 0);
 
 	/*
 	 * Update stats now that we know the fork was successful.
diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c
--- a/sys/kern/kern_sched.c
+++ b/sys/kern/kern_sched.c
@@ -371,56 +371,51 @@ again:
 	return (p);
 }
 
-struct cpu_info *
-sched_choosecpu_fork(struct proc *parent, int flags)
+void
+sched_choosecpu_fork(struct proc *p, struct proc *parent, int flags)
 {
+	struct cpu_info *choice = NULL, *ci;
+
+	SCHED_LOCK();
 #ifdef MULTIPROCESSOR
-	struct cpu_info *choice = NULL;
 	int run, best_run = INT_MAX;
-	struct cpu_info *ci;
 	struct cpuset set;
 
-#if 0
-	/*
-	 * XXX
-	 * Don't do this until we have a painless way to move the cpu in exec.
-	 * Preferably when nuking the old pmap and getting a new one on a
-	 * new cpu.
-	 */
-	/*
-	 * PPWAIT forks are simple. We know that the parent will not
-	 * run until we exec and choose another cpu, so we just steal its
-	 * cpu.
-	 */
-	if (flags & FORK_PPWAIT)
-		return (parent->p_cpu);
-#endif
-
-	/*
-	 * Look at all cpus that are currently idle and have nothing queued.
-	 * If there are none, pick the one with least queued procs first,
-	 * then the one with lowest load average.
-	 */
-	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
-	cpuset_intersection(&set, &set, &sched_all_cpus);
-	if (cpuset_first(&set) == NULL)
-		cpuset_copy(&set, &sched_all_cpus);
-
-	while ((ci = cpuset_first(&set)) != NULL) {
-		cpuset_del(&set, ci);
-
-		run = ci->ci_schedstate.spc_nrun;
-
-		if (choice == NULL || run < best_run) {
-			choice = ci;
-			best_run = run;
-		}
-	}
+	/* Fast path: any idle CPUs at all? */
+	if (cpuset_first(&sched_idle_cpus) != NULL) {
+		/* Intersect idle with the unqueued and available sets */
+		cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
+		cpuset_intersection(&set, &set, &sched_all_cpus);
+
+		/* Take the first idle-and-unqueued CPU, if any */
+		if ((ci = cpuset_first(&set)) != NULL)
+			choice = ci;
+	}
+
+	if (choice == NULL) {
+		/* Fallback: scan all CPUs for the least spc_nrun */
+		cpuset_copy(&set, &sched_all_cpus);
+		while ((ci = cpuset_first(&set)) != NULL) {
+			cpuset_del(&set, ci);
+			run = ci->ci_schedstate.spc_nrun;
+			if (choice == NULL || run < best_run) {
+				choice = ci;
+				best_run = run;
+			}
+		}
+	}
 
-	if (choice != NULL)
-		return (choice);
+	/* Fall back to the current CPU if nothing else was suitable */
+	if (choice == NULL)
+		choice = curcpu();
+#else
+	choice = curcpu();
 #endif
-	return (curcpu());
+
+	TRACEPOINT(sched, fork, p->p_tid + THREAD_PID_OFFSET,
+	    p->p_p->ps_pid, CPU_INFO_UNIT(choice));
+	setrunqueue(choice, p, p->p_usrpri);
+	SCHED_UNLOCK();
 }
 
 struct cpu_info *
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -168,7 +168,7 @@ void mi_switch(void);
 void cpu_switchto(struct proc *, struct proc *);
 struct proc *sched_chooseproc(void);
 struct cpu_info *sched_choosecpu(struct proc *);
-struct cpu_info *sched_choosecpu_fork(struct proc *parent, int);
+void sched_choosecpu_fork(struct proc *p, struct proc *parent, int flags);
 void cpu_idle_enter(void);
 void cpu_idle_cycle(void);
 void cpu_idle_leave(void);