From: Tim Leslie Subject: Re: Move fork CPU selection into scheduler To: "tech@openbsd.org" Date: Wed, 26 Nov 2025 04:00:53 +0000 > > The following patch consolidates fork-time CPU selection behavior entirely within the scheduler by integrating the fork_thread_start functionality into sched_choosecpu_fork. The updated diff now uses SMR to make the initial CPU placement decision without taking the SCHED_LOCK. After a selection is made, the choice is validated under the lock and the process is enqueued. --- diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -333,19 +333,6 @@ fork_check_maxthread(uid_t uid) return 0; } -static inline void -fork_thread_start(struct proc *p, struct proc *parent, int flags) -{ - struct cpu_info *ci; - - SCHED_LOCK(); - ci = sched_choosecpu_fork(parent, flags); - TRACEPOINT(sched, fork, p->p_tid + THREAD_PID_OFFSET, - p->p_p->ps_pid, CPU_INFO_UNIT(ci)); - setrunqueue(ci, p, p->p_usrpri); - SCHED_UNLOCK(); -} - int fork1(struct proc *curp, int flags, void (*func)(void *), void *arg, register_t *retval, struct proc **rnewprocp) @@ -494,7 +481,7 @@ fork1(struct proc *curp, int flags, void (*func)(void *), void *arg, /* for consistency mark idle procs as pegged */ atomic_setbits_int(&p->p_flag, P_CPUPEG); } else - fork_thread_start(p, curp, flags); + sched_choosecpu_fork(p, curp, flags); free(newptstat, M_SUBPROC, sizeof(*newptstat)); @@ -617,7 +604,7 @@ thread_fork(struct proc *curp, void *stack, void *tcb, pid_t *tidptr, psignal(curp, SIGSEGV); } - fork_thread_start(p, curp, 0); + sched_choosecpu_fork(p, curp, 0); /* * Update stats now that we know the fork was successful. 
diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c --- a/sys/kern/kern_sched.c +++ b/sys/kern/kern_sched.c @@ -371,56 +371,64 @@ again: return (p); } -struct cpu_info * -sched_choosecpu_fork(struct proc *parent, int flags) +void +sched_choosecpu_fork(struct proc *p, struct proc *parent, int flags) { + struct cpu_info *ci = NULL; + #ifdef MULTIPROCESSOR - struct cpu_info *choice = NULL; + /* prefer idle+unqueued, then least spc_nrun, else curcpu. */ + smr_read_enter(); + + struct cpu_info *c; int run, best_run = INT_MAX; - struct cpu_info *ci; struct cpuset set; -#if 0 - /* - * XXX - * Don't do this until we have a painless way to move the cpu in exec. - * Preferably when nuking the old pmap and getting a new one on a - * new cpu. - */ - /* - * PPWAIT forks are simple. We know that the parent will not - * run until we exec and choose another cpu, so we just steal its - * cpu. - */ - if (flags & FORK_PPWAIT) - return (parent->p_cpu); -#endif + /* Fast path: any idle CPUs at all? */ + if (cpuset_first(&sched_idle_cpus) != NULL) { + /* idle && !queued && all */ + cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus); + cpuset_intersection(&set, &set, &sched_all_cpus); - /* - * Look at all cpus that are currently idle and have nothing queued. - * If there are none, pick the one with least queued procs first, - * then the one with lowest load average. - */ - cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus); - cpuset_intersection(&set, &set, &sched_all_cpus); - if (cpuset_first(&set) == NULL) + if ((c = cpuset_first(&set)) != NULL) + ci = c; + } + + if (ci == NULL) { + /* Fallback: scan all online CPUs for least spc_nrun. 
*/ cpuset_copy(&set, &sched_all_cpus); + while ((c = cpuset_first(&set)) != NULL) { + cpuset_del(&set, c); + run = c->ci_schedstate.spc_nrun; + if (ci == NULL || run < best_run) { + ci = c; + best_run = run; + } + } + } + smr_read_leave(); - while ((ci = cpuset_first(&set)) != NULL) { - cpuset_del(&set, ci); + if (ci == NULL) + ci = curcpu(); - run = ci->ci_schedstate.spc_nrun; +#else + ci = curcpu(); +#endif - if (choice == NULL || run < best_run) { - choice = ci; - best_run = run; - } + /* Grab the lock, revalidate, and enqueue. */ + SCHED_LOCK(); +#ifdef MULTIPROCESSOR + if (!cpuset_isset(&sched_all_cpus, ci) || + (ci->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) != 0) { + /* Fallback if CPU went away or is halting. */ + ci = curcpu(); } - - if (choice != NULL) - return (choice); #endif - return (curcpu()); + + TRACEPOINT(sched, fork, p->p_tid + THREAD_PID_OFFSET, + p->p_p->ps_pid, CPU_INFO_UNIT(ci)); + setrunqueue(ci, p, p->p_usrpri); + SCHED_UNLOCK(); } struct cpu_info * diff --git a/sys/sys/sched.h b/sys/sys/sched.h --- a/sys/sys/sched.h +++ b/sys/sys/sched.h @@ -168,7 +168,7 @@ void mi_switch(void); void cpu_switchto(struct proc *, struct proc *); struct proc *sched_chooseproc(void); struct cpu_info *sched_choosecpu(struct proc *); -struct cpu_info *sched_choosecpu_fork(struct proc *parent, int); +void sched_choosecpu_fork(struct proc *p, struct proc *parent, int flags); void cpu_idle_enter(void); void cpu_idle_cycle(void); void cpu_idle_leave(void);