From: Tim Leslie Subject: Re: Move fork CPU selection into scheduler To: "tech@openbsd.org" Date: Tue, 04 Nov 2025 17:17:24 +0000 On Monday, October 13th, 2025 at 5:53 PM, Tim Leslie wrote: > Tech, > > The following patch consolidates fork-time CPU selection behavior entirely within the scheduler by integrating the fork_thread_start functionality into sched_choosecpu_fork. Anyone? > diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c > --- a/sys/kern/kern_fork.c > +++ b/sys/kern/kern_fork.c > @@ -333,18 +333,6 @@ fork_check_maxthread(uid_t uid) > return 0; > } > > -static inline void > -fork_thread_start(struct proc *p, struct proc *parent, int flags) > -{ > - struct cpu_info *ci; > - > - SCHED_LOCK(); > - ci = sched_choosecpu_fork(parent, flags); > - TRACEPOINT(sched, fork, p->p_tid + THREAD_PID_OFFSET, > > - p->p_p->ps_pid, CPU_INFO_UNIT(ci)); > > - setrunqueue(ci, p, p->p_usrpri); > > - SCHED_UNLOCK(); > -} > > int > fork1(struct proc *curp, int flags, void (*func)(void *), void *arg, > @@ -494,7 +482,7 @@ fork1(struct proc *curp, int flags, void (*func)(void *), void *arg, > /* for consistency mark idle procs as pegged */ > atomic_setbits_int(&p->p_flag, P_CPUPEG); > > } else > - fork_thread_start(p, curp, flags); > + sched_choosecpu_fork(p, curp, flags); > > free(newptstat, M_SUBPROC, sizeof(*newptstat)); > > @@ -617,7 +605,7 @@ thread_fork(struct proc *curp, void *stack, void *tcb, pid_t *tidptr, > psignal(curp, SIGSEGV); > } > > - fork_thread_start(p, curp, 0); > + sched_choosecpu_fork(p, curp, 0); > > /* > * Update stats now that we know the fork was successful. 
> diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c > --- a/sys/kern/kern_sched.c > +++ b/sys/kern/kern_sched.c > @@ -371,56 +371,51 @@ again: > return (p); > } > > -struct cpu_info * > -sched_choosecpu_fork(struct proc *parent, int flags) > +void > +sched_choosecpu_fork(struct proc *p, struct proc *parent, int flags) > { > + struct cpu_info *choice = NULL, *ci; > + > + SCHED_LOCK(); > #ifdef MULTIPROCESSOR > - struct cpu_info *choice = NULL; > int run, best_run = INT_MAX; > - struct cpu_info *ci; > struct cpuset set; > > -#if 0 > - /* > - * XXX > - * Don't do this until we have a painless way to move the cpu in exec. > - * Preferably when nuking the old pmap and getting a new one on a > - * new cpu. > - */ > - /* > - * PPWAIT forks are simple. We know that the parent will not > - * run until we exec and choose another cpu, so we just steal its > - * cpu. > - */ > - if (flags & FORK_PPWAIT) > - return (parent->p_cpu); > > -#endif > - > - /* > - * Look at all cpus that are currently idle and have nothing queued. > - * If there are none, pick the one with least queued procs first, > - * then the one with lowest load average. > - */ > - cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus); > - cpuset_intersection(&set, &set, &sched_all_cpus); > - if (cpuset_first(&set) == NULL) > - cpuset_copy(&set, &sched_all_cpus); > - > - while ((ci = cpuset_first(&set)) != NULL) { > - cpuset_del(&set, ci); > - > - run = ci->ci_schedstate.spc_nrun; > > + /* Fast path: any idle CPUs at all? 
*/ > + if (cpuset_first(&sched_idle_cpus) != NULL) { > + /* Intersect idle with unqueued and available set */ > + cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus); > + cpuset_intersection(&set, &set, &sched_all_cpus); > > - if (choice == NULL || run < best_run) { > + /* Take the first idle-and-unqueued CPU if any */ > + if ((ci = cpuset_first(&set)) != NULL) > choice = ci; > - best_run = run; > + } > + > + if (choice == NULL) { > + /* Fallback: scan all CPUs for least spc_nrun */ > + cpuset_copy(&set, &sched_all_cpus); > + while ((ci = cpuset_first(&set)) != NULL) { > + cpuset_del(&set, ci); > + run = ci->ci_schedstate.spc_nrun; > > + if (choice == NULL || run < best_run) { > + choice = ci; > + best_run = run; > + } > } > } > > + /* current CPU if nothing else was suitable */ > - if (choice != NULL) > - return (choice); > + if (choice == NULL) > + choice = (curcpu()); > +#else > + choice = curcpu(); > #endif > - return (curcpu()); > + > + TRACEPOINT(sched, fork, p->p_tid + THREAD_PID_OFFSET, > > + p->p_p->ps_pid, CPU_INFO_UNIT(choice)); > > + setrunqueue(choice, p, p->p_usrpri); > > + SCHED_UNLOCK(); > } > > struct cpu_info * > diff --git a/sys/sys/sched.h b/sys/sys/sched.h > --- a/sys/sys/sched.h > +++ b/sys/sys/sched.h > @@ -168,7 +168,7 @@ void mi_switch(void); > void cpu_switchto(struct proc *, struct proc *); > struct proc *sched_chooseproc(void); > struct cpu_info *sched_choosecpu(struct proc *); > -struct cpu_info *sched_choosecpu_fork(struct proc *parent, int); > +void sched_choosecpu_fork(struct proc *p, struct proc *parent, int flags); > void cpu_idle_enter(void); > void cpu_idle_cycle(void); > void cpu_idle_leave(void);