Faster _exit(2) for a faster userland: R.I.P the reaper
In the past 6 months, since Valencia, I have investigated two different
performance issues:
- "Why are configure scripts slow?" (with aja@ and jan@), and
- "Why is single-threaded page fault performance so bad?" (by myself).
I don't even want to share numbers. That said, both issues led me to
exit1() in the kernel. The culprits are the many context switches
related to the reaper, the extremely costly insertion of many pages
into the RB-tree when tearing down a VM space, as well as the
serialization of such teardowns in LIFO order...
The diff below is not a complete answer to all these points; however, I
believe it to be the most complex piece. It already greatly improves
performance, even if processes are now charged a bigger amount of
%sys time due to reaping their own address space.
The diff below has been tested on amd64, arm64 and i386. It includes
multiple pieces that can be reviewed independently:
- arch/amd64/amd64/locore.S: always update `ci_proc_pmap', even if
  %cr3 is unchanged. This is required because we now use pmap_kernel()
  on non-kernel threads to be able to reap the user pmap & user space.
- kern/kern_event.c: Use a mutex instead of a rwlock for
  `kqueue_ps_list_lock'. This is necessary to remove a possible sleep
  point in knote_processexit().
- kern/kern_rwlock.c: Add two assertwaitok() calls to ensure even
  non-contended rwlocks are not taken in code paths that MUST NOT
  sleep (see the sketch after this list).
- kern/subr_xxx.c: Add a check to ensure SDEAD threads never execute
  a code path that might sleep.
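
To illustrate what the last two items guard against, here is a minimal
sketch (mine, not part of the diff; `some_rwlock' is a made-up
placeholder) of the failure mode: once a thread is SDEAD late in
exit1(), nothing will ever put it back on a run queue, so any lock
that may sleep is forbidden. Kernel mutexes only spin and raise the
IPL, so they remain safe there.

	/* Hypothetical late-exit path, for illustration only. */
	void
	late_exit_path(void)
	{
		/* OK: mutexes never sleep. */
		mtx_enter(&kqueue_ps_list_lock);
		mtx_leave(&kqueue_ps_list_lock);

		/*
		 * NOT OK: rw_enter_write() may msleep() when the lock
		 * is contended.  With the new checks, even this
		 * uncontended fast path now panics "deads don't sleep"
		 * under DIAGNOSTIC instead of hanging silently.
		 */
		rw_enter_write(&some_rwlock);
		rw_exit_write(&some_rwlock);
	}
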
The rest is a reshuffling of the current exit1() logic, which includes:
- Remove an extra synchronization on `ps_mainproc' for multi-threaded
  processes. Rely instead on single_thread_set(). That means the last
  thread will clean up per-process state and free its siblings' states
  and stacks (sketched after this list). As a bonus, `ps_mainproc' is
  also killed.
- Move uvm_exit() inside exit1(). This is still executed without the
  kernel lock, and now in parallel. We now borrow proc0's vmspace and
  pmap to finish the execution of the dead process.
- Move re-parenting and NOTE_EXIT notification to exit1().
- Change dowait6() to allow init(8) to reap non-zombie processes.
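
For reviewers, here is a condensed sketch of the resulting last-thread
logic (locking details, accounting and error paths elided; the diff
below is authoritative):

	mtx_enter(&pr->ps_mtx);
	pr->ps_exitcnt++;
	if (pr->ps_exitcnt == pr->ps_threadcnt)
		lastthread = 1;
	mtx_leave(&pr->ps_mtx);

	if (lastthread) {
		/* Reap our own address space, borrowing proc0's. */
		uvm_exit(pr);
		/* Free the states and stacks of our dead siblings. */
		TAILQ_FOREACH_SAFE(q, &pr->ps_threads, p_thr_link, qn) {
			if (q == curproc)
				continue;
			proc_free(q);
		}
	}
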
A lot more cleanups and improvements can be done on top of this. We
should now be able to call mi_switch() instead of sched_toidle() after
cpu_exit(). This should remove an extra context switch and give us
another performance/latency boost.
Accounting could also certainly be improved. I must admit I don't
understand the API and I'd appreciate it if someone (claudio@?) could
look at the many tuagg_*, ruadd(), calcru() & co.
I'll look at improving the teardown of the VM space and pmap next.
I'd appreciate test reports on many different setups as well as on
other architectures.
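
For a quick comparison, a trivial micro-benchmark along the lines
below (my own test sketch, not part of the diff) exercises the
fork/_exit/wait path this diff targets:

	#include <sys/wait.h>
	#include <err.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <time.h>
	#include <unistd.h>

	/* Fork and reap N short-lived children, report wall time. */
	int
	main(int argc, char *argv[])
	{
		struct timespec t0, t1;
		int i, n = (argc > 1) ? atoi(argv[1]) : 10000;
		pid_t pid;

		clock_gettime(CLOCK_MONOTONIC, &t0);
		for (i = 0; i < n; i++) {
			if ((pid = fork()) == -1)
				err(1, "fork");
			if (pid == 0)
				_exit(0);
			if (waitpid(pid, NULL, 0) == -1)
				err(1, "waitpid");
		}
		clock_gettime(CLOCK_MONOTONIC, &t1);
		printf("%d fork/_exit/wait cycles in %.3f seconds\n", n,
		    (t1.tv_sec - t0.tv_sec) +
		    (t1.tv_nsec - t0.tv_nsec) / 1e9);
		return 0;
	}
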
Thanks,
Martin
Index: arch/amd64/amd64/locore.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
diff -u -p -r1.150 locore.S
--- arch/amd64/amd64/locore.S 2 Feb 2025 05:45:20 -0000 1.150
+++ arch/amd64/amd64/locore.S 30 Apr 2025 09:41:57 -0000
@@ -409,13 +409,14 @@ restore_saved:
cmpq %rcx,CPUVAR(PROC_PMAP)
jnz .Lbogus_proc_pmap
#endif
- /* record which pmap this CPU should get IPIs for */
- movq %rbx,CPUVAR(PROC_PMAP)
.Lset_cr3:
movq %rax,%cr3 /* %rax used below too */
.Lsame_cr3:
+ /* record which pmap this CPU should get IPIs for */
+ movq %rbx,CPUVAR(PROC_PMAP)
+
/*
* If we switched from a userland thread with a shallow call stack
* (e.g interrupt->ast->mi_ast->prempt->mi_switch->cpu_switchto)
Index: kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
diff -u -p -r1.328 init_main.c
--- kern/init_main.c 1 Jan 2025 07:44:54 -0000 1.328
+++ kern/init_main.c 1 May 2025 13:00:03 -0000
@@ -117,7 +117,6 @@ struct plimit limit0;
struct vmspace vmspace0;
struct sigacts sigacts0;
struct process *initprocess;
-struct proc *reaperproc;
extern struct user *proc0paddr;
@@ -496,10 +495,6 @@ main(void *framep)
/* Create the pageout daemon kernel thread. */
if (kthread_create(uvm_pageout, NULL, NULL, "pagedaemon"))
panic("fork pagedaemon");
-
- /* Create the reaper daemon kernel thread. */
- if (kthread_create(reaper, NULL, &reaperproc, "reaper"))
- panic("fork reaper");
/* Create the cleaner daemon kernel thread. */
if (kthread_create(buf_daemon, NULL, &cleanerproc, "cleaner"))
Index: kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
diff -u -p -r1.201 kern_event.c
--- kern/kern_event.c 10 Feb 2025 16:45:46 -0000 1.201
+++ kern/kern_event.c 1 May 2025 12:20:55 -0000
@@ -183,7 +183,7 @@ const struct filterops timer_filtops = {
struct pool knote_pool;
struct pool kqueue_pool;
struct mutex kqueue_klist_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
-struct rwlock kqueue_ps_list_lock = RWLOCK_INITIALIZER("kqpsl");
+struct mutex kqueue_ps_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
int kq_ntimeouts = 0;
int kq_timeoutmax = (4 * 1024);
@@ -340,7 +340,7 @@ int
filt_procattach(struct knote *kn)
{
struct process *pr;
- int nolock;
+ int locked = 0;
if ((curproc->p_p->ps_flags & PS_PLEDGE) &&
(curproc->p_pledge & PLEDGE_PROC) == 0)
@@ -368,18 +368,17 @@ filt_procattach(struct knote *kn)
kn->kn_data = kn->kn_sdata; /* ppid */
kn->kn_fflags = NOTE_CHILD;
kn->kn_flags &= ~EV_FLAG1;
- rw_assert_wrlock(&kqueue_ps_list_lock);
+ MUTEX_ASSERT_LOCKED(&kqueue_ps_list_lock);
+ locked = 1;
}
- /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */
- nolock = (rw_status(&kqueue_ps_list_lock) == RW_WRITE);
- if (!nolock)
- rw_enter_write(&kqueue_ps_list_lock);
+ if (!locked)
+ mtx_enter(&kqueue_ps_list_lock);
mtx_enter(&pr->ps_mtx);
klist_insert_locked(&pr->ps_klist, kn);
mtx_leave(&pr->ps_mtx);
- if (!nolock)
- rw_exit_write(&kqueue_ps_list_lock);
+ if (!locked)
+ mtx_leave(&kqueue_ps_list_lock);
KERNEL_UNLOCK();
@@ -404,8 +403,8 @@ filt_procdetach(struct knote *kn)
struct process *pr = kn->kn_ptr.p_process;
int status;
- /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */
- rw_enter_write(&kqueue_ps_list_lock);
+ /* this needs both the ps_mtx and kqueue_ps_list_lock. */
+ mtx_enter(&kqueue_ps_list_lock);
mtx_enter(&pr->ps_mtx);
status = kn->kn_status;
@@ -413,7 +412,7 @@ filt_procdetach(struct knote *kn)
klist_remove_locked(&pr->ps_klist, kn);
mtx_leave(&pr->ps_mtx);
- rw_exit_write(&kqueue_ps_list_lock);
+ mtx_leave(&kqueue_ps_list_lock);
}
int
@@ -435,6 +434,7 @@ filt_proc(struct knote *kn, long hint)
kn->kn_fflags |= event;
/*
+ KASSERT((p->p_p->ps_flags & PS_ZOMBIE) == 0);
* process is gone, so flag the event as finished and remove it
* from the process's klist
*/
@@ -471,7 +471,7 @@ filt_proc(struct knote *kn, long hint)
kev.data = kn->kn_id; /* parent */
kev.udata = kn->kn_udata; /* preserve udata */
- rw_assert_wrlock(&kqueue_ps_list_lock);
+ MUTEX_ASSERT_LOCKED(&kqueue_ps_list_lock);
mtx_leave(&pr->ps_mtx);
error = kqueue_register(kq, &kev, 0, NULL);
mtx_enter(&pr->ps_mtx);
@@ -531,12 +531,12 @@ filt_sigattach(struct knote *kn)
kn->kn_ptr.p_process = pr;
kn->kn_flags |= EV_CLEAR; /* automatically set */
- /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */
- rw_enter_write(&kqueue_ps_list_lock);
+ /* this needs both the ps_mtx and kqueue_ps_list_lock. */
+ mtx_enter(&kqueue_ps_list_lock);
mtx_enter(&pr->ps_mtx);
klist_insert_locked(&pr->ps_klist, kn);
mtx_leave(&pr->ps_mtx);
- rw_exit_write(&kqueue_ps_list_lock);
+ mtx_leave(&kqueue_ps_list_lock);
return (0);
}
@@ -546,11 +546,11 @@ filt_sigdetach(struct knote *kn)
{
struct process *pr = kn->kn_ptr.p_process;
- rw_enter_write(&kqueue_ps_list_lock);
+ mtx_enter(&kqueue_ps_list_lock);
mtx_enter(&pr->ps_mtx);
klist_remove_locked(&pr->ps_klist, kn);
mtx_leave(&pr->ps_mtx);
- rw_exit_write(&kqueue_ps_list_lock);
+ mtx_leave(&kqueue_ps_list_lock);
}
int
@@ -2058,12 +2058,12 @@ knote_fdclose(struct proc *p, int fd)
void
knote_processexit(struct process *pr)
{
- /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */
- rw_enter_write(&kqueue_ps_list_lock);
+ /* this needs both the ps_mtx and kqueue_ps_list_lock. */
+ mtx_enter(&kqueue_ps_list_lock);
mtx_enter(&pr->ps_mtx);
knote_locked(&pr->ps_klist, NOTE_EXIT);
mtx_leave(&pr->ps_mtx);
- rw_exit_write(&kqueue_ps_list_lock);
+ mtx_leave(&kqueue_ps_list_lock);
/* remove other knotes hanging off the process */
klist_invalidate(&pr->ps_klist);
@@ -2072,12 +2072,12 @@ knote_processexit(struct process *pr)
void
knote_processfork(struct process *pr, pid_t pid)
{
- /* this needs both the ps_mtx and exclusive kqueue_ps_list_lock. */
- rw_enter_write(&kqueue_ps_list_lock);
+ /* this needs both the ps_mtx and kqueue_ps_list_lock. */
+ mtx_enter(&kqueue_ps_list_lock);
mtx_enter(&pr->ps_mtx);
knote_locked(&pr->ps_klist, NOTE_FORK | pid);
mtx_leave(&pr->ps_mtx);
- rw_exit_write(&kqueue_ps_list_lock);
+ mtx_leave(&kqueue_ps_list_lock);
}
void
Index: kern/kern_exit.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_exit.c,v
diff -u -p -r1.245 kern_exit.c
--- kern/kern_exit.c 2 May 2025 05:04:38 -0000 1.245
+++ kern/kern_exit.c 2 May 2025 10:43:55 -0000
@@ -118,16 +118,22 @@ exit1(struct proc *p, int xexit, int xsi
{
struct process *pr, *qr, *nqr;
struct rusage *rup;
+ int wakeinit = 0, lastthread = 0;
atomic_setbits_int(&p->p_flag, P_WEXIT);
pr = p->p_p;
- /* single-threaded? */
+ /*
+ * For multi-threaded processes, the first thread reaching exit1()
+	 * for full process exit (not thread exit) notifies and waits for
+	 * its siblings via the single-thread API. Once they have all
+	 * reached exit1() they wake it up, and it then releases the
+	 * per-process resources (address space, PID, etc).
+ */
if (!P_HASSIBLING(p)) {
flags = EXIT_NORMAL;
} else {
- /* nope, multi-threaded */
if (flags == EXIT_NORMAL)
single_thread_set(p, SINGLE_EXIT);
}
@@ -157,11 +163,10 @@ exit1(struct proc *p, int xexit, int xsi
refcnt_finalize(&pr->ps_refcnt, "psdtor");
}
- /* unlink ourselves from the active threads */
mtx_enter(&pr->ps_mtx);
- TAILQ_REMOVE(&pr->ps_threads, p, p_thr_link);
- pr->ps_threadcnt--;
pr->ps_exitcnt++;
+ if (pr->ps_exitcnt == pr->ps_threadcnt)
+ lastthread = 1;
/*
* if somebody else wants to take us to single threaded mode
@@ -170,16 +175,9 @@ exit1(struct proc *p, int xexit, int xsi
if (pr->ps_single != NULL || ISSET(pr->ps_flags, PS_STOPPING))
process_suspend_signal(pr);
- /* proc is off ps_threads list so update accounting of process now */
+ /* update accounting of process now */
tuagg_add_runtime();
tuagg_add_process(pr, p);
-
- if ((p->p_flag & P_THREAD) == 0) {
- /* main thread gotta wait because it has the pid, et al */
- while (pr->ps_threadcnt + pr->ps_exitcnt > 1)
- msleep_nsec(&pr->ps_threads, &pr->ps_mtx, PWAIT,
- "thrdeath", INFSLP);
- }
mtx_leave(&pr->ps_mtx);
rup = pr->ps_ru;
@@ -193,7 +191,7 @@ exit1(struct proc *p, int xexit, int xsi
}
}
p->p_siglist = 0;
- if ((p->p_flag & P_THREAD) == 0)
+ if (lastthread)
pr->ps_siglist = 0;
kqpoll_exit();
@@ -202,7 +200,9 @@ exit1(struct proc *p, int xexit, int xsi
kcov_exit(p);
#endif
- if ((p->p_flag & P_THREAD) == 0) {
+ if (lastthread) {
+ struct proc *q, *qn;
+
if (pr->ps_flags & PS_PROFIL)
stopprofclock(pr);
@@ -241,6 +241,32 @@ exit1(struct proc *p, int xexit, int xsi
*/
if (pr->ps_pptr->ps_sigacts->ps_sigflags & SAS_NOCLDWAIT)
atomic_setbits_int(&pr->ps_flags, PS_NOZOMBIE);
+
+ /*
+ * Free the VM resources we're still holding on to.
+ * We must do this from a valid thread because doing
+ * so may block.
+ */
+#ifdef MULTIPROCESSOR
+ __mp_release_all(&kernel_lock);
+#endif
+ uvm_exit(pr);
+ KERNEL_LOCK();
+
+ /* Free siblings. */
+ TAILQ_FOREACH_SAFE(q, &pr->ps_threads, p_thr_link, qn) {
+ if (q == curproc)
+ continue;
+ mtx_enter(&pr->ps_mtx);
+ TAILQ_REMOVE(&pr->ps_threads, q, p_thr_link);
+ pr->ps_threadcnt--;
+ pr->ps_exitcnt--;
+ /* account the remainder of time spent in exit1() */
+ tuagg_add_process(pr, q);
+ mtx_leave(&pr->ps_mtx);
+ WITNESS_THREAD_EXIT(q);
+ proc_free(q);
+ }
}
p->p_fd = NULL; /* zap the thread's copy */
@@ -256,11 +282,7 @@ exit1(struct proc *p, int xexit, int xsi
/*
* Remove proc from pidhash chain and allproc so looking
- * it up won't work. We will put the proc on the
- * deadproc list later (using the p_hash member), and
- * wake up the reaper when we do. If this is the last
- * thread of a process that isn't PS_NOZOMBIE, we'll put
- * the process on the zombprocess list below.
+ * it up won't work.
*/
/*
* NOTE: WE ARE NO LONGER ALLOWED TO SLEEP!
@@ -270,7 +292,7 @@ exit1(struct proc *p, int xexit, int xsi
LIST_REMOVE(p, p_hash);
LIST_REMOVE(p, p_list);
- if ((p->p_flag & P_THREAD) == 0) {
+ if (lastthread) {
LIST_REMOVE(pr, ps_hash);
LIST_REMOVE(pr, ps_list);
@@ -291,7 +313,7 @@ exit1(struct proc *p, int xexit, int xsi
*/
qr = LIST_FIRST(&pr->ps_children);
if (qr) /* only need this if any child is S_ZOMB */
- wakeup(initprocess);
+ wakeinit = 1;
for (; qr != NULL; qr = nqr) {
nqr = LIST_NEXT(qr, ps_sibling);
/*
@@ -340,7 +362,7 @@ exit1(struct proc *p, int xexit, int xsi
*/
p->p_pctcpu = 0;
- if ((p->p_flag & P_THREAD) == 0) {
+ if (lastthread) {
/*
* Final thread has died, so add on our children's rusage
* and calculate the total times.
@@ -361,32 +383,36 @@ exit1(struct proc *p, int xexit, int xsi
if (pr->ps_flags & PS_NOZOMBIE) {
struct process *ppr = pr->ps_pptr;
process_reparent(pr, initprocess);
+ wakeinit = 1;
atomic_setbits_int(&ppr->ps_flags, PS_WAITEVENT);
wakeup(ppr);
+ } else {
+ /* Process is now a true zombie. */
+ atomic_setbits_int(&pr->ps_flags, PS_ZOMBIE);
}
mtx_leave(&pr->ps_mtx);
- }
- /* just a thread? check if last one standing. */
- if (p->p_flag & P_THREAD) {
- /* scheduler_wait_hook(pr->ps_mainproc, p); XXX */
- mtx_enter(&pr->ps_mtx);
- pr->ps_exitcnt--;
- if (pr->ps_threadcnt + pr->ps_exitcnt == 1)
- wakeup(&pr->ps_threads);
- mtx_leave(&pr->ps_mtx);
- }
+ /* Notify listeners of our demise and clean up. */
+ KERNEL_ASSERT_LOCKED();
+ knote_processexit(pr);
+
+ if (pr->ps_flags & PS_ZOMBIE) {
+ /* Post SIGCHLD and wake up parent. */
+ prsignal(pr->ps_pptr, SIGCHLD);
+ atomic_setbits_int(&pr->ps_pptr->ps_flags,
+ PS_WAITEVENT);
+ wakeup(pr->ps_pptr);
+ }
- /*
- * Other substructures are freed from reaper and wait().
- */
+ if (wakeinit)
+ wakeup(initprocess);
+ }
/*
- * Finally, call machine-dependent code to switch to a new
- * context (possibly the idle context). Once we are no longer
- * using the dead process's vmspace and stack, exit2() will be
- * called to schedule those resources to be released by the
- * reaper thread.
+ * Finally, call machine-dependent code to free MD per-thread
+ * resources and switch to a new context. Once we are no longer
+ * using the dead process's stack, it will be freed, along with
+ * other substructures from wait().
*
* Note that cpu_exit() will end with a call equivalent to
* cpu_switch(), finishing our execution (pun intended).
@@ -395,120 +421,27 @@ exit1(struct proc *p, int xexit, int xsi
panic("cpu_exit returned");
}
-/*
- * Locking of this proclist is special; it's accessed in a
- * critical section of process exit, and thus locking it can't
- * modify interrupt state. We use a simple spin lock for this
- * proclist. We use the p_hash member to linkup to deadproc.
- */
-struct mutex deadproc_mutex =
- MUTEX_INITIALIZER_FLAGS(IPL_NONE, "deadproc", MTX_NOWITNESS);
-struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc);
-
-/*
- * We are called from sched_idle() once it is safe to schedule the
- * dead process's resources to be freed. So this is not allowed to sleep.
- *
- * We lock the deadproc list, place the proc on that list (using
- * the p_hash member), and wake up the reaper.
- */
-void
-exit2(struct proc *p)
-{
- /* account the remainder of time spent in exit1() */
- mtx_enter(&p->p_p->ps_mtx);
- tuagg_add_process(p->p_p, p);
- mtx_leave(&p->p_p->ps_mtx);
-
- mtx_enter(&deadproc_mutex);
- LIST_INSERT_HEAD(&deadproc, p, p_hash);
- mtx_leave(&deadproc_mutex);
-
- wakeup(&deadproc);
-}
-
void
proc_free(struct proc *p)
{
+ uvm_uarea_free(p);
+ p->p_vmspace = NULL; /* zap the thread's copy */
+
crfree(p->p_ucred);
pool_put(&proc_pool, p);
atomic_dec_int(&nthreads);
}
-/*
- * Process reaper. This is run by a kernel thread to free the resources
- * of a dead process. Once the resources are free, the process becomes
- * a zombie, and the parent is allowed to read the undead's status.
- */
-void
-reaper(void *arg)
-{
- struct proc *p;
-
- KERNEL_UNLOCK();
-
- SCHED_ASSERT_UNLOCKED();
-
- for (;;) {
- mtx_enter(&deadproc_mutex);
- while ((p = LIST_FIRST(&deadproc)) == NULL)
- msleep_nsec(&deadproc, &deadproc_mutex, PVM, "reaper",
- INFSLP);
-
- /* Remove us from the deadproc list. */
- LIST_REMOVE(p, p_hash);
- mtx_leave(&deadproc_mutex);
-
- WITNESS_THREAD_EXIT(p);
-
- /*
- * Free the VM resources we're still holding on to.
- * We must do this from a valid thread because doing
- * so may block.
- */
- uvm_uarea_free(p);
- p->p_vmspace = NULL; /* zap the thread's copy */
-
- if (p->p_flag & P_THREAD) {
- /* Just a thread */
- proc_free(p);
- } else {
- struct process *pr = p->p_p;
-
- /* Release the rest of the process's vmspace */
- uvm_exit(pr);
-
- KERNEL_LOCK();
- if ((pr->ps_flags & PS_NOZOMBIE) == 0) {
- /* Process is now a true zombie. */
- atomic_setbits_int(&pr->ps_flags, PS_ZOMBIE);
- }
-
- /* Notify listeners of our demise and clean up. */
- knote_processexit(pr);
-
- if (pr->ps_flags & PS_ZOMBIE) {
- /* Post SIGCHLD and wake up parent. */
- prsignal(pr->ps_pptr, SIGCHLD);
- atomic_setbits_int(&pr->ps_pptr->ps_flags,
- PS_WAITEVENT);
- wakeup(pr->ps_pptr);
- } else {
- /* No one will wait for us, just zap it. */
- process_zap(pr);
- }
- KERNEL_UNLOCK();
- }
- }
-}
-
int
dowait6(struct proc *q, idtype_t idtype, id_t id, int *statusp, int options,
struct rusage *rusage, siginfo_t *info, register_t *retval)
{
int nfound;
struct process *pr;
- int error;
+ int error, isinit;
+
+ /* init must look at PS_NOZOMBIE to reap them. */
+ isinit = (curproc->p_p == initprocess);
if (info != NULL)
memset(info, 0, sizeof(*info));
@@ -518,7 +451,7 @@ loop:
nfound = 0;
LIST_FOREACH(pr, &q->p_p->ps_children, ps_sibling) {
mtx_enter(&pr->ps_mtx);
- if ((pr->ps_flags & PS_NOZOMBIE) ||
+ if ((!isinit && (pr->ps_flags & PS_NOZOMBIE)) ||
(idtype == P_PID && id != pr->ps_pid) ||
(idtype == P_PGID && id != pr->ps_pgid)) {
mtx_leave(&pr->ps_mtx);
@@ -764,7 +697,7 @@ proc_finish_wait(struct proc *waiter, st
wakeup(tr);
} else {
mtx_leave(&pr->ps_mtx);
- scheduler_wait_hook(waiter, pr->ps_mainproc);
+ scheduler_wait_hook(waiter, TAILQ_FIRST(&pr->ps_threads));
rup = &waiter->p_p->ps_cru;
ruadd(rup, pr->ps_ru);
LIST_REMOVE(pr, ps_list); /* off zombprocess */
@@ -837,7 +770,9 @@ void
process_zap(struct process *pr)
{
struct vnode *otvp;
- struct proc *p = pr->ps_mainproc;
+ struct proc *p = TAILQ_FIRST(&pr->ps_threads);
+
+ TAILQ_REMOVE(&pr->ps_threads, p, p_thr_link);
/*
* Finally finished with old proc entry.
@@ -860,7 +795,7 @@ process_zap(struct process *pr)
if (otvp)
vrele(otvp);
- KASSERT(pr->ps_threadcnt == 0);
+ KASSERT(pr->ps_threadcnt == 1);
KASSERT(pr->ps_exitcnt == 1);
if (pr->ps_ptstat != NULL)
free(pr->ps_ptstat, M_SUBPROC, sizeof(*pr->ps_ptstat));
@@ -872,5 +807,6 @@ process_zap(struct process *pr)
pool_put(&process_pool, pr);
nprocesses--;
+ WITNESS_THREAD_EXIT(p);
proc_free(p);
}
Index: kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
diff -u -p -r1.270 kern_fork.c
--- kern/kern_fork.c 14 Apr 2025 09:15:24 -0000 1.270
+++ kern/kern_fork.c 30 Apr 2025 09:56:57 -0000
@@ -182,7 +182,6 @@ process_initialize(struct process *pr, s
refcnt_init(&pr->ps_refcnt);
/* initialize the thread links */
- pr->ps_mainproc = p;
TAILQ_INIT(&pr->ps_threads);
TAILQ_INSERT_TAIL(&pr->ps_threads, p, p_thr_link);
pr->ps_threadcnt = 1;
Index: kern/kern_rwlock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
diff -u -p -r1.55 kern_rwlock.c
--- kern/kern_rwlock.c 29 Jan 2025 15:10:09 -0000 1.55
+++ kern/kern_rwlock.c 30 Apr 2025 14:27:15 -0000
@@ -232,7 +232,10 @@ rw_do_enter_write(struct rwlock *rwl, in
if (!ISSET(flags, RW_NOSLEEP))
WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, NULL);
#endif
-
+#ifdef DIAGNOSTIC
+ if (!ISSET(flags, RW_NOSLEEP))
+ assertwaitok();
+#endif
owner = rw_cas(&rwl->rwl_owner, 0, self);
if (owner == 0) {
/* wow, we won. so easy */
@@ -351,7 +354,10 @@ rw_do_enter_read(struct rwlock *rwl, int
if (!ISSET(flags, RW_NOSLEEP))
WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, NULL);
#endif
-
+#ifdef DIAGNOSTIC
+ if (!ISSET(flags, RW_NOSLEEP))
+ assertwaitok();
+#endif
owner = rw_cas(&rwl->rwl_owner, 0, RWLOCK_READ_INCR);
if (owner == 0) {
/* ermagerd, we won! */
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
diff -u -p -r1.104 kern_sched.c
--- kern/kern_sched.c 10 Mar 2025 09:28:56 -0000 1.104
+++ kern/kern_sched.c 1 May 2025 13:01:12 -0000
@@ -160,17 +160,10 @@ sched_idle(void *v)
while (1) {
while (!cpu_is_idle(curcpu())) {
- struct proc *dead;
-
SCHED_LOCK();
p->p_stat = SSLEEP;
mi_switch();
SCHED_UNLOCK();
-
- while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
- LIST_REMOVE(dead, p_hash);
- exit2(dead);
- }
}
splassert(IPL_NONE);
@@ -212,10 +205,6 @@ sched_idle(void *v)
void
sched_exit(struct proc *p)
{
- struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
-
- LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);
-
tuagg_add_runtime();
KERNEL_ASSERT_LOCKED();
Index: kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
diff -u -p -r1.364 kern_sig.c
--- kern/kern_sig.c 10 Mar 2025 09:28:56 -0000 1.364
+++ kern/kern_sig.c 29 Apr 2025 09:21:10 -0000
@@ -1602,7 +1602,7 @@ process_stop(struct process *pr, int fla
KASSERT(ISSET(p->p_flag, P_SUSPSIG | P_SUSPSINGLE) == 0);
}
- pr->ps_suspendcnt = pr->ps_threadcnt;
+ pr->ps_suspendcnt = (pr->ps_threadcnt - pr->ps_exitcnt);
TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
if (q == p)
continue;
Index: kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
diff -u -p -r1.465 kern_sysctl.c
--- kern/kern_sysctl.c 27 Apr 2025 00:58:55 -0000 1.465
+++ kern/kern_sysctl.c 30 Apr 2025 09:57:00 -0000
@@ -2018,7 +2018,7 @@ fill_kproc(struct process *pr, struct ki
isthread = p != NULL;
if (!isthread) {
- p = pr->ps_mainproc; /* XXX */
+ p = TAILQ_FIRST(&pr->ps_threads);
tuagg_get_process(&tu, pr);
} else
tuagg_get_proc(&tu, p);
Index: kern/subr_xxx.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_xxx.c,v
diff -u -p -r1.17 subr_xxx.c
--- kern/subr_xxx.c 17 May 2019 03:53:08 -0000 1.17
+++ kern/subr_xxx.c 30 Apr 2025 13:30:16 -0000
@@ -40,6 +40,7 @@
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/smr.h>
+#include <sys/proc.h>
/*
@@ -163,7 +164,8 @@ assertwaitok(void)
SMR_ASSERT_NONCRITICAL();
#ifdef DIAGNOSTIC
if (curcpu()->ci_mutex_level != 0)
- panic("assertwaitok: non-zero mutex count: %d",
- curcpu()->ci_mutex_level);
+ panic("non-zero mutex count: %d", curcpu()->ci_mutex_level);
+ if (!cold && curproc->p_stat == SDEAD)
+ panic("deads don't sleep");
#endif
}
Index: uvm/uvm_glue.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_glue.c,v
diff -u -p -r1.88 uvm_glue.c
--- uvm/uvm_glue.c 21 Mar 2025 13:19:33 -0000 1.88
+++ uvm/uvm_glue.c 1 May 2025 13:12:08 -0000
@@ -287,13 +287,25 @@ uvm_uarea_free(struct proc *p)
/*
* uvm_exit: exit a virtual address space
+ *
+ * - borrow process0's address space to free the vmspace and pmap
+ * of the dead process.
*/
void
uvm_exit(struct process *pr)
{
struct vmspace *vm = pr->ps_vmspace;
+ int s;
+
+ KERNEL_ASSERT_UNLOCKED();
+ KASSERT(curproc->p_p == pr);
+
+ s = intr_disable();
+ pmap_deactivate(curproc);
+ curproc->p_vmspace = pr->ps_vmspace = process0.ps_vmspace;
+ pmap_activate(curproc);
+ intr_restore(s);
- pr->ps_vmspace = NULL;
uvmspace_free(vm);
}
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
diff -u -p -r1.387 proc.h
--- sys/proc.h 2 May 2025 05:04:38 -0000 1.387
+++ sys/proc.h 2 May 2025 10:27:29 -0000
@@ -154,12 +154,6 @@ struct pinsyscall {
struct process {
struct refcnt ps_refcnt;
- /*
- * ps_mainproc is the original thread in the process.
- * It's only still special for the handling of
- * some signal and ptrace behaviors that need to be fixed.
- */
- struct proc *ps_mainproc;
struct ucred *ps_ucred; /* Process owner's identity. */
LIST_ENTRY(process) ps_list; /* List of all processes. */
@@ -544,7 +538,6 @@ extern struct processlist zombprocess; /
extern struct proclist allproc; /* List of all threads. */
extern struct process *initprocess; /* Process slot for init. */
-extern struct proc *reaperproc; /* Thread slot for reaper. */
extern struct proc *syncerproc; /* filesystem syncer daemon */
extern struct pool process_pool; /* memory pool for processes */
@@ -578,7 +571,6 @@ void setrunnable(struct proc *);
void endtsleep(void *);
int wakeup_proc(struct proc *);
void unsleep(struct proc *);
-void reaper(void *);
__dead void exit1(struct proc *, int, int, int);
void exit2(struct proc *);
void cpu_fork(struct proc *_curp, struct proc *_child, void *_stack,