From: Alexander Bluhm Subject: Re: fstat sysctl crashes kernel To: tech@openbsd.org Date: Thu, 8 Aug 2024 19:46:37 +0200 On Thu, Aug 08, 2024 at 09:47:27AM -0600, Theo de Raadt wrote: > Todd C. Miller wrote: > > > On Thu, 08 Aug 2024 13:21:54 +0200, Alexander Bluhm wrote: > > > > > Independent of that, can we commit the chunk below? When searching > > > for a specific process, there is not need to traverse the list to > > > the end. Also I check for arg >= 0 as this is cosistent with the > > > arg < -1 check before. Then you don't have to think about what is > > > special about process 0. There is no functional difference as > > > process 0 has PS_SYSTEM set and is skipped anyway. > > > > Yes, this makes sense. OK millert@ > > No matter what we do, we don't get an atomic view. Punting the problem > to userland sidesteps the difficult problem in the kernel. For me is important that we commit something instead of discussing pro and con of various diffs. The idea with the marker sounds like the correct thing, but adds a lot of complexity. mvs@ already worked on something simmilar. The easiest fix without locking concerns is my userland fstat diff. Locking the whole process list while doing copyout over NFS may open another can of worms. Both bluhm@ and mvs@ diff survive my stress test for hours. Usually the test crashes or hangs within minutes. Currently I am testing with witness to find fallout. Below is my diff merged with current. bluhm Index: sys/kern/kern_exit.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_exit.c,v diff -u -p -r1.230 kern_exit.c --- sys/kern/kern_exit.c 6 Aug 2024 18:41:20 -0000 1.230 +++ sys/kern/kern_exit.c 8 Aug 2024 17:20:17 -0000 @@ -137,7 +137,9 @@ exit1(struct proc *p, int xexit, int xsi if (pr->ps_pid == 1) panic("init died (signal %d, exit %d)", xsig, xexit); + rw_enter_write(&pr->ps_lock); atomic_setbits_int(&pr->ps_flags, PS_EXITING); + rw_exit_write(&pr->ps_lock); pr->ps_xexit = xexit; pr->ps_xsig = xsig; Index: sys/kern/kern_sysctl.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v diff -u -p -r1.436 kern_sysctl.c --- sys/kern/kern_sysctl.c 8 Aug 2024 15:02:36 -0000 1.436 +++ sys/kern/kern_sysctl.c 8 Aug 2024 17:20:17 -0000 @@ -1675,15 +1675,22 @@ sysctl_file(int *name, u_int namelen, ch break; } matched = 0; + restart: LIST_FOREACH(pr, &allprocess, ps_list) { + if (arg >= 0 && pr->ps_pid != (pid_t)arg) { + /* not the pid we are looking for */ + continue; + } /* * skip system, exiting, embryonic and undead * processes */ - if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING)) - continue; - if (arg >= 0 && pr->ps_pid != (pid_t)arg) { - /* not the pid we are looking for */ + if (rw_enter(&pr->ps_lock, + arg >= 0 ? RW_READ | RW_SLEEPFAIL : RW_READ)) + goto restart; + if (ISSET(pr->ps_flags, + PS_SYSTEM | PS_EMBRYO | PS_EXITING)) { + rw_exit_read(&pr->ps_lock); continue; } matched = 1; @@ -1702,6 +1709,7 @@ sysctl_file(int *name, u_int namelen, ch FILLIT(fp, fdp, i, NULL, pr); FRELE(fp, p); } + rw_exit_read(&pr->ps_lock); /* pid is unique, stop searching */ if (arg >= 0) break; Index: usr.bin/fstat/fstat.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/usr.bin/fstat/fstat.c,v diff -u -p -r1.103 fstat.c --- usr.bin/fstat/fstat.c 20 Jun 2022 01:39:44 -0000 1.103 +++ usr.bin/fstat/fstat.c 7 Aug 2024 20:19:33 -0000 @@ -142,12 +142,13 @@ hide(void *p) int main(int argc, char *argv[]) { + struct kinfo_proc *kp, *kplast; struct kinfo_file *kf, *kflast; int ch; char *memf, *nlistf, *optstr; char buf[_POSIX2_LINE_MAX]; const char *errstr; - int cnt, flags; + int pcnt, fcnt, flags; hideroot = getuid(); @@ -282,14 +283,27 @@ main(int argc, char *argv[]) checkfile = 1; } - if (nfilter == 1) { - if ((kf = kvm_getfiles(kd, filter[0].what, filter[0].arg, - sizeof(*kf), &cnt)) == NULL) - errx(1, "%s", kvm_geterr(kd)); + if (nfilter == 1 && filter[0].what == KERN_FILE_BYPID) { + kp = kvm_getprocs(kd, KERN_PROC_PID, filter[0].arg, + sizeof(*kp), &pcnt); } else { - if ((kf = kvm_getfiles(kd, KERN_FILE_BYPID, -1, sizeof(*kf), - &cnt)) == NULL) - errx(1, "%s", kvm_geterr(kd)); + kp = kvm_getprocs(kd, KERN_PROC_ALL, -1, sizeof(*kp), &pcnt); + } + if (kp == NULL) + errx(1, "%s", kvm_geterr(kd)); + kf = NULL; + fcnt = 0; + for (kplast = &kp[pcnt]; kp < kplast; ++kp) { + struct kinfo_file *kfpid; + int cnt; + + if ((kfpid = kvm_getfiles(kd, KERN_FILE_BYPID, kp->p_pid, + sizeof(*kf), &cnt)) == NULL) + continue; + if ((kf = reallocarray(kf, fcnt + cnt, sizeof(*kf))) == NULL) + err(1, NULL); + memcpy(&kf[fcnt], kfpid, sizeof(*kf) * cnt); + fcnt += cnt; } if (fuser) { @@ -317,10 +331,10 @@ main(int argc, char *argv[]) err(1, "pledge"); } - find_splices(kf, cnt); + find_splices(kf, fcnt); if (!fuser) fstat_header(); - for (kflast = &kf[cnt]; kf < kflast; ++kf) { + for (kflast = &kf[fcnt]; kf < kflast; ++kf) { if (fuser) fuser_check(kf); else