Download raw body.
fstat sysctl crashes kernel
> On 8 Aug 2024, at 20:46, Alexander Bluhm <bluhm@openbsd.org> wrote:
>
> On Thu, Aug 08, 2024 at 09:47:27AM -0600, Theo de Raadt wrote:
>> Todd C. Miller <millert@openbsd.org> wrote:
>>
>>> On Thu, 08 Aug 2024 13:21:54 +0200, Alexander Bluhm wrote:
>>>
>>>> Independent of that, can we commit the chunk below? When searching
>>>> for a specific process, there is not need to traverse the list to
>>>> the end. Also I check for arg >= 0 as this is cosistent with the
>>>> arg < -1 check before. Then you don't have to think about what is
>>>> special about process 0. There is no functional difference as
>>>> process 0 has PS_SYSTEM set and is skipped anyway.
>>>
>>> Yes, this makes sense. OK millert@
>>
>> No matter what we do, we don't get an atomic view. Punting the problem
>> to userland sidesteps the difficult problem in the kernel.
>
> For me is important that we commit something instead of discussing
> pro and con of various diffs.
>
> The idea with the marker sounds like the correct thing, but adds a
> lot of complexity. mvs@ already worked on something simmilar. The
> easiest fix without locking concerns is my userland fstat diff.
> Locking the whole process list while doing copyout over NFS may
> open another can of worms.
>
Userland pages are wired at this moment, you can’t stuck in swap
on nfs here, sysctl_vslock() cares about that.
> Both bluhm@ and mvs@ diff survive my stress test for hours. Usually
> the test crashes or hangs within minutes. Currently I am testing
> with witness to find fallout.
>
Nice to hear. Personal me, I prefer allprocess rwlock(9). It is much
simpler and left userland as is. Also, is allows to remove kernel
lock around some process stuff like prfind(). For example, from the
filt_procattach() where claudio@ left it to make ps dereference safe.
Also, as I can see, others have similar lock around their process
stuff, so this could be good practice.
> Below is my diff merged with current.
>
> bluhm
>
> Index: sys/kern/kern_exit.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_exit.c,v
> diff -u -p -r1.230 kern_exit.c
> --- sys/kern/kern_exit.c 6 Aug 2024 18:41:20 -0000 1.230
> +++ sys/kern/kern_exit.c 8 Aug 2024 17:20:17 -0000
> @@ -137,7 +137,9 @@ exit1(struct proc *p, int xexit, int xsi
> if (pr->ps_pid == 1)
> panic("init died (signal %d, exit %d)", xsig, xexit);
>
> + rw_enter_write(&pr->ps_lock);
> atomic_setbits_int(&pr->ps_flags, PS_EXITING);
> + rw_exit_write(&pr->ps_lock);
> pr->ps_xexit = xexit;
> pr->ps_xsig = xsig;
>
> Index: sys/kern/kern_sysctl.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
> diff -u -p -r1.436 kern_sysctl.c
> --- sys/kern/kern_sysctl.c 8 Aug 2024 15:02:36 -0000 1.436
> +++ sys/kern/kern_sysctl.c 8 Aug 2024 17:20:17 -0000
> @@ -1675,15 +1675,22 @@ sysctl_file(int *name, u_int namelen, ch
> break;
> }
> matched = 0;
> + restart:
> LIST_FOREACH(pr, &allprocess, ps_list) {
> + if (arg >= 0 && pr->ps_pid != (pid_t)arg) {
> + /* not the pid we are looking for */
> + continue;
> + }
> /*
> * skip system, exiting, embryonic and undead
> * processes
> */
> - if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
> - continue;
> - if (arg >= 0 && pr->ps_pid != (pid_t)arg) {
> - /* not the pid we are looking for */
> + if (rw_enter(&pr->ps_lock,
> + arg >= 0 ? RW_READ | RW_SLEEPFAIL : RW_READ))
> + goto restart;
> + if (ISSET(pr->ps_flags,
> + PS_SYSTEM | PS_EMBRYO | PS_EXITING)) {
> + rw_exit_read(&pr->ps_lock);
> continue;
> }
> matched = 1;
> @@ -1702,6 +1709,7 @@ sysctl_file(int *name, u_int namelen, ch
> FILLIT(fp, fdp, i, NULL, pr);
> FRELE(fp, p);
> }
> + rw_exit_read(&pr->ps_lock);
> /* pid is unique, stop searching */
> if (arg >= 0)
> break;
> Index: usr.bin/fstat/fstat.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/usr.bin/fstat/fstat.c,v
> diff -u -p -r1.103 fstat.c
> --- usr.bin/fstat/fstat.c 20 Jun 2022 01:39:44 -0000 1.103
> +++ usr.bin/fstat/fstat.c 7 Aug 2024 20:19:33 -0000
> @@ -142,12 +142,13 @@ hide(void *p)
> int
> main(int argc, char *argv[])
> {
> + struct kinfo_proc *kp, *kplast;
> struct kinfo_file *kf, *kflast;
> int ch;
> char *memf, *nlistf, *optstr;
> char buf[_POSIX2_LINE_MAX];
> const char *errstr;
> - int cnt, flags;
> + int pcnt, fcnt, flags;
>
> hideroot = getuid();
>
> @@ -282,14 +283,27 @@ main(int argc, char *argv[])
> checkfile = 1;
> }
>
> - if (nfilter == 1) {
> - if ((kf = kvm_getfiles(kd, filter[0].what, filter[0].arg,
> - sizeof(*kf), &cnt)) == NULL)
> - errx(1, "%s", kvm_geterr(kd));
> + if (nfilter == 1 && filter[0].what == KERN_FILE_BYPID) {
> + kp = kvm_getprocs(kd, KERN_PROC_PID, filter[0].arg,
> + sizeof(*kp), &pcnt);
> } else {
> - if ((kf = kvm_getfiles(kd, KERN_FILE_BYPID, -1, sizeof(*kf),
> - &cnt)) == NULL)
> - errx(1, "%s", kvm_geterr(kd));
> + kp = kvm_getprocs(kd, KERN_PROC_ALL, -1, sizeof(*kp), &pcnt);
> + }
> + if (kp == NULL)
> + errx(1, "%s", kvm_geterr(kd));
> + kf = NULL;
> + fcnt = 0;
> + for (kplast = &kp[pcnt]; kp < kplast; ++kp) {
> + struct kinfo_file *kfpid;
> + int cnt;
> +
> + if ((kfpid = kvm_getfiles(kd, KERN_FILE_BYPID, kp->p_pid,
> + sizeof(*kf), &cnt)) == NULL)
> + continue;
> + if ((kf = reallocarray(kf, fcnt + cnt, sizeof(*kf))) == NULL)
> + err(1, NULL);
> + memcpy(&kf[fcnt], kfpid, sizeof(*kf) * cnt);
> + fcnt += cnt;
> }
>
> if (fuser) {
> @@ -317,10 +331,10 @@ main(int argc, char *argv[])
> err(1, "pledge");
> }
>
> - find_splices(kf, cnt);
> + find_splices(kf, fcnt);
> if (!fuser)
> fstat_header();
> - for (kflast = &kf[cnt]; kf < kflast; ++kf) {
> + for (kflast = &kf[fcnt]; kf < kflast; ++kf) {
> if (fuser)
> fuser_check(kf);
> else
>
fstat sysctl crashes kernel