Download raw body.
fstat sysctl crashes kernel
> On 8 Aug 2024, at 01:49, Alexander Bluhm <bluhm@openbsd.org> wrote:
>
> Hi,
>
> There is this long lasting problem that fstat(1) in a loop during
> heavy network load can crash the kernel. Bug is that while traversing
> allprocess in sysctl KERN_FILE_BYPID the list may change if fill_file()
> is sleeping. Conclusion at previous hackaton was that the single
> sysctl is doing too much.
>
> So I changed fstat to first call kvm_getprocs() getting all pids,
> and then kvm_getfiles(KERN_FILE_BYPID) for each pid. This allows
> to abort the list traversal after a matching process has been found.
> So we don't have to deal with dead next pointer. Of course fstat
> gets slower as it does multiple sysctl with linear process search,
> but slow is better than crash.
>
> It is necessary to lock the current process, otherwise fd_getfile(fdp)
> may use a freed struct filedesc. I am not happy with using
> &pr->ps_lock in exit1() as it adds additional sleeping points. With
> the diff below fork+exec, tcpbench, fstat in loops is stable. To
> avoid sleeps in allprocess loop I use RW_SLEEPFAIL.
>
> Note that only KERN_FILE_BYPID with given pid is safe. After we
> remove all other sysctl in userland, we can reject them in the
> kernel.
>
> Any idea how we can lock the process in a nicer way?
>
As I remember, when I tried to introduce allprocess rwlock(9), the
only problem was false (?) witness(4) report. I like to follow this
way. I don’t mean various allprocess list iterators, as it was exposed
this was bad idea.
> bluhm
>
> Index: sys/kern/kern_exit.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_exit.c,v
> diff -u -p -r1.230 kern_exit.c
> --- sys/kern/kern_exit.c 6 Aug 2024 18:41:20 -0000 1.230
> +++ sys/kern/kern_exit.c 7 Aug 2024 21:05:12 -0000
> @@ -137,7 +137,9 @@ exit1(struct proc *p, int xexit, int xsi
> if (pr->ps_pid == 1)
> panic("init died (signal %d, exit %d)", xsig, xexit);
>
> + rw_enter_write(&pr->ps_lock);
> atomic_setbits_int(&pr->ps_flags, PS_EXITING);
> + rw_exit_write(&pr->ps_lock);
> pr->ps_xexit = xexit;
> pr->ps_xsig = xsig;
>
> Index: sys/kern/kern_sysctl.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
> diff -u -p -r1.434 kern_sysctl.c
> --- sys/kern/kern_sysctl.c 6 Aug 2024 12:36:54 -0000 1.434
> +++ sys/kern/kern_sysctl.c 7 Aug 2024 22:28:56 -0000
> @@ -1672,15 +1672,22 @@ sysctl_file(int *name, u_int namelen, ch
> break;
> }
> matched = 0;
> + restart:
> LIST_FOREACH(pr, &allprocess, ps_list) {
> + if (arg >= 0 && pr->ps_pid != (pid_t)arg) {
> + /* not the pid we are looking for */
> + continue;
> + }
> /*
> * skip system, exiting, embryonic and undead
> * processes
> */
> - if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
> - continue;
> - if (arg > 0 && pr->ps_pid != (pid_t)arg) {
> - /* not the pid we are looking for */
> + if (rw_enter(&pr->ps_lock,
> + arg >= 0 ? RW_READ | RW_SLEEPFAIL : RW_READ))
> + goto restart;
> + if (ISSET(pr->ps_flags,
> + PS_SYSTEM | PS_EMBRYO | PS_EXITING)) {
> + rw_exit_read(&pr->ps_lock);
> continue;
> }
> matched = 1;
> @@ -1699,6 +1706,10 @@ sysctl_file(int *name, u_int namelen, ch
> FILLIT(fp, fdp, i, NULL, pr);
> FRELE(fp, p);
> }
> + rw_exit_read(&pr->ps_lock);
> + /* pid is unique, stop searching */
> + if (arg >= 0)
> + break;
> }
> if (!matched)
> error = ESRCH;
> Index: usr.bin/fstat/fstat.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/usr.bin/fstat/fstat.c,v
> diff -u -p -r1.103 fstat.c
> --- usr.bin/fstat/fstat.c 20 Jun 2022 01:39:44 -0000 1.103
> +++ usr.bin/fstat/fstat.c 7 Aug 2024 20:19:33 -0000
> @@ -142,12 +142,13 @@ hide(void *p)
> int
> main(int argc, char *argv[])
> {
> + struct kinfo_proc *kp, *kplast;
> struct kinfo_file *kf, *kflast;
> int ch;
> char *memf, *nlistf, *optstr;
> char buf[_POSIX2_LINE_MAX];
> const char *errstr;
> - int cnt, flags;
> + int pcnt, fcnt, flags;
>
> hideroot = getuid();
>
> @@ -282,14 +283,27 @@ main(int argc, char *argv[])
> checkfile = 1;
> }
>
> - if (nfilter == 1) {
> - if ((kf = kvm_getfiles(kd, filter[0].what, filter[0].arg,
> - sizeof(*kf), &cnt)) == NULL)
> - errx(1, "%s", kvm_geterr(kd));
> + if (nfilter == 1 && filter[0].what == KERN_FILE_BYPID) {
> + kp = kvm_getprocs(kd, KERN_PROC_PID, filter[0].arg,
> + sizeof(*kp), &pcnt);
> } else {
> - if ((kf = kvm_getfiles(kd, KERN_FILE_BYPID, -1, sizeof(*kf),
> - &cnt)) == NULL)
> - errx(1, "%s", kvm_geterr(kd));
> + kp = kvm_getprocs(kd, KERN_PROC_ALL, -1, sizeof(*kp), &pcnt);
> + }
> + if (kp == NULL)
> + errx(1, "%s", kvm_geterr(kd));
> + kf = NULL;
> + fcnt = 0;
> + for (kplast = &kp[pcnt]; kp < kplast; ++kp) {
> + struct kinfo_file *kfpid;
> + int cnt;
> +
> + if ((kfpid = kvm_getfiles(kd, KERN_FILE_BYPID, kp->p_pid,
> + sizeof(*kf), &cnt)) == NULL)
> + continue;
> + if ((kf = reallocarray(kf, fcnt + cnt, sizeof(*kf))) == NULL)
> + err(1, NULL);
> + memcpy(&kf[fcnt], kfpid, sizeof(*kf) * cnt);
> + fcnt += cnt;
> }
>
> if (fuser) {
> @@ -317,10 +331,10 @@ main(int argc, char *argv[])
> err(1, "pledge");
> }
>
> - find_splices(kf, cnt);
> + find_splices(kf, fcnt);
> if (!fuser)
> fstat_header();
> - for (kflast = &kf[cnt]; kf < kflast; ++kf) {
> + for (kflast = &kf[fcnt]; kf < kflast; ++kf) {
> if (fuser)
> fuser_check(kf);
> else
>
fstat sysctl crashes kernel