Index | Thread | Search

From:
Vitaliy Makkoveev <otto@bsdbox.dev>
Subject:
Re: fstat sysctl crashes kernel
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Thu, 8 Aug 2024 02:03:19 +0300

Download raw body.

Thread
> On 8 Aug 2024, at 01:49, Alexander Bluhm <bluhm@openbsd.org> wrote:
> 
> Hi,
> 
> There is this long lasting problem that fstat(1) in a loop during
> heavy network load can crash the kernel.  Bug is that while traversing
> allprocess in sysctl KERN_FILE_BYPID the list may change if fill_file()
> is sleeping.  Conclusion at previous hackaton was that the single
> sysctl is doing too much.
> 
> So I changed fstat to first call kvm_getprocs() getting all pids,
> and then kvm_getfiles(KERN_FILE_BYPID) for each pid.  This allows
> to abort the list traversal after a matching process has been found.
> So we don't have to deal with dead next pointer.  Of course fstat
> gets slower as it does multiple sysctl with linear process search,
> but slow is better than crash.
> 
> It is necessary to lock the current process, otherwise fd_getfile(fdp)
> may use a freed struct filedesc.  I am not happy with using
> &pr->ps_lock in exit1() as it adds additional sleeping points.  With
> the diff below fork+exec, tcpbench, fstat in loops is stable.  To
> avoid sleeps in allprocess loop I use RW_SLEEPFAIL.
> 
> Note that only KERN_FILE_BYPID with given pid is safe.  After we
> remove all other sysctl in userland, we can reject them in the
> kernel.
> 
> Any idea how we can lock the process in a nicer way?
> 

As I remember, when I tried to introduce allprocess rwlock(9), the
only problem was false (?) witness(4) report. I like to follow this
way. I don’t mean various allprocess list iterators, as it was exposed
this was bad idea.

> bluhm
> 
> Index: sys/kern/kern_exit.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_exit.c,v
> diff -u -p -r1.230 kern_exit.c
> --- sys/kern/kern_exit.c	6 Aug 2024 18:41:20 -0000	1.230
> +++ sys/kern/kern_exit.c	7 Aug 2024 21:05:12 -0000
> @@ -137,7 +137,9 @@ exit1(struct proc *p, int xexit, int xsi
> 		if (pr->ps_pid == 1)
> 			panic("init died (signal %d, exit %d)", xsig, xexit);
> 
> +		rw_enter_write(&pr->ps_lock);
> 		atomic_setbits_int(&pr->ps_flags, PS_EXITING);
> +		rw_exit_write(&pr->ps_lock);
> 		pr->ps_xexit = xexit;
> 		pr->ps_xsig  = xsig;
> 
> Index: sys/kern/kern_sysctl.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
> diff -u -p -r1.434 kern_sysctl.c
> --- sys/kern/kern_sysctl.c	6 Aug 2024 12:36:54 -0000	1.434
> +++ sys/kern/kern_sysctl.c	7 Aug 2024 22:28:56 -0000
> @@ -1672,15 +1672,22 @@ sysctl_file(int *name, u_int namelen, ch
> 			break;
> 		}
> 		matched = 0;
> + restart:
> 		LIST_FOREACH(pr, &allprocess, ps_list) {
> +			if (arg >= 0 && pr->ps_pid != (pid_t)arg) {
> +				/* not the pid we are looking for */
> +				continue;
> +			}
> 			/*
> 			 * skip system, exiting, embryonic and undead
> 			 * processes
> 			 */
> -			if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
> -				continue;
> -			if (arg > 0 && pr->ps_pid != (pid_t)arg) {
> -				/* not the pid we are looking for */
> +			if (rw_enter(&pr->ps_lock,
> +			    arg >= 0 ? RW_READ | RW_SLEEPFAIL : RW_READ))
> +				goto restart;
> +			if (ISSET(pr->ps_flags,
> +			    PS_SYSTEM | PS_EMBRYO | PS_EXITING)) {
> +				rw_exit_read(&pr->ps_lock);
> 				continue;
> 			}
> 			matched = 1;
> @@ -1699,6 +1706,10 @@ sysctl_file(int *name, u_int namelen, ch
> 				FILLIT(fp, fdp, i, NULL, pr);
> 				FRELE(fp, p);
> 			}
> +			rw_exit_read(&pr->ps_lock);
> +			/* pid is unique, stop searching */
> +			if (arg >= 0)
> +				break;
> 		}
> 		if (!matched)
> 			error = ESRCH;
> Index: usr.bin/fstat/fstat.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/usr.bin/fstat/fstat.c,v
> diff -u -p -r1.103 fstat.c
> --- usr.bin/fstat/fstat.c	20 Jun 2022 01:39:44 -0000	1.103
> +++ usr.bin/fstat/fstat.c	7 Aug 2024 20:19:33 -0000
> @@ -142,12 +142,13 @@ hide(void *p)
> int
> main(int argc, char *argv[])
> {
> +	struct kinfo_proc *kp, *kplast;
> 	struct kinfo_file *kf, *kflast;
> 	int ch;
> 	char *memf, *nlistf, *optstr;
> 	char buf[_POSIX2_LINE_MAX];
> 	const char *errstr;
> -	int cnt, flags;
> +	int pcnt, fcnt, flags;
> 
> 	hideroot = getuid();
> 
> @@ -282,14 +283,27 @@ main(int argc, char *argv[])
> 		checkfile = 1;
> 	}
> 
> -	if (nfilter == 1) {
> -		if ((kf = kvm_getfiles(kd, filter[0].what, filter[0].arg,
> -		    sizeof(*kf), &cnt)) == NULL)
> -			errx(1, "%s", kvm_geterr(kd));
> +	if (nfilter == 1 && filter[0].what == KERN_FILE_BYPID) {
> +		kp = kvm_getprocs(kd, KERN_PROC_PID, filter[0].arg,
> +		    sizeof(*kp), &pcnt);
> 	} else {
> -		if ((kf = kvm_getfiles(kd, KERN_FILE_BYPID, -1, sizeof(*kf),
> -		    &cnt)) == NULL)
> -			errx(1, "%s", kvm_geterr(kd));
> +		kp = kvm_getprocs(kd, KERN_PROC_ALL, -1, sizeof(*kp), &pcnt);
> +	}
> +	if (kp == NULL)
> +		errx(1, "%s", kvm_geterr(kd));
> +	kf = NULL;
> +	fcnt = 0;
> +	for (kplast = &kp[pcnt]; kp < kplast; ++kp) {
> +		struct kinfo_file *kfpid;
> +		int cnt;
> +
> +		if ((kfpid = kvm_getfiles(kd, KERN_FILE_BYPID, kp->p_pid,
> +		    sizeof(*kf), &cnt)) == NULL)
> +			continue;
> +		if ((kf = reallocarray(kf, fcnt + cnt, sizeof(*kf))) == NULL)
> +			err(1, NULL);
> +		memcpy(&kf[fcnt], kfpid, sizeof(*kf) * cnt);
> +		fcnt += cnt;
> 	}
> 
> 	if (fuser) {
> @@ -317,10 +331,10 @@ main(int argc, char *argv[])
> 			err(1, "pledge");
> 	}
> 
> -	find_splices(kf, cnt);
> +	find_splices(kf, fcnt);
> 	if (!fuser)
> 		fstat_header();
> -	for (kflast = &kf[cnt]; kf < kflast; ++kf) {
> +	for (kflast = &kf[fcnt]; kf < kflast; ++kf) {
> 		if (fuser)
> 			fuser_check(kf);
> 		else
>