Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
fstat sysctl crashes kernel
To:
tech@openbsd.org
Date:
Thu, 8 Aug 2024 00:49:54 +0200

Download raw body.

Thread
Hi,

There is this long lasting problem that fstat(1) in a loop during
heavy network load can crash the kernel.  Bug is that while traversing
allprocess in sysctl KERN_FILE_BYPID the list may change if fill_file()
is sleeping.  Conclusion at previous hackaton was that the single
sysctl is doing too much.

So I changed fstat to first call kvm_getprocs() getting all pids,
and then kvm_getfiles(KERN_FILE_BYPID) for each pid.  This allows
to abort the list traversal after a matching process has been found.
So we don't have to deal with dead next pointer.  Of course fstat
gets slower as it does multiple sysctl with linear process search,
but slow is better than crash.

It is necessary to lock the current process, otherwise fd_getfile(fdp)
may use a freed struct filedesc.  I am not happy with using
&pr->ps_lock in exit1() as it adds additional sleeping points.  With
the diff below fork+exec, tcpbench, fstat in loops is stable.  To
avoid sleeps in allprocess loop I use RW_SLEEPFAIL.

Note that only KERN_FILE_BYPID with given pid is safe.  After we
remove all other sysctl in userland, we can reject them in the
kernel.

Any idea how we can lock the process in a nicer way?

bluhm

Index: sys/kern/kern_exit.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_exit.c,v
diff -u -p -r1.230 kern_exit.c
--- sys/kern/kern_exit.c	6 Aug 2024 18:41:20 -0000	1.230
+++ sys/kern/kern_exit.c	7 Aug 2024 21:05:12 -0000
@@ -137,7 +137,9 @@ exit1(struct proc *p, int xexit, int xsi
 		if (pr->ps_pid == 1)
 			panic("init died (signal %d, exit %d)", xsig, xexit);
 
+		rw_enter_write(&pr->ps_lock);
 		atomic_setbits_int(&pr->ps_flags, PS_EXITING);
+		rw_exit_write(&pr->ps_lock);
 		pr->ps_xexit = xexit;
 		pr->ps_xsig  = xsig;
 
Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
diff -u -p -r1.434 kern_sysctl.c
--- sys/kern/kern_sysctl.c	6 Aug 2024 12:36:54 -0000	1.434
+++ sys/kern/kern_sysctl.c	7 Aug 2024 22:28:56 -0000
@@ -1672,15 +1672,22 @@ sysctl_file(int *name, u_int namelen, ch
 			break;
 		}
 		matched = 0;
+ restart:
 		LIST_FOREACH(pr, &allprocess, ps_list) {
+			if (arg >= 0 && pr->ps_pid != (pid_t)arg) {
+				/* not the pid we are looking for */
+				continue;
+			}
 			/*
 			 * skip system, exiting, embryonic and undead
 			 * processes
 			 */
-			if (pr->ps_flags & (PS_SYSTEM | PS_EMBRYO | PS_EXITING))
-				continue;
-			if (arg > 0 && pr->ps_pid != (pid_t)arg) {
-				/* not the pid we are looking for */
+			if (rw_enter(&pr->ps_lock,
+			    arg >= 0 ? RW_READ | RW_SLEEPFAIL : RW_READ))
+				goto restart;
+			if (ISSET(pr->ps_flags,
+			    PS_SYSTEM | PS_EMBRYO | PS_EXITING)) {
+				rw_exit_read(&pr->ps_lock);
 				continue;
 			}
 			matched = 1;
@@ -1699,6 +1706,10 @@ sysctl_file(int *name, u_int namelen, ch
 				FILLIT(fp, fdp, i, NULL, pr);
 				FRELE(fp, p);
 			}
+			rw_exit_read(&pr->ps_lock);
+			/* pid is unique, stop searching */
+			if (arg >= 0)
+				break;
 		}
 		if (!matched)
 			error = ESRCH;
Index: usr.bin/fstat/fstat.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/usr.bin/fstat/fstat.c,v
diff -u -p -r1.103 fstat.c
--- usr.bin/fstat/fstat.c	20 Jun 2022 01:39:44 -0000	1.103
+++ usr.bin/fstat/fstat.c	7 Aug 2024 20:19:33 -0000
@@ -142,12 +142,13 @@ hide(void *p)
 int
 main(int argc, char *argv[])
 {
+	struct kinfo_proc *kp, *kplast;
 	struct kinfo_file *kf, *kflast;
 	int ch;
 	char *memf, *nlistf, *optstr;
 	char buf[_POSIX2_LINE_MAX];
 	const char *errstr;
-	int cnt, flags;
+	int pcnt, fcnt, flags;
 
 	hideroot = getuid();
 
@@ -282,14 +283,27 @@ main(int argc, char *argv[])
 		checkfile = 1;
 	}
 
-	if (nfilter == 1) {
-		if ((kf = kvm_getfiles(kd, filter[0].what, filter[0].arg,
-		    sizeof(*kf), &cnt)) == NULL)
-			errx(1, "%s", kvm_geterr(kd));
+	if (nfilter == 1 && filter[0].what == KERN_FILE_BYPID) {
+		kp = kvm_getprocs(kd, KERN_PROC_PID, filter[0].arg,
+		    sizeof(*kp), &pcnt);
 	} else {
-		if ((kf = kvm_getfiles(kd, KERN_FILE_BYPID, -1, sizeof(*kf),
-		    &cnt)) == NULL)
-			errx(1, "%s", kvm_geterr(kd));
+		kp = kvm_getprocs(kd, KERN_PROC_ALL, -1, sizeof(*kp), &pcnt);
+	}
+	if (kp == NULL)
+		errx(1, "%s", kvm_geterr(kd));
+	kf = NULL;
+	fcnt = 0;
+	for (kplast = &kp[pcnt]; kp < kplast; ++kp) {
+		struct kinfo_file *kfpid;
+		int cnt;
+
+		if ((kfpid = kvm_getfiles(kd, KERN_FILE_BYPID, kp->p_pid,
+		    sizeof(*kf), &cnt)) == NULL)
+			continue;
+		if ((kf = reallocarray(kf, fcnt + cnt, sizeof(*kf))) == NULL)
+			err(1, NULL);
+		memcpy(&kf[fcnt], kfpid, sizeof(*kf) * cnt);
+		fcnt += cnt;
 	}
 
 	if (fuser) {
@@ -317,10 +331,10 @@ main(int argc, char *argv[])
 			err(1, "pledge");
 	}
 
-	find_splices(kf, cnt);
+	find_splices(kf, fcnt);
 	if (!fuser)
 		fstat_header();
-	for (kflast = &kf[cnt]; kf < kflast; ++kf) {
+	for (kflast = &kf[fcnt]; kf < kflast; ++kf) {
 		if (fuser)
 			fuser_check(kf);
 		else