On Thu, Aug 08, 2024 at 02:03:19AM +0300, Vitaliy Makkoveev wrote:
> > On 8 Aug 2024, at 01:49, Alexander Bluhm <bluhm@openbsd.org> wrote:
> >
> > Hi,
> >
> > There is this long-lasting problem that fstat(1) in a loop during
> > heavy network load can crash the kernel.  The bug is that while
> > traversing allprocess in sysctl KERN_FILE_BYPID, the list may change
> > if fill_file() is sleeping.  The conclusion at the previous hackathon
> > was that the single sysctl is doing too much.
> >
> > So I changed fstat to first call kvm_getprocs() to get all pids,
> > and then kvm_getfiles(KERN_FILE_BYPID) for each pid.  This allows
> > aborting the list traversal after a matching process has been found,
> > so we don't have to deal with a dead next pointer.  Of course fstat
> > gets slower as it does multiple sysctls with a linear process search,
> > but slow is better than a crash.
> >
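For reference, the userland side described above would look roughly like
the minimal sketch below.  It is untested and only illustrates the kvm(3)
calls (link with -lkvm); the real fstat(1) of course keeps its existing
filtering and output code.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <kvm.h>
#include <limits.h>
#include <stdio.h>

int
main(void)
{
	char errbuf[_POSIX2_LINE_MAX];
	struct kinfo_proc *kp;
	struct kinfo_file *kf;
	kvm_t *kd;
	int i, nprocs, nfiles;

	/* KVM_NO_FILES: go through sysctl(2) only, no /dev/mem access */
	if ((kd = kvm_openfiles(NULL, NULL, NULL, KVM_NO_FILES,
	    errbuf)) == NULL)
		errx(1, "%s", errbuf);

	/* first get the pids of all processes ... */
	if ((kp = kvm_getprocs(kd, KERN_PROC_ALL, 0, sizeof(*kp),
	    &nprocs)) == NULL)
		errx(1, "%s", kvm_geterr(kd));

	/* ... then one KERN_FILE_BYPID sysctl per pid */
	for (i = 0; i < nprocs; i++) {
		kf = kvm_getfiles(kd, KERN_FILE_BYPID, kp[i].p_pid,
		    sizeof(*kf), &nfiles);
		if (kf == NULL)
			continue;	/* process may be gone already */
		printf("pid %d: %d open files\n", kp[i].p_pid, nfiles);
	}
	kvm_close(kd);
	return 0;
}
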
> > It is necessary to lock the current process, otherwise fd_getfile(fdp)
> > may use a freed struct filedesc.  I am not happy with using
> > &pr->ps_lock in exit1() as it adds additional sleeping points.  With
> > the diff below, fork+exec, tcpbench and fstat in loops are stable.  To
> > avoid sleeps in the allprocess loop I use RW_SLEEPFAIL.
> >
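I read the RW_SLEEPFAIL part as something roughly like the sketch below.
This is hypothetical and not part of the diff at the end of this mail;
`ps_lock' is the per-process lock you mention and `pid' stands for the
requested pid argument.

	LIST_FOREACH(pr, &allprocess, ps_list) {
		if (pr->ps_pid != pid)
			continue;
		/*
		 * With RW_SLEEPFAIL, rw_enter(9) fails with EAGAIN
		 * instead of taking the lock if it had to sleep for it,
		 * so the allprocess iteration is never resumed after a
		 * sleep that may have changed the list.
		 */
		if (rw_enter(&pr->ps_lock, RW_READ | RW_SLEEPFAIL)) {
			error = EAGAIN;		/* caller can retry */
			break;
		}
		/* ... fill_file() for each descriptor, may sleep ... */
		rw_exit(&pr->ps_lock);
		matched = 1;
		break;
	}
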
> > Note that only KERN_FILE_BYPID with a given pid is safe.  After we
> > remove all other uses of this sysctl from userland, we can reject
> > them in the kernel.
> >
> > Any idea how we can lock the process in a nicer way?
> >
>
> As I remember, when I tried to introduce an allprocess rwlock(9), the
> only problem was a false (?) witness(4) report.  I would like to follow
> this approach.  I don't mean the various allprocess list iterators; as
> it turned out, that was a bad idea.
>
The minimal diff.  Since both fork1() and exit1() are kernel locked, only
the sysctl_file() `allprocess' loops are protected with the
`allprocess_lock' rwlock(9).

A continuous "make -j 16" with simultaneous fstat(1) loops and some
network load is stable on my amd64.

Index: sys/kern/kern_exit.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_exit.c,v
diff -u -p -r1.230 kern_exit.c
--- sys/kern/kern_exit.c 6 Aug 2024 18:41:20 -0000 1.230
+++ sys/kern/kern_exit.c 8 Aug 2024 10:55:09 -0000
@@ -210,6 +210,8 @@ exit1(struct proc *p, int xexit, int xsi
#endif
if ((p->p_flag & P_THREAD) == 0) {
+ rw_enter_write(&allprocess_lock);
+
if (pr->ps_flags & PS_PROFIL)
stopprofclock(pr);
@@ -291,6 +293,8 @@ exit1(struct proc *p, int xexit, int xsi
*/
freepid(pr->ps_pid);
}
+
+ rw_exit_write(&allprocess_lock);
/*
* Reparent children to their original parent, in case
Index: sys/kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
diff -u -p -r1.261 kern_fork.c
--- sys/kern/kern_fork.c 6 Aug 2024 08:44:54 -0000 1.261
+++ sys/kern/kern_fork.c 8 Aug 2024 10:55:09 -0000
@@ -392,6 +392,8 @@ fork1(struct proc *curp, int flags, void
return (ENOMEM);
}
+ rw_enter_write(&allprocess_lock);
+
/*
* From now on, we're committed to the fork and cannot fail.
*/
@@ -503,6 +505,14 @@ fork1(struct proc *curp, int flags, void
*rnewprocp = p;
/*
+ * Return child pid to parent process
+ */
+ if (retval != NULL)
+ *retval = pr->ps_pid;
+
+ rw_exit_write(&allprocess_lock);
+
+ /*
* Preserve synchronization semantics of vfork. If waiting for
* child to exec or exit, set PS_PPWAIT on child and PS_ISPWAIT
* on ourselves, and sleep on our process for the latter flag
@@ -519,11 +529,6 @@ fork1(struct proc *curp, int flags, void
if ((flags & FORK_PTRACE) && (curpr->ps_flags & PS_TRACED))
psignal(curp, SIGTRAP);
- /*
- * Return child pid to parent process
- */
- if (retval != NULL)
- *retval = pr->ps_pid;
return (0);
}
Index: sys/kern/kern_proc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_proc.c,v
diff -u -p -r1.99 kern_proc.c
--- sys/kern/kern_proc.c 8 Jul 2024 13:17:12 -0000 1.99
+++ sys/kern/kern_proc.c 8 Aug 2024 10:55:09 -0000
@@ -49,6 +49,7 @@
* U uidinfolk
*/
+struct rwlock allprocess_lock;
struct rwlock uidinfolk;
#define UIHASH(uid) (&uihashtbl[(uid) & uihash])
LIST_HEAD(uihashhead, uidinfo) *uihashtbl; /* [U] */
@@ -92,6 +93,7 @@ procinit(void)
LIST_INIT(&zombprocess);
LIST_INIT(&allproc);
+ rw_init(&allprocess_lock, "allpslk");
rw_init(&uidinfolk, "uidinfo");
tidhashtbl = hashinit(maxthread / 4, M_PROC, M_NOWAIT, &tidhash);
Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
diff -u -p -r1.435 kern_sysctl.c
--- sys/kern/kern_sysctl.c 8 Aug 2024 10:25:00 -0000 1.435
+++ sys/kern/kern_sysctl.c 8 Aug 2024 10:55:09 -0000
@@ -1675,6 +1675,7 @@ sysctl_file(int *name, u_int namelen, ch
break;
}
matched = 0;
+ rw_enter_read(&allprocess_lock);
LIST_FOREACH(pr, &allprocess, ps_list) {
/*
* skip system, exiting, embryonic and undead
@@ -1703,10 +1704,12 @@ sysctl_file(int *name, u_int namelen, ch
FRELE(fp, p);
}
}
+ rw_exit_read(&allprocess_lock);
if (!matched)
error = ESRCH;
break;
case KERN_FILE_BYUID:
+ rw_enter_read(&allprocess_lock);
LIST_FOREACH(pr, &allprocess, ps_list) {
/*
* skip system, exiting, embryonic and undead
@@ -1732,6 +1735,7 @@ sysctl_file(int *name, u_int namelen, ch
FRELE(fp, p);
}
}
+ rw_exit_read(&allprocess_lock);
break;
default:
error = EINVAL;
Index: sys/sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
diff -u -p -r1.367 proc.h
--- sys/sys/proc.h 6 Aug 2024 08:44:54 -0000 1.367
+++ sys/sys/proc.h 8 Aug 2024 10:55:09 -0000
@@ -503,6 +503,8 @@ void uid_release(struct uidinfo *);
#define EXIT_THREAD 0x00000002
#define EXIT_THREAD_NOCHECK 0x00000003
+extern struct rwlock allprocess_lock;
+
#define TIDHASH(tid) (&tidhashtbl[(tid) & tidhash])
extern LIST_HEAD(tidhashhead, proc) *tidhashtbl;
extern u_long tidhash;