Index | Thread | Search

From:
Martin Pieuchot <mpi@grenadille.net>
Subject:
Re: btrace, identifying syscall+0x5c4
To:
tech <tech@openbsd.org>
Date:
Thu, 5 Dec 2024 10:21:59 +0100

Download raw body.

Thread
On 04/12/24(Wed) 12:03, Stuart Henderson wrote:
> I just found a nice stat that I had forgotten was logged - poll time
> for the complete run of devices that librenms is monitoring (runtime of
> all poller processes added together).
> 
> looking at this from before running with the unlock diffs, and comparing
> to after (before i dropped the number of concurrent processes), this went
> from around 950 seconds to around 700.
> 
> (and further to 600 after i lowered the concurrency).

Thanks for testing.

Here's an updated diff that protects ktrnamei() with KERNEL_LOCK().
That was a bug in the previous versions and I guess that's what you
triggered.

Index: kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
diff -u -p -r1.265 syscalls.master
--- kern/syscalls.master	2 Aug 2024 14:34:45 -0000	1.265
+++ kern/syscalls.master	28 Nov 2024 18:55:36 -0000
@@ -52,7 +52,7 @@
 3	STD NOLOCK	{ ssize_t sys_read(int fd, void *buf, size_t nbyte); }
 4	STD NOLOCK	{ ssize_t sys_write(int fd, const void *buf, \
 			    size_t nbyte); }
-5	STD		{ int sys_open(const char *path, \
+5	STD NOLOCK	{ int sys_open(const char *path, \
 			    int flags, ... mode_t mode); }
 6	STD NOLOCK	{ int sys_close(int fd); }
 7	STD NOLOCK	{ int sys_getentropy(void *buf, size_t nbyte); }
@@ -104,11 +104,11 @@
 35	STD		{ int sys_fchflags(int fd, u_int flags); }
 36	STD		{ void sys_sync(void); }
 37	OBSOL		msyscall
-38	STD		{ int sys_stat(const char *path, struct stat *ub); }
+38	STD NOLOCK	{ int sys_stat(const char *path, struct stat *ub); }
 39	STD NOLOCK	{ pid_t sys_getppid(void); }
-40	STD		{ int sys_lstat(const char *path, struct stat *ub); }
+40	STD NOLOCK	{ int sys_lstat(const char *path, struct stat *ub); }
 41	STD NOLOCK	{ int sys_dup(int fd); }
-42	STD		{ int sys_fstatat(int fd, const char *path, \
+42	STD NOLOCK	{ int sys_fstatat(int fd, const char *path, \
 			    struct stat *buf, int flag); }
 43	STD NOLOCK	{ gid_t sys_getegid(void); }
 44	STD		{ int sys_profil(caddr_t samples, size_t size, \
@@ -133,7 +133,7 @@
 51	UNIMPL		acct
 #endif
 52	STD		{ int sys_sigpending(void); }
-53	STD		{ int sys_fstat(int fd, struct stat *sb); }
+53	STD NOLOCK	{ int sys_fstat(int fd, struct stat *sb); }
 54	STD NOLOCK	{ int sys_ioctl(int fd, \
 			    u_long com, ... void *data); }
 55	STD		{ int sys_reboot(int opt); }
@@ -240,7 +240,7 @@
 113	UNIMPL		fktrace
 114	STD 		{ int sys_unveil(const char *path, \
 			    const char *permissions); }
-115	STD		{ int sys___realpath(const char *pathname, \
+115	STD NOLOCK	{ int sys___realpath(const char *pathname, \
 			    char *resolved); }
 116	STD NOLOCK	{ int sys_recvmmsg(int s, struct mmsghdr *mmsg, \
 			    unsigned int vlen, int flags, \
@@ -553,7 +553,7 @@
 			    mode_t mode); }
 320	STD		{ int sys_mknodat(int fd, const char *path, \
 			    mode_t mode, dev_t dev); }
-321	STD		{ int sys_openat(int fd, const char *path, int flags, \
+321	STD NOLOCK	{ int sys_openat(int fd, const char *path, int flags, \
 			    ... mode_t mode); }
 322	STD		{ ssize_t sys_readlinkat(int fd, const char *path, \
 			    char *buf, size_t count); }
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
diff -u -p -r1.370 vfs_syscalls.c
--- kern/vfs_syscalls.c	5 Nov 2024 06:03:19 -0000	1.370
+++ kern/vfs_syscalls.c	28 Nov 2024 18:19:24 -0000
@@ -894,8 +894,10 @@ sys___realpath(struct proc *p, void *v, 
 		bp = &cwdbuf[cwdlen - 1];
 		*bp = '\0';
 
+		KERNEL_LOCK();
 		error = vfs_getcwd_common(p->p_fd->fd_cdir, NULL, &bp, cwdbuf,
 		    cwdlen/2, GETCWD_CHECK_ACCESS, p);
+		KERNEL_UNLOCK();
 
 		if (error) {
 			free(cwdbuf, M_TEMP, cwdlen);
@@ -919,12 +921,16 @@ sys___realpath(struct proc *p, void *v, 
 
 	nd.ni_pledge = PLEDGE_RPATH;
 	nd.ni_unveil = UNVEIL_READ;
-	if ((error = namei(&nd)) != 0)
+	KERNEL_LOCK();
+	if ((error = namei(&nd)) != 0) {
+		KERNEL_UNLOCK();
 		goto end;
+	}
 
 	/* release reference from namei */
 	if (nd.ni_vp)
 		vrele(nd.ni_vp);
+	KERNEL_UNLOCK();
 
 	error = copyoutstr(nd.ni_cnd.cn_rpbuf, SCARG(uap, resolved),
 	    MAXPATHLEN, NULL);
@@ -1120,23 +1126,20 @@ doopenat(struct proc *p, int fd, const c
 		localtrunc = 1;
 		flags &= ~O_TRUNC;	/* Must do truncate ourselves */
 	}
+	KERNEL_LOCK();
 	if ((error = vn_open(&nd, flags, cmode)) != 0) {
 		fdplock(fdp);
 		if (error == ENODEV &&
 		    p->p_dupfd >= 0 &&			/* XXX from fdopen */
 		    (error =
 			dupfdopen(p, indx, flags)) == 0) {
-			fdpunlock(fdp);
-			closef(fp, p);
 			*retval = indx;
-			return (error);
+			goto error;
 		}
 		if (error == ERESTART)
 			error = EINTR;
 		fdremove(fdp, indx);
-		fdpunlock(fdp);
-		closef(fp, p);
-		return (error);
+		goto error;
 	}
 	p->p_dupfd = 0;
 	vp = nd.ni_vp;
@@ -1161,9 +1164,7 @@ doopenat(struct proc *p, int fd, const c
 			fdplock(fdp);
 			/* closef will vn_close the file for us. */
 			fdremove(fdp, indx);
-			fdpunlock(fdp);
-			closef(fp, p);
-			return (error);
+			goto error;
 		}
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK);
@@ -1185,18 +1186,22 @@ doopenat(struct proc *p, int fd, const c
 			fdplock(fdp);
 			/* closef will close the file for us. */
 			fdremove(fdp, indx);
-			fdpunlock(fdp);
-			closef(fp, p);
-			return (error);
+			goto error;
 		}
 	}
 	VOP_UNLOCK(vp);
+	KERNEL_UNLOCK();
 	*retval = indx;
 	fdplock(fdp);
 	fdinsert(fdp, indx, cloexec, fp);
 	fdpunlock(fdp);
 	FRELE(fp, p);
 	return (error);
+error:
+	KERNEL_UNLOCK();
+	fdpunlock(fdp);
+	closef(fp, p);
+	return (error);
 }
 
 /*
@@ -2066,10 +2071,14 @@ dofstatat(struct proc *p, int fd, const 
 	NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p);
 	nd.ni_pledge = PLEDGE_RPATH;
 	nd.ni_unveil = UNVEIL_READ;
-	if ((error = namei(&nd)) != 0)
+	KERNEL_LOCK();
+	if ((error = namei(&nd)) != 0) {
+		KERNEL_UNLOCK();
 		return (error);
+	}
 	error = vn_stat(nd.ni_vp, &sb, p);
 	vput(nd.ni_vp);
+	KERNEL_UNLOCK();
 	if (error)
 		return (error);
 	/* Don't let non-root see generation numbers (for NFS security) */
Index: kern/vfs_vnops.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_vnops.c,v
diff -u -p -r1.123 vfs_vnops.c
--- kern/vfs_vnops.c	5 Nov 2024 06:03:19 -0000	1.123
+++ kern/vfs_vnops.c	2 Dec 2024 16:42:23 -0000
@@ -427,7 +427,13 @@ int
 vn_statfile(struct file *fp, struct stat *sb, struct proc *p)
 {
 	struct vnode *vp = fp->f_data;
-	return vn_stat(vp, sb, p);
+	int error;
+
+	KERNEL_LOCK();
+	error = vn_stat(vp, sb, p);
+	KERNEL_UNLOCK();
+
+	return (error);
 }
 
 /*
Index: kern/kern_ktrace.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_ktrace.c,v
diff -u -p -r1.114 kern_ktrace.c
--- kern/kern_ktrace.c	15 Dec 2023 15:12:08 -0000	1.114
+++ kern/kern_ktrace.c	4 Dec 2024 09:10:07 -0000
@@ -179,7 +179,9 @@ ktrsyscall(struct proc *p, register_t co
 		*argp++ = args[i];
 	if (nargs && copyin((void *)args[0], argp, nargs * sizeof(int)))
 		memset(argp, 0, nargs * sizeof(int));
+	KERNEL_LOCK();
 	ktrwrite(p, &kth, ktp, len);
+	KERNEL_UNLOCK();
 	free(ktp, M_TEMP, len);
 	atomic_clearbits_int(&p->p_flag, P_INKTR);
 }
@@ -214,7 +216,9 @@ ktrnamei(struct proc *p, char *path)
 
 	atomic_setbits_int(&p->p_flag, P_INKTR);
 	ktrinitheader(&kth, p, KTR_NAMEI);
+	KERNEL_LOCK();
 	ktrwrite(p, &kth, path, strlen(path));
+	KERNEL_UNLOCK();
 	atomic_clearbits_int(&p->p_flag, P_INKTR);
 }
 
Index: sys/syscall_mi.h
===================================================================
RCS file: /cvs/src/sys/sys/syscall_mi.h,v
diff -u -p -r1.36 syscall_mi.h
--- sys/syscall_mi.h	29 Oct 2024 12:40:17 -0000	1.36
+++ sys/syscall_mi.h	4 Dec 2024 09:08:33 -0000
@@ -151,9 +151,7 @@ mi_syscall(struct proc *p, register_t co
 #ifdef KTRACE
 	if (KTRPOINT(p, KTR_SYSCALL)) {
 		/* convert to mask, then include with code */
-		KERNEL_LOCK();
 		ktrsyscall(p, code, callp->sy_argsize, argp);
-		KERNEL_UNLOCK();
 	}
 #endif