From: Martin Pieuchot Subject: Re: btrace, identifying syscall+0x5c4 To: tech Date: Thu, 5 Dec 2024 10:21:59 +0100 On 04/12/24(Wed) 12:03, Stuart Henderson wrote: > I just found a nice stat that I had forgotten was logged - poll time > for the complete run of devices that librenms is monitoring (runtime of > all poller processes added together). > > looking at this from before running with the unlock diffs, and comparing > to after (before i dropped the number of concurrent processes), this went > from around 950 seconds to around 700. > > (and further to 600 after i lowered the concurrency). Thanks for testing. Here's an updated diff that protects ktrnamei() with KERNEL_LOCK(). That was a bug in the previous versions and I guess that's what you triggered. Index: kern/syscalls.master =================================================================== RCS file: /cvs/src/sys/kern/syscalls.master,v diff -u -p -r1.265 syscalls.master --- kern/syscalls.master 2 Aug 2024 14:34:45 -0000 1.265 +++ kern/syscalls.master 28 Nov 2024 18:55:36 -0000 @@ -52,7 +52,7 @@ 3 STD NOLOCK { ssize_t sys_read(int fd, void *buf, size_t nbyte); } 4 STD NOLOCK { ssize_t sys_write(int fd, const void *buf, \ size_t nbyte); } -5 STD { int sys_open(const char *path, \ +5 STD NOLOCK { int sys_open(const char *path, \ int flags, ... mode_t mode); } 6 STD NOLOCK { int sys_close(int fd); } 7 STD NOLOCK { int sys_getentropy(void *buf, size_t nbyte); } @@ -104,11 +104,11 @@ 35 STD { int sys_fchflags(int fd, u_int flags); } 36 STD { void sys_sync(void); } 37 OBSOL msyscall -38 STD { int sys_stat(const char *path, struct stat *ub); } +38 STD NOLOCK { int sys_stat(const char *path, struct stat *ub); } 39 STD NOLOCK { pid_t sys_getppid(void); } -40 STD { int sys_lstat(const char *path, struct stat *ub); } +40 STD NOLOCK { int sys_lstat(const char *path, struct stat *ub); } 41 STD NOLOCK { int sys_dup(int fd); } -42 STD { int sys_fstatat(int fd, const char *path, \ +42 STD NOLOCK { int sys_fstatat(int fd, const char *path, \ struct stat *buf, int flag); } 43 STD NOLOCK { gid_t sys_getegid(void); } 44 STD { int sys_profil(caddr_t samples, size_t size, \ @@ -133,7 +133,7 @@ 51 UNIMPL acct #endif 52 STD { int sys_sigpending(void); } -53 STD { int sys_fstat(int fd, struct stat *sb); } +53 STD NOLOCK { int sys_fstat(int fd, struct stat *sb); } 54 STD NOLOCK { int sys_ioctl(int fd, \ u_long com, ... void *data); } 55 STD { int sys_reboot(int opt); } @@ -240,7 +240,7 @@ 113 UNIMPL fktrace 114 STD { int sys_unveil(const char *path, \ const char *permissions); } -115 STD { int sys___realpath(const char *pathname, \ +115 STD NOLOCK { int sys___realpath(const char *pathname, \ char *resolved); } 116 STD NOLOCK { int sys_recvmmsg(int s, struct mmsghdr *mmsg, \ unsigned int vlen, int flags, \ @@ -553,7 +553,7 @@ mode_t mode); } 320 STD { int sys_mknodat(int fd, const char *path, \ mode_t mode, dev_t dev); } -321 STD { int sys_openat(int fd, const char *path, int flags, \ +321 STD NOLOCK { int sys_openat(int fd, const char *path, int flags, \ ... mode_t mode); } 322 STD { ssize_t sys_readlinkat(int fd, const char *path, \ char *buf, size_t count); } Index: kern/vfs_syscalls.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v diff -u -p -r1.370 vfs_syscalls.c --- kern/vfs_syscalls.c 5 Nov 2024 06:03:19 -0000 1.370 +++ kern/vfs_syscalls.c 28 Nov 2024 18:19:24 -0000 @@ -894,8 +894,10 @@ sys___realpath(struct proc *p, void *v, bp = &cwdbuf[cwdlen - 1]; *bp = '\0'; + KERNEL_LOCK(); error = vfs_getcwd_common(p->p_fd->fd_cdir, NULL, &bp, cwdbuf, cwdlen/2, GETCWD_CHECK_ACCESS, p); + KERNEL_UNLOCK(); if (error) { free(cwdbuf, M_TEMP, cwdlen); @@ -919,12 +921,16 @@ sys___realpath(struct proc *p, void *v, nd.ni_pledge = PLEDGE_RPATH; nd.ni_unveil = UNVEIL_READ; - if ((error = namei(&nd)) != 0) + KERNEL_LOCK(); + if ((error = namei(&nd)) != 0) { + KERNEL_UNLOCK(); goto end; + } /* release reference from namei */ if (nd.ni_vp) vrele(nd.ni_vp); + KERNEL_UNLOCK(); error = copyoutstr(nd.ni_cnd.cn_rpbuf, SCARG(uap, resolved), MAXPATHLEN, NULL); @@ -1120,23 +1126,20 @@ doopenat(struct proc *p, int fd, const c localtrunc = 1; flags &= ~O_TRUNC; /* Must do truncate ourselves */ } + KERNEL_LOCK(); if ((error = vn_open(&nd, flags, cmode)) != 0) { fdplock(fdp); if (error == ENODEV && p->p_dupfd >= 0 && /* XXX from fdopen */ (error = dupfdopen(p, indx, flags)) == 0) { - fdpunlock(fdp); - closef(fp, p); *retval = indx; - return (error); + goto error; } if (error == ERESTART) error = EINTR; fdremove(fdp, indx); - fdpunlock(fdp); - closef(fp, p); - return (error); + goto error; } p->p_dupfd = 0; vp = nd.ni_vp; @@ -1161,9 +1164,7 @@ doopenat(struct proc *p, int fd, const c fdplock(fdp); /* closef will vn_close the file for us. */ fdremove(fdp, indx); - fdpunlock(fdp); - closef(fp, p); - return (error); + goto error; } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); atomic_setbits_int(&fp->f_iflags, FIF_HASLOCK); @@ -1185,18 +1186,22 @@ doopenat(struct proc *p, int fd, const c fdplock(fdp); /* closef will close the file for us. */ fdremove(fdp, indx); - fdpunlock(fdp); - closef(fp, p); - return (error); + goto error; } } VOP_UNLOCK(vp); + KERNEL_UNLOCK(); *retval = indx; fdplock(fdp); fdinsert(fdp, indx, cloexec, fp); fdpunlock(fdp); FRELE(fp, p); return (error); +error: + KERNEL_UNLOCK(); + fdpunlock(fdp); + closef(fp, p); + return (error); } /* @@ -2066,10 +2071,14 @@ dofstatat(struct proc *p, int fd, const NDINITAT(&nd, LOOKUP, follow | LOCKLEAF, UIO_USERSPACE, fd, path, p); nd.ni_pledge = PLEDGE_RPATH; nd.ni_unveil = UNVEIL_READ; - if ((error = namei(&nd)) != 0) + KERNEL_LOCK(); + if ((error = namei(&nd)) != 0) { + KERNEL_UNLOCK(); return (error); + } error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); + KERNEL_UNLOCK(); if (error) return (error); /* Don't let non-root see generation numbers (for NFS security) */ Index: kern/vfs_vnops.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_vnops.c,v diff -u -p -r1.123 vfs_vnops.c --- kern/vfs_vnops.c 5 Nov 2024 06:03:19 -0000 1.123 +++ kern/vfs_vnops.c 2 Dec 2024 16:42:23 -0000 @@ -427,7 +427,13 @@ int vn_statfile(struct file *fp, struct stat *sb, struct proc *p) { struct vnode *vp = fp->f_data; - return vn_stat(vp, sb, p); + int error; + + KERNEL_LOCK(); + error = vn_stat(vp, sb, p); + KERNEL_UNLOCK(); + + return (error); } /* Index: kern/kern_ktrace.c =================================================================== RCS file: /cvs/src/sys/kern/kern_ktrace.c,v diff -u -p -r1.114 kern_ktrace.c --- kern/kern_ktrace.c 15 Dec 2023 15:12:08 -0000 1.114 +++ kern/kern_ktrace.c 4 Dec 2024 09:10:07 -0000 @@ -179,7 +179,9 @@ ktrsyscall(struct proc *p, register_t co *argp++ = args[i]; if (nargs && copyin((void *)args[0], argp, nargs * sizeof(int))) memset(argp, 0, nargs * sizeof(int)); + KERNEL_LOCK(); ktrwrite(p, &kth, ktp, len); + KERNEL_UNLOCK(); free(ktp, M_TEMP, len); atomic_clearbits_int(&p->p_flag, P_INKTR); } @@ -214,7 +216,9 @@ ktrnamei(struct proc *p, char *path) atomic_setbits_int(&p->p_flag, P_INKTR); ktrinitheader(&kth, p, KTR_NAMEI); + KERNEL_LOCK(); ktrwrite(p, &kth, path, strlen(path)); + KERNEL_UNLOCK(); atomic_clearbits_int(&p->p_flag, P_INKTR); } Index: sys/syscall_mi.h =================================================================== RCS file: /cvs/src/sys/sys/syscall_mi.h,v diff -u -p -r1.36 syscall_mi.h --- sys/syscall_mi.h 29 Oct 2024 12:40:17 -0000 1.36 +++ sys/syscall_mi.h 4 Dec 2024 09:08:33 -0000 @@ -151,9 +151,7 @@ mi_syscall(struct proc *p, register_t co #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) { /* convert to mask, then include with code */ - KERNEL_LOCK(); ktrsyscall(p, code, callp->sy_argsize, argp); - KERNEL_UNLOCK(); } #endif