From: Vitaliy Makkoveev Subject: sysctl(2): push locks down to vfs_sysctl() To: tech@openbsd.org Date: Thu, 31 Oct 2024 15:27:53 +0300 1. VFS_MAXTYPENUM `maxvfsconf' is immutable, after vfsinit() updates it. 2. VFS_CONF The registered file systems array `vfsconflist' is immutable, so no locks required for vfs_bytypenum() and `vfsp' dereference. Except atomically modified `vfc_refcount', 'vfsconf' structure is immutable so use atomic_load_int(9) to load `vfc_refcount' value into `tmpvfsp' local copy. 3. VFS_BCACHESTAT The `tmpbcstats' buffer cache statistics is mutable, but kernel lock is enough to store it to local copy. So follow VFS_CONF way and allocate temporary `tmpbcstats' to pass statistics to userland. 4. Non generic filesystem case The optional (*vfs_sysctl)() handler returns EINVAL for all the cases except fusefs_sysctl(), nfs_sysctl() and ffs_sysctl(). 1. fusefs_sysctl() does read-only access to array of integers `fusefs_vars', so no locking required. 2. nfs_sysctl() is complicated, so it was wrapped by sysctl_vslock() to keep original locking. 3. ffs_sysctl() modifies array of integers `ffs_vars', so kernel lock is enough because sysctl_bounded_arr() does context switch safe load/store. Index: sys/kern/kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v diff -u -p -r1.451 kern_sysctl.c --- sys/kern/kern_sysctl.c 31 Oct 2024 10:06:51 -0000 1.451 +++ sys/kern/kern_sysctl.c 31 Oct 2024 12:21:07 -0000 @@ -266,6 +266,7 @@ sys_sysctl(struct proc *p, void *v, regi fn = fs_sysctl; break; case CTL_VFS: + dolock = 0; fn = vfs_sysctl; break; case CTL_MACHDEP: Index: sys/kern/vfs_init.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_init.c,v diff -u -p -r1.44 vfs_init.c --- sys/kern/vfs_init.c 20 May 2024 09:11:21 -0000 1.44 +++ sys/kern/vfs_init.c 31 Oct 2024 12:21:07 -0000 @@ -44,13 +44,18 @@ #include #include +/* + * Locks used to protect data: + * I immutable after creation + */ + struct pool namei_pool; /* This defines the root filesystem. */ struct vnode *rootvnode; /* Set up the filesystem operations for vnodes. */ -static struct vfsconf vfsconflist[] = { +static struct vfsconf vfsconflist[] = { /* [I] */ #ifdef FFS { &ffs_vfsops, MOUNT_FFS, 1, 0, MNT_LOCAL | MNT_SWAPPABLE, sizeof(struct ufs_args) }, @@ -107,7 +112,7 @@ static struct vfsconf vfsconflist[] = { * Initially the size of the list, vfsinit will set maxvfsconf * to the highest defined type number. */ -int maxvfsconf = sizeof(vfsconflist) / sizeof(struct vfsconf); +int maxvfsconf = sizeof(vfsconflist) / sizeof(struct vfsconf); /* [I] */ /* Initialize the vnode structures and initialize each file system type. */ void Index: sys/kern/vfs_subr.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_subr.c,v diff -u -p -r1.325 vfs_subr.c --- sys/kern/vfs_subr.c 31 Oct 2024 10:06:51 -0000 1.325 +++ sys/kern/vfs_subr.c 31 Oct 2024 12:21:07 -0000 @@ -1375,6 +1375,7 @@ vfs_sysctl(int *name, u_int namelen, voi size_t newlen, struct proc *p) { struct vfsconf *vfsp, *tmpvfsp; + struct bcachestats *tmpbcstats; int ret; /* all sysctl names at this level are at least name and field */ @@ -1402,10 +1403,14 @@ vfs_sysctl(int *name, u_int namelen, voi if (vfsp == NULL) return (EOPNOTSUPP); - /* Make a copy, clear out kernel pointers */ + /* + * Make a copy, clear out kernel pointers, + * atomically load refcount + */ tmpvfsp = malloc(sizeof(*tmpvfsp), M_TEMP, M_WAITOK|M_ZERO); memcpy(tmpvfsp, vfsp, sizeof(*tmpvfsp)); tmpvfsp->vfc_vfsops = NULL; + tmpvfsp->vfc_refcount = atomic_load_int(&vfsp->vfc_refcount); ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpvfsp, sizeof(struct vfsconf)); @@ -1413,9 +1418,17 @@ vfs_sysctl(int *name, u_int namelen, voi free(tmpvfsp, M_TEMP, sizeof(*tmpvfsp)); return (ret); case VFS_BCACHESTAT: /* buffer cache statistics */ - ret = sysctl_rdstruct(oldp, oldlenp, newp, &bcstats, - sizeof(struct bcachestats)); - return(ret); + tmpbcstats = malloc(sizeof(*tmpbcstats), M_TEMP, + M_WAITOK|M_ZERO); + KERNEL_LOCK(); + memcpy(tmpbcstats, &bcstats, sizeof(*tmpbcstats)); + KERNEL_UNLOCK(); + + ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpbcstats, + sizeof(*tmpbcstats)); + + free(tmpbcstats, M_TEMP, sizeof(*tmpbcstats)); + return (ret); } return (EOPNOTSUPP); } Index: sys/nfs/nfs_vfsops.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_vfsops.c,v diff -u -p -r1.132 nfs_vfsops.c --- sys/nfs/nfs_vfsops.c 30 Oct 2024 06:16:27 -0000 1.132 +++ sys/nfs/nfs_vfsops.c 31 Oct 2024 12:21:07 -0000 @@ -65,6 +65,8 @@ extern struct nfsstats nfsstats; extern int nfs_ticks; +int nfs_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t, + struct proc *); int nfs_sysctl(int *, u_int, void *, size_t *, void *, size_t, struct proc *); int nfs_checkexp(struct mount *, struct mbuf *, int *, struct ucred **); @@ -844,8 +846,8 @@ nfs_vget(struct mount *mp, ino_t ino, st * Do that sysctl thang... */ int -nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen, struct proc *p) +nfs_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen, struct proc *p) { int rv; @@ -892,6 +894,21 @@ nfs_sysctl(int *name, u_int namelen, voi } } +int +nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen, struct proc *p) +{ + size_t savelen = *oldlenp; + int error; + + if ((error = sysctl_vslock(oldp, savelen))) + return error; + error = nfs_sysctl_locked(name, namelen, oldp, oldlenp, + newp, newlen, p); + sysctl_vsunlock(oldp, savelen); + + return error; +} /* * At this point, this should never happen Index: sys/sys/mount.h =================================================================== RCS file: /cvs/src/sys/sys/mount.h,v diff -u -p -r1.151 mount.h --- sys/sys/mount.h 3 Feb 2024 18:51:58 -0000 1.151 +++ sys/sys/mount.h 31 Oct 2024 12:21:07 -0000 @@ -42,6 +42,12 @@ #include #include +/* + * Locks used to protect data: + * I immutable after creation + * a atomic + */ + typedef struct { int32_t val[2]; } fsid_t; /* file system id type */ /* @@ -456,12 +462,13 @@ typedef struct fhandle fhandle_t; * mount time to identify the requested filesystem. */ struct vfsconf { - const struct vfsops *vfc_vfsops; /* filesystem operations vector */ - char vfc_name[MFSNAMELEN]; /* filesystem type name */ - int vfc_typenum; /* historic filesystem type number */ - u_int vfc_refcount; /* number mounted of this type */ - int vfc_flags; /* permanent flags */ - size_t vfc_datasize; /* size of data args */ + const struct vfsops *vfc_vfsops; /* [I] filesystem operations vector */ + char vfc_name[MFSNAMELEN]; /* [I] filesystem type name */ + int vfc_typenum; /* [I] historic filesystem type + number */ + u_int vfc_refcount; /* [a] number mounted of this type */ + int vfc_flags; /* [I] permanent flags */ + size_t vfc_datasize; /* [I] size of data args */ }; /* buffer cache statistics */ Index: sys/ufs/ffs/ffs_vfsops.c =================================================================== RCS file: /cvs/src/sys/ufs/ffs/ffs_vfsops.c,v diff -u -p -r1.198 ffs_vfsops.c --- sys/ufs/ffs/ffs_vfsops.c 3 Feb 2024 18:51:58 -0000 1.198 +++ sys/ufs/ffs/ffs_vfsops.c 31 Oct 2024 12:21:07 -0000 @@ -1458,6 +1458,12 @@ int ffs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen, struct proc *p) { - return sysctl_bounded_arr(ffs_vars, nitems(ffs_vars), name, + int error; + + KERNEL_LOCK(); + error = sysctl_bounded_arr(ffs_vars, nitems(ffs_vars), name, namelen, oldp, oldlenp, newp, newlen); + KERNEL_UNLOCK(); + + return (error); }