Download raw body.
sysctl(2): push locks down to vfs_sysctl()
anyone?
> On 31 Oct 2024, at 15:27, Vitaliy Makkoveev <mvs@openbsd.org> wrote:
>
> 1. VFS_MAXTYPENUM
>
> `maxvfsconf' is immutable, after vfsinit() updates it.
>
> 2. VFS_CONF
>
> The registered file systems array `vfsconflist' is immutable, so no
> locks required for vfs_bytypenum() and `vfsp' dereference.
>
> Except atomically modified `vfc_refcount', 'vfsconf' structure is
> immutable so use atomic_load_int(9) to load `vfc_refcount' value
> into `tmpvfsp' local copy.
>
> 3. VFS_BCACHESTAT
>
> The `tmpbcstats' buffer cache statistics is mutable, but kernel lock
> is enough to store it to local copy. So follow VFS_CONF way and
> allocate temporary `tmpbcstats' to pass statistics to userland.
>
> 4. Non generic filesystem case
>
> The optional (*vfs_sysctl)() handler returns EINVAL for all the cases
> except fusefs_sysctl(), nfs_sysctl() and ffs_sysctl().
>
> 1. fusefs_sysctl() does read-only access to array of integers
> `fusefs_vars', so no locking required.
>
> 2. nfs_sysctl() is complicated, so it was wrapped by sysctl_vslock()
> to keep original locking.
>
> 3. ffs_sysctl() modifies array of integers `ffs_vars', so kernel lock
> is enough because sysctl_bounded_arr() does context switch safe
> load/store.
>
> Index: sys/kern/kern_sysctl.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> diff -u -p -r1.451 kern_sysctl.c
> --- sys/kern/kern_sysctl.c 31 Oct 2024 10:06:51 -0000 1.451
> +++ sys/kern/kern_sysctl.c 31 Oct 2024 12:21:07 -0000
> @@ -266,6 +266,7 @@ sys_sysctl(struct proc *p, void *v, regi
> fn = fs_sysctl;
> break;
> case CTL_VFS:
> + dolock = 0;
> fn = vfs_sysctl;
> break;
> case CTL_MACHDEP:
> Index: sys/kern/vfs_init.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_init.c,v
> diff -u -p -r1.44 vfs_init.c
> --- sys/kern/vfs_init.c 20 May 2024 09:11:21 -0000 1.44
> +++ sys/kern/vfs_init.c 31 Oct 2024 12:21:07 -0000
> @@ -44,13 +44,18 @@
> #include <sys/vnode.h>
> #include <sys/pool.h>
>
> +/*
> + * Locks used to protect data:
> + * I immutable after creation
> + */
> +
> struct pool namei_pool;
>
> /* This defines the root filesystem. */
> struct vnode *rootvnode;
>
> /* Set up the filesystem operations for vnodes. */
> -static struct vfsconf vfsconflist[] = {
> +static struct vfsconf vfsconflist[] = { /* [I] */
> #ifdef FFS
> { &ffs_vfsops, MOUNT_FFS, 1, 0, MNT_LOCAL | MNT_SWAPPABLE,
> sizeof(struct ufs_args) },
> @@ -107,7 +112,7 @@ static struct vfsconf vfsconflist[] = {
> * Initially the size of the list, vfsinit will set maxvfsconf
> * to the highest defined type number.
> */
> -int maxvfsconf = sizeof(vfsconflist) / sizeof(struct vfsconf);
> +int maxvfsconf = sizeof(vfsconflist) / sizeof(struct vfsconf); /* [I] */
>
> /* Initialize the vnode structures and initialize each file system type. */
> void
> Index: sys/kern/vfs_subr.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_subr.c,v
> diff -u -p -r1.325 vfs_subr.c
> --- sys/kern/vfs_subr.c 31 Oct 2024 10:06:51 -0000 1.325
> +++ sys/kern/vfs_subr.c 31 Oct 2024 12:21:07 -0000
> @@ -1375,6 +1375,7 @@ vfs_sysctl(int *name, u_int namelen, voi
> size_t newlen, struct proc *p)
> {
> struct vfsconf *vfsp, *tmpvfsp;
> + struct bcachestats *tmpbcstats;
> int ret;
>
> /* all sysctl names at this level are at least name and field */
> @@ -1402,10 +1403,14 @@ vfs_sysctl(int *name, u_int namelen, voi
> if (vfsp == NULL)
> return (EOPNOTSUPP);
>
> - /* Make a copy, clear out kernel pointers */
> + /*
> + * Make a copy, clear out kernel pointers,
> + * atomically load refcount
> + */
> tmpvfsp = malloc(sizeof(*tmpvfsp), M_TEMP, M_WAITOK|M_ZERO);
> memcpy(tmpvfsp, vfsp, sizeof(*tmpvfsp));
> tmpvfsp->vfc_vfsops = NULL;
> + tmpvfsp->vfc_refcount = atomic_load_int(&vfsp->vfc_refcount);
>
> ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpvfsp,
> sizeof(struct vfsconf));
> @@ -1413,9 +1418,17 @@ vfs_sysctl(int *name, u_int namelen, voi
> free(tmpvfsp, M_TEMP, sizeof(*tmpvfsp));
> return (ret);
> case VFS_BCACHESTAT: /* buffer cache statistics */
> - ret = sysctl_rdstruct(oldp, oldlenp, newp, &bcstats,
> - sizeof(struct bcachestats));
> - return(ret);
> + tmpbcstats = malloc(sizeof(*tmpbcstats), M_TEMP,
> + M_WAITOK|M_ZERO);
> + KERNEL_LOCK();
> + memcpy(tmpbcstats, &bcstats, sizeof(*tmpbcstats));
> + KERNEL_UNLOCK();
> +
> + ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpbcstats,
> + sizeof(*tmpbcstats));
> +
> + free(tmpbcstats, M_TEMP, sizeof(*tmpbcstats));
> + return (ret);
> }
> return (EOPNOTSUPP);
> }
> Index: sys/nfs/nfs_vfsops.c
> ===================================================================
> RCS file: /cvs/src/sys/nfs/nfs_vfsops.c,v
> diff -u -p -r1.132 nfs_vfsops.c
> --- sys/nfs/nfs_vfsops.c 30 Oct 2024 06:16:27 -0000 1.132
> +++ sys/nfs/nfs_vfsops.c 31 Oct 2024 12:21:07 -0000
> @@ -65,6 +65,8 @@
> extern struct nfsstats nfsstats;
> extern int nfs_ticks;
>
> +int nfs_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t,
> + struct proc *);
> int nfs_sysctl(int *, u_int, void *, size_t *, void *, size_t,
> struct proc *);
> int nfs_checkexp(struct mount *, struct mbuf *, int *, struct ucred **);
> @@ -844,8 +846,8 @@ nfs_vget(struct mount *mp, ino_t ino, st
> * Do that sysctl thang...
> */
> int
> -nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
> - size_t newlen, struct proc *p)
> +nfs_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
> + void *newp, size_t newlen, struct proc *p)
> {
> int rv;
>
> @@ -892,6 +894,21 @@ nfs_sysctl(int *name, u_int namelen, voi
> }
> }
>
> +int
> +nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
> + size_t newlen, struct proc *p)
> +{
> + size_t savelen = *oldlenp;
> + int error;
> +
> + if ((error = sysctl_vslock(oldp, savelen)))
> + return error;
> + error = nfs_sysctl_locked(name, namelen, oldp, oldlenp,
> + newp, newlen, p);
> + sysctl_vsunlock(oldp, savelen);
> +
> + return error;
> +}
>
> /*
> * At this point, this should never happen
> Index: sys/sys/mount.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/mount.h,v
> diff -u -p -r1.151 mount.h
> --- sys/sys/mount.h 3 Feb 2024 18:51:58 -0000 1.151
> +++ sys/sys/mount.h 31 Oct 2024 12:21:07 -0000
> @@ -42,6 +42,12 @@
> #include <sys/queue.h>
> #include <sys/rwlock.h>
>
> +/*
> + * Locks used to protect data:
> + * I immutable after creation
> + * a atomic
> + */
> +
> typedef struct { int32_t val[2]; } fsid_t; /* file system id type */
>
> /*
> @@ -456,12 +462,13 @@ typedef struct fhandle fhandle_t;
> * mount time to identify the requested filesystem.
> */
> struct vfsconf {
> - const struct vfsops *vfc_vfsops; /* filesystem operations vector */
> - char vfc_name[MFSNAMELEN]; /* filesystem type name */
> - int vfc_typenum; /* historic filesystem type number */
> - u_int vfc_refcount; /* number mounted of this type */
> - int vfc_flags; /* permanent flags */
> - size_t vfc_datasize; /* size of data args */
> + const struct vfsops *vfc_vfsops; /* [I] filesystem operations vector */
> + char vfc_name[MFSNAMELEN]; /* [I] filesystem type name */
> + int vfc_typenum; /* [I] historic filesystem type
> + number */
> + u_int vfc_refcount; /* [a] number mounted of this type */
> + int vfc_flags; /* [I] permanent flags */
> + size_t vfc_datasize; /* [I] size of data args */
> };
>
> /* buffer cache statistics */
> Index: sys/ufs/ffs/ffs_vfsops.c
> ===================================================================
> RCS file: /cvs/src/sys/ufs/ffs/ffs_vfsops.c,v
> diff -u -p -r1.198 ffs_vfsops.c
> --- sys/ufs/ffs/ffs_vfsops.c 3 Feb 2024 18:51:58 -0000 1.198
> +++ sys/ufs/ffs/ffs_vfsops.c 31 Oct 2024 12:21:07 -0000
> @@ -1458,6 +1458,12 @@ int
> ffs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
> size_t newlen, struct proc *p)
> {
> - return sysctl_bounded_arr(ffs_vars, nitems(ffs_vars), name,
> + int error;
> +
> + KERNEL_LOCK();
> + error = sysctl_bounded_arr(ffs_vars, nitems(ffs_vars), name,
> namelen, oldp, oldlenp, newp, newlen);
> + KERNEL_UNLOCK();
> +
> + return (error);
> }
>
sysctl(2): push locks down to vfs_sysctl()