Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Re: sysctl(2): push locks down to vfs_sysctl()
To:
tech@openbsd.org
Date:
Fri, 8 Nov 2024 20:36:43 +0300

Download raw body.

Thread
anyone?

> On 31 Oct 2024, at 15:27, Vitaliy Makkoveev <mvs@openbsd.org> wrote:
> 
> 1. VFS_MAXTYPENUM
> 
>  `maxvfsconf' is immutable, after vfsinit() updates it. 
> 
> 2. VFS_CONF
> 
>  The registered file systems array `vfsconflist' is immutable, so no
>  locks required for vfs_bytypenum() and `vfsp' dereference.
> 
>  Except atomically modified `vfc_refcount', 'vfsconf' structure is
>  immutable so use atomic_load_int(9) to load `vfc_refcount' value
>  into `tmpvfsp' local copy.
> 
> 3. VFS_BCACHESTAT
> 
>  The `tmpbcstats' buffer cache statistics is mutable, but kernel lock
>  is enough to store it to local copy. So follow VFS_CONF way and
>  allocate temporary `tmpbcstats' to pass statistics to userland.
> 
> 4. Non generic filesystem case
> 
>  The optional (*vfs_sysctl)() handler returns EINVAL for all the cases
>  except fusefs_sysctl(), nfs_sysctl() and ffs_sysctl().
> 
>   1. fusefs_sysctl() does read-only access to array of integers
>      `fusefs_vars', so no locking required.
> 
>   2. nfs_sysctl() is complicated, so it was wrapped by sysctl_vslock()
>      to keep original locking.
> 
>   3. ffs_sysctl() modifies array of integers `ffs_vars', so kernel lock
>      is enough because sysctl_bounded_arr() does context switch safe
>      load/store.
> 
> Index: sys/kern/kern_sysctl.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> diff -u -p -r1.451 kern_sysctl.c
> --- sys/kern/kern_sysctl.c	31 Oct 2024 10:06:51 -0000	1.451
> +++ sys/kern/kern_sysctl.c	31 Oct 2024 12:21:07 -0000
> @@ -266,6 +266,7 @@ sys_sysctl(struct proc *p, void *v, regi
> 		fn = fs_sysctl;
> 		break;
> 	case CTL_VFS:
> +		dolock = 0;
> 		fn = vfs_sysctl;
> 		break;
> 	case CTL_MACHDEP:
> Index: sys/kern/vfs_init.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_init.c,v
> diff -u -p -r1.44 vfs_init.c
> --- sys/kern/vfs_init.c	20 May 2024 09:11:21 -0000	1.44
> +++ sys/kern/vfs_init.c	31 Oct 2024 12:21:07 -0000
> @@ -44,13 +44,18 @@
> #include <sys/vnode.h>
> #include <sys/pool.h>
> 
> +/*
> + * Locks used to protect data:
> + *	I	immutable after creation
> + */
> +
> struct pool namei_pool;
> 
> /* This defines the root filesystem. */
> struct vnode *rootvnode;
> 
> /* Set up the filesystem operations for vnodes. */
> -static struct vfsconf vfsconflist[] = {
> +static struct vfsconf vfsconflist[] = {				/* [I] */
> #ifdef FFS
>         { &ffs_vfsops, MOUNT_FFS, 1, 0, MNT_LOCAL | MNT_SWAPPABLE,
> 	    sizeof(struct ufs_args) },
> @@ -107,7 +112,7 @@ static struct vfsconf vfsconflist[] = {
>  * Initially the size of the list, vfsinit will set maxvfsconf
>  * to the highest defined type number.
>  */
> -int maxvfsconf = sizeof(vfsconflist) / sizeof(struct vfsconf);
> +int maxvfsconf = sizeof(vfsconflist) / sizeof(struct vfsconf); /* [I] */
> 
> /* Initialize the vnode structures and initialize each file system type. */
> void
> Index: sys/kern/vfs_subr.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_subr.c,v
> diff -u -p -r1.325 vfs_subr.c
> --- sys/kern/vfs_subr.c	31 Oct 2024 10:06:51 -0000	1.325
> +++ sys/kern/vfs_subr.c	31 Oct 2024 12:21:07 -0000
> @@ -1375,6 +1375,7 @@ vfs_sysctl(int *name, u_int namelen, voi
>     size_t newlen, struct proc *p)
> {
> 	struct vfsconf *vfsp, *tmpvfsp;
> +	struct bcachestats *tmpbcstats;
> 	int ret;
> 
> 	/* all sysctl names at this level are at least name and field */
> @@ -1402,10 +1403,14 @@ vfs_sysctl(int *name, u_int namelen, voi
> 		if (vfsp == NULL)
> 			return (EOPNOTSUPP);
> 
> -		/* Make a copy, clear out kernel pointers */
> +		/*
> +		 * Make a copy, clear out kernel pointers,
> +		 * atomically load refcount
> +		 */
> 		tmpvfsp = malloc(sizeof(*tmpvfsp), M_TEMP, M_WAITOK|M_ZERO);
> 		memcpy(tmpvfsp, vfsp, sizeof(*tmpvfsp));
> 		tmpvfsp->vfc_vfsops = NULL;
> +		tmpvfsp->vfc_refcount = atomic_load_int(&vfsp->vfc_refcount);
> 
> 		ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpvfsp,
> 		    sizeof(struct vfsconf));
> @@ -1413,9 +1418,17 @@ vfs_sysctl(int *name, u_int namelen, voi
> 		free(tmpvfsp, M_TEMP, sizeof(*tmpvfsp));
> 		return (ret);
> 	case VFS_BCACHESTAT:	/* buffer cache statistics */
> -		ret = sysctl_rdstruct(oldp, oldlenp, newp, &bcstats,
> -		    sizeof(struct bcachestats));
> -		return(ret);
> +		tmpbcstats = malloc(sizeof(*tmpbcstats), M_TEMP,
> +		    M_WAITOK|M_ZERO);
> +		KERNEL_LOCK();
> +		memcpy(tmpbcstats, &bcstats, sizeof(*tmpbcstats));
> +		KERNEL_UNLOCK();
> +
> +		ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpbcstats,
> +		    sizeof(*tmpbcstats));
> +
> +		free(tmpbcstats, M_TEMP, sizeof(*tmpbcstats));
> +		return (ret);
> 	}
> 	return (EOPNOTSUPP);
> }
> Index: sys/nfs/nfs_vfsops.c
> ===================================================================
> RCS file: /cvs/src/sys/nfs/nfs_vfsops.c,v
> diff -u -p -r1.132 nfs_vfsops.c
> --- sys/nfs/nfs_vfsops.c	30 Oct 2024 06:16:27 -0000	1.132
> +++ sys/nfs/nfs_vfsops.c	31 Oct 2024 12:21:07 -0000
> @@ -65,6 +65,8 @@
> extern struct nfsstats nfsstats;
> extern int nfs_ticks;
> 
> +int	nfs_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t,
> +	    struct proc *);
> int	nfs_sysctl(int *, u_int, void *, size_t *, void *, size_t,
> 	    struct proc *);
> int	nfs_checkexp(struct mount *, struct mbuf *, int *, struct ucred **);
> @@ -844,8 +846,8 @@ nfs_vget(struct mount *mp, ino_t ino, st
>  * Do that sysctl thang...
>  */
> int
> -nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
> -    size_t newlen, struct proc *p)
> +nfs_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
> +    void *newp, size_t newlen, struct proc *p)
> {
> 	int rv;
> 
> @@ -892,6 +894,21 @@ nfs_sysctl(int *name, u_int namelen, voi
> 	}
> }
> 
> +int
> +nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
> +    size_t newlen, struct proc *p)
> +{
> +	size_t savelen = *oldlenp;
> +	int error;
> +
> +	if ((error = sysctl_vslock(oldp, savelen)))
> +		return error;
> +	error = nfs_sysctl_locked(name, namelen, oldp, oldlenp,
> +	    newp, newlen, p);	
> +	sysctl_vsunlock(oldp, savelen);
> +
> +	return error;
> +}
> 
> /*
>  * At this point, this should never happen
> Index: sys/sys/mount.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/mount.h,v
> diff -u -p -r1.151 mount.h
> --- sys/sys/mount.h	3 Feb 2024 18:51:58 -0000	1.151
> +++ sys/sys/mount.h	31 Oct 2024 12:21:07 -0000
> @@ -42,6 +42,12 @@
> #include <sys/queue.h>
> #include <sys/rwlock.h>
> 
> +/*
> + * Locks used to protect data:
> + *	I	immutable after creation
> + *	a	atomic
> + */
> +
> typedef struct { int32_t val[2]; } fsid_t;	/* file system id type */
> 
> /*
> @@ -456,12 +462,13 @@ typedef struct fhandle	fhandle_t;
>  * mount time to identify the requested filesystem.
>  */
> struct vfsconf {
> -	const struct vfsops *vfc_vfsops; /* filesystem operations vector */
> -	char	vfc_name[MFSNAMELEN];	/* filesystem type name */
> -	int	vfc_typenum;		/* historic filesystem type number */
> -	u_int	vfc_refcount;		/* number mounted of this type */
> -	int	vfc_flags;		/* permanent flags */
> -	size_t	vfc_datasize;		/* size of data args */
> +	const struct vfsops *vfc_vfsops; /* [I] filesystem operations vector */
> +	char	vfc_name[MFSNAMELEN];	/* [I] filesystem type name */
> +	int	vfc_typenum;		/* [I] historic filesystem type
> +					    number */
> +	u_int	vfc_refcount;		/* [a] number mounted of this type */
> +	int	vfc_flags;		/* [I] permanent flags */
> +	size_t	vfc_datasize;		/* [I] size of data args */
> };
> 
> /* buffer cache statistics */
> Index: sys/ufs/ffs/ffs_vfsops.c
> ===================================================================
> RCS file: /cvs/src/sys/ufs/ffs/ffs_vfsops.c,v
> diff -u -p -r1.198 ffs_vfsops.c
> --- sys/ufs/ffs/ffs_vfsops.c	3 Feb 2024 18:51:58 -0000	1.198
> +++ sys/ufs/ffs/ffs_vfsops.c	31 Oct 2024 12:21:07 -0000
> @@ -1458,6 +1458,12 @@ int
> ffs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
>     size_t newlen, struct proc *p)
> {
> -	return sysctl_bounded_arr(ffs_vars, nitems(ffs_vars), name,
> +	int error;
> +
> +	KERNEL_LOCK();
> +	error = sysctl_bounded_arr(ffs_vars, nitems(ffs_vars), name,
> 	    namelen, oldp, oldlenp, newp, newlen);
> +	KERNEL_UNLOCK();
> +
> +	return (error);
> }
>