Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
sysctl(2): push locks down to vfs_sysctl()
To:
tech@openbsd.org
Date:
Thu, 31 Oct 2024 15:27:53 +0300

Download raw body.

Thread
1. VFS_MAXTYPENUM

  `maxvfsconf' is immutable, after vfsinit() updates it. 

2. VFS_CONF

  The registered file systems array `vfsconflist' is immutable, so no
  locks required for vfs_bytypenum() and `vfsp' dereference.

  Except atomically modified `vfc_refcount', 'vfsconf' structure is
  immutable so use atomic_load_int(9) to load `vfc_refcount' value
  into `tmpvfsp' local copy.

3. VFS_BCACHESTAT

  The `tmpbcstats' buffer cache statistics is mutable, but kernel lock
  is enough to store it to local copy. So follow VFS_CONF way and
  allocate temporary `tmpbcstats' to pass statistics to userland.

4. Non generic filesystem case

  The optional (*vfs_sysctl)() handler returns EINVAL for all the cases
  except fusefs_sysctl(), nfs_sysctl() and ffs_sysctl().

   1. fusefs_sysctl() does read-only access to array of integers
      `fusefs_vars', so no locking required.

   2. nfs_sysctl() is complicated, so it was wrapped by sysctl_vslock()
      to keep original locking.

   3. ffs_sysctl() modifies array of integers `ffs_vars', so kernel lock
      is enough because sysctl_bounded_arr() does context switch safe
      load/store.

Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
diff -u -p -r1.451 kern_sysctl.c
--- sys/kern/kern_sysctl.c	31 Oct 2024 10:06:51 -0000	1.451
+++ sys/kern/kern_sysctl.c	31 Oct 2024 12:21:07 -0000
@@ -266,6 +266,7 @@ sys_sysctl(struct proc *p, void *v, regi
 		fn = fs_sysctl;
 		break;
 	case CTL_VFS:
+		dolock = 0;
 		fn = vfs_sysctl;
 		break;
 	case CTL_MACHDEP:
Index: sys/kern/vfs_init.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_init.c,v
diff -u -p -r1.44 vfs_init.c
--- sys/kern/vfs_init.c	20 May 2024 09:11:21 -0000	1.44
+++ sys/kern/vfs_init.c	31 Oct 2024 12:21:07 -0000
@@ -44,13 +44,18 @@
 #include <sys/vnode.h>
 #include <sys/pool.h>
 
+/*
+ * Locks used to protect data:
+ *	I	immutable after creation
+ */
+
 struct pool namei_pool;
 
 /* This defines the root filesystem. */
 struct vnode *rootvnode;
 
 /* Set up the filesystem operations for vnodes. */
-static struct vfsconf vfsconflist[] = {
+static struct vfsconf vfsconflist[] = {				/* [I] */
 #ifdef FFS
         { &ffs_vfsops, MOUNT_FFS, 1, 0, MNT_LOCAL | MNT_SWAPPABLE,
 	    sizeof(struct ufs_args) },
@@ -107,7 +112,7 @@ static struct vfsconf vfsconflist[] = {
  * Initially the size of the list, vfsinit will set maxvfsconf
  * to the highest defined type number.
  */
-int maxvfsconf = sizeof(vfsconflist) / sizeof(struct vfsconf);
+int maxvfsconf = sizeof(vfsconflist) / sizeof(struct vfsconf); /* [I] */
 
 /* Initialize the vnode structures and initialize each file system type. */
 void
Index: sys/kern/vfs_subr.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_subr.c,v
diff -u -p -r1.325 vfs_subr.c
--- sys/kern/vfs_subr.c	31 Oct 2024 10:06:51 -0000	1.325
+++ sys/kern/vfs_subr.c	31 Oct 2024 12:21:07 -0000
@@ -1375,6 +1375,7 @@ vfs_sysctl(int *name, u_int namelen, voi
     size_t newlen, struct proc *p)
 {
 	struct vfsconf *vfsp, *tmpvfsp;
+	struct bcachestats *tmpbcstats;
 	int ret;
 
 	/* all sysctl names at this level are at least name and field */
@@ -1402,10 +1403,14 @@ vfs_sysctl(int *name, u_int namelen, voi
 		if (vfsp == NULL)
 			return (EOPNOTSUPP);
 
-		/* Make a copy, clear out kernel pointers */
+		/*
+		 * Make a copy, clear out kernel pointers,
+		 * atomically load refcount
+		 */
 		tmpvfsp = malloc(sizeof(*tmpvfsp), M_TEMP, M_WAITOK|M_ZERO);
 		memcpy(tmpvfsp, vfsp, sizeof(*tmpvfsp));
 		tmpvfsp->vfc_vfsops = NULL;
+		tmpvfsp->vfc_refcount = atomic_load_int(&vfsp->vfc_refcount);
 
 		ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpvfsp,
 		    sizeof(struct vfsconf));
@@ -1413,9 +1418,17 @@ vfs_sysctl(int *name, u_int namelen, voi
 		free(tmpvfsp, M_TEMP, sizeof(*tmpvfsp));
 		return (ret);
 	case VFS_BCACHESTAT:	/* buffer cache statistics */
-		ret = sysctl_rdstruct(oldp, oldlenp, newp, &bcstats,
-		    sizeof(struct bcachestats));
-		return(ret);
+		tmpbcstats = malloc(sizeof(*tmpbcstats), M_TEMP,
+		    M_WAITOK|M_ZERO);
+		KERNEL_LOCK();
+		memcpy(tmpbcstats, &bcstats, sizeof(*tmpbcstats));
+		KERNEL_UNLOCK();
+
+		ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpbcstats,
+		    sizeof(*tmpbcstats));
+
+		free(tmpbcstats, M_TEMP, sizeof(*tmpbcstats));
+		return (ret);
 	}
 	return (EOPNOTSUPP);
 }
Index: sys/nfs/nfs_vfsops.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_vfsops.c,v
diff -u -p -r1.132 nfs_vfsops.c
--- sys/nfs/nfs_vfsops.c	30 Oct 2024 06:16:27 -0000	1.132
+++ sys/nfs/nfs_vfsops.c	31 Oct 2024 12:21:07 -0000
@@ -65,6 +65,8 @@
 extern struct nfsstats nfsstats;
 extern int nfs_ticks;
 
+int	nfs_sysctl_locked(int *, u_int, void *, size_t *, void *, size_t,
+	    struct proc *);
 int	nfs_sysctl(int *, u_int, void *, size_t *, void *, size_t,
 	    struct proc *);
 int	nfs_checkexp(struct mount *, struct mbuf *, int *, struct ucred **);
@@ -844,8 +846,8 @@ nfs_vget(struct mount *mp, ino_t ino, st
  * Do that sysctl thang...
  */
 int
-nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen, struct proc *p)
+nfs_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen, struct proc *p)
 {
 	int rv;
 
@@ -892,6 +894,21 @@ nfs_sysctl(int *name, u_int namelen, voi
 	}
 }
 
+int
+nfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
+    size_t newlen, struct proc *p)
+{
+	size_t savelen = *oldlenp;
+	int error;
+
+	if ((error = sysctl_vslock(oldp, savelen)))
+		return error;
+	error = nfs_sysctl_locked(name, namelen, oldp, oldlenp,
+	    newp, newlen, p);	
+	sysctl_vsunlock(oldp, savelen);
+
+	return error;
+}
 
 /*
  * At this point, this should never happen
Index: sys/sys/mount.h
===================================================================
RCS file: /cvs/src/sys/sys/mount.h,v
diff -u -p -r1.151 mount.h
--- sys/sys/mount.h	3 Feb 2024 18:51:58 -0000	1.151
+++ sys/sys/mount.h	31 Oct 2024 12:21:07 -0000
@@ -42,6 +42,12 @@
 #include <sys/queue.h>
 #include <sys/rwlock.h>
 
+/*
+ * Locks used to protect data:
+ *	I	immutable after creation
+ *	a	atomic
+ */
+
 typedef struct { int32_t val[2]; } fsid_t;	/* file system id type */
 
 /*
@@ -456,12 +462,13 @@ typedef struct fhandle	fhandle_t;
  * mount time to identify the requested filesystem.
  */
 struct vfsconf {
-	const struct vfsops *vfc_vfsops; /* filesystem operations vector */
-	char	vfc_name[MFSNAMELEN];	/* filesystem type name */
-	int	vfc_typenum;		/* historic filesystem type number */
-	u_int	vfc_refcount;		/* number mounted of this type */
-	int	vfc_flags;		/* permanent flags */
-	size_t	vfc_datasize;		/* size of data args */
+	const struct vfsops *vfc_vfsops; /* [I] filesystem operations vector */
+	char	vfc_name[MFSNAMELEN];	/* [I] filesystem type name */
+	int	vfc_typenum;		/* [I] historic filesystem type
+					    number */
+	u_int	vfc_refcount;		/* [a] number mounted of this type */
+	int	vfc_flags;		/* [I] permanent flags */
+	size_t	vfc_datasize;		/* [I] size of data args */
 };
 
 /* buffer cache statistics */
Index: sys/ufs/ffs/ffs_vfsops.c
===================================================================
RCS file: /cvs/src/sys/ufs/ffs/ffs_vfsops.c,v
diff -u -p -r1.198 ffs_vfsops.c
--- sys/ufs/ffs/ffs_vfsops.c	3 Feb 2024 18:51:58 -0000	1.198
+++ sys/ufs/ffs/ffs_vfsops.c	31 Oct 2024 12:21:07 -0000
@@ -1458,6 +1458,12 @@ int
 ffs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
     size_t newlen, struct proc *p)
 {
-	return sysctl_bounded_arr(ffs_vars, nitems(ffs_vars), name,
+	int error;
+
+	KERNEL_LOCK();
+	error = sysctl_bounded_arr(ffs_vars, nitems(ffs_vars), name,
 	    namelen, oldp, oldlenp, newp, newlen);
+	KERNEL_UNLOCK();
+
+	return (error);
 }