Index | Thread | Search

From:
Martin Pieuchot <mpi@grenadille.net>
Subject:
Use foffset() to push the KERNEL_LOCK()
To:
anton@openbsd.org, visa@openbsd.org
Cc:
tech@openbsd.org
Date:
Mon, 27 Oct 2025 10:08:08 +0100

Download raw body.

Thread
Diff below uses foffset() in vn_read() and getdents(2) to push the
KERNEL_LOCK() a bit further.  This is not much in itself but is a
requirement for upcoming changes.

By reading the current offset before grabbing the vnode lock we
accept a race: another thread could change it.

While here, grab the corresponding mutex before writing `f_offset' in
dt(4).

ok?

Index: kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
diff -u -p -r1.270 syscalls.master
--- kern/syscalls.master	24 May 2025 06:49:16 -0000	1.270
+++ kern/syscalls.master	27 Oct 2025 08:49:58 -0000
@@ -213,7 +213,7 @@
 97	STD NOLOCK	{ int sys_socket(int domain, int type, int protocol); }
 98	STD NOLOCK	{ int sys_connect(int s, const struct sockaddr *name, \
 			    socklen_t namelen); }
-99	STD		{ int sys_getdents(int fd, void *buf, size_t buflen); }
+99	STD NOLOCK	{ int sys_getdents(int fd, void *buf, size_t buflen); }
 100	STD		{ int sys_getpriority(int which, id_t who); }
 101	STD NOLOCK	{ int sys_pipe2(int *fdp, int flags); }
 102	STD NOLOCK	{ int sys_dup3(int from, int to, int flags); }
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
diff -u -p -r1.378 vfs_syscalls.c
--- kern/vfs_syscalls.c	20 Sep 2025 13:53:36 -0000	1.378
+++ kern/vfs_syscalls.c	27 Oct 2025 08:51:30 -0000
@@ -3146,6 +3146,7 @@ sys_getdents(struct proc *p, void *v, re
 	struct iovec aiov;
 	size_t buflen;
 	int error, eofflag;
+	off_t offset;
 
 	buflen = SCARG(uap, buflen);
 
@@ -3163,10 +3164,8 @@ sys_getdents(struct proc *p, void *v, re
 		goto bad;
 	}
 
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-
-	if (fp->f_offset < 0) {
-		VOP_UNLOCK(vp);
+	offset = foffset(fp);
+	if (offset < 0) {
 		error = EINVAL;
 		goto bad;
 	}
@@ -3179,12 +3178,15 @@ sys_getdents(struct proc *p, void *v, re
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_procp = p;
 	auio.uio_resid = buflen;
-	auio.uio_offset = fp->f_offset;
+	auio.uio_offset = offset;
+	KERNEL_LOCK();
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
 	mtx_enter(&fp->f_mtx);
 	fp->f_offset = auio.uio_offset;
 	mtx_leave(&fp->f_mtx);
 	VOP_UNLOCK(vp);
+	KERNEL_UNLOCK();
 	if (error)
 		goto bad;
 	*retval = buflen - auio.uio_resid;
Index: kern/vfs_vnops.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_vnops.c,v
diff -u -p -r1.125 vfs_vnops.c
--- kern/vfs_vnops.c	6 Jan 2025 08:57:23 -0000	1.125
+++ kern/vfs_vnops.c	27 Oct 2025 08:47:55 -0000
@@ -346,26 +346,20 @@ vn_read(struct file *fp, struct uio *uio
 	off_t offset;
 	int error;
 
-	KERNEL_LOCK();
-
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-
 	if ((fflags & FO_POSITION) == 0)
-		offset = uio->uio_offset = fp->f_offset;
+		offset = uio->uio_offset = foffset(fp);
 	else
 		offset = uio->uio_offset;
 
 	/* no wrap around of offsets except on character devices */
-	if (vp->v_type != VCHR && count > LLONG_MAX - offset) {
-		error = EINVAL;
-		goto done;
-	}
+	if (vp->v_type != VCHR && count > LLONG_MAX - offset)
+		return (EINVAL);
 
-	if (vp->v_type == VDIR) {
-		error = EISDIR;
-		goto done;
-	}
+	if (vp->v_type == VDIR)
+		return (EISDIR);
 
+	KERNEL_LOCK();
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0,
 	    cred);
 	if ((fflags & FO_POSITION) == 0) {
@@ -373,7 +367,6 @@ vn_read(struct file *fp, struct uio *uio
 		fp->f_offset += count - uio->uio_resid;
 		mtx_leave(&fp->f_mtx);
 	}
-done:
 	VOP_UNLOCK(vp);
 	KERNEL_UNLOCK();
 	return (error);
Index: dev/dt/dt_dev.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
diff -u -p -r1.46 dt_dev.c
--- dev/dt/dt_dev.c	22 Sep 2025 07:49:43 -0000	1.46
+++ dev/dt/dt_dev.c	27 Oct 2025 08:43:27 -0000
@@ -711,7 +711,9 @@ dt_ioctl_rd_vnode(struct dt_softc *sc, s
 			fp->f_type = DTYPE_VNODE;
 			fp->f_ops = &vnops;
 			fp->f_data = vn;
+			mtx_enter(&fp->f_mtx);
 			fp->f_offset = 0;
+			mtx_leave(&fp->f_mtx);
 			dtrv->dtrv_fd = fd;
 			fdplock(p->p_fd);
 			fdinsert(p->p_fd, fd, UF_EXCLOSE, fp);