From: Martin Pieuchot Subject: Use foffset() to push the KERNEL_LOCK() To: anton@openbsd.org, visa@openbsd.org Cc: tech@openbsd.org Date: Mon, 27 Oct 2025 10:08:08 +0100 Diff below uses foffset() in vn_read() and getdents(2) to push the KERNEL_LOCK() a bit further. This is not much in itself but is a requirement for upcoming changes. By reading the current offset before grabbing the vnode lock we accept a race: another thread could change it. While here, grab the corresponding mutex before writing `f_offset' in dt(4). ok? Index: kern/syscalls.master =================================================================== RCS file: /cvs/src/sys/kern/syscalls.master,v diff -u -p -r1.270 syscalls.master --- kern/syscalls.master 24 May 2025 06:49:16 -0000 1.270 +++ kern/syscalls.master 27 Oct 2025 08:49:58 -0000 @@ -213,7 +213,7 @@ 97 STD NOLOCK { int sys_socket(int domain, int type, int protocol); } 98 STD NOLOCK { int sys_connect(int s, const struct sockaddr *name, \ socklen_t namelen); } -99 STD { int sys_getdents(int fd, void *buf, size_t buflen); } +99 STD NOLOCK { int sys_getdents(int fd, void *buf, size_t buflen); } 100 STD { int sys_getpriority(int which, id_t who); } 101 STD NOLOCK { int sys_pipe2(int *fdp, int flags); } 102 STD NOLOCK { int sys_dup3(int from, int to, int flags); } Index: kern/vfs_syscalls.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v diff -u -p -r1.378 vfs_syscalls.c --- kern/vfs_syscalls.c 20 Sep 2025 13:53:36 -0000 1.378 +++ kern/vfs_syscalls.c 27 Oct 2025 08:51:30 -0000 @@ -3146,6 +3146,7 @@ sys_getdents(struct proc *p, void *v, re struct iovec aiov; size_t buflen; int error, eofflag; + off_t offset; buflen = SCARG(uap, buflen); @@ -3163,10 +3164,8 @@ sys_getdents(struct proc *p, void *v, re goto bad; } - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - - if (fp->f_offset < 0) { - VOP_UNLOCK(vp); + offset = foffset(fp); + if (offset < 0) { error = EINVAL; goto bad; } @@ -3179,12 +3178,15 @@ sys_getdents(struct proc *p, void *v, re auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = buflen; - auio.uio_offset = fp->f_offset; + auio.uio_offset = offset; + KERNEL_LOCK(); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag); mtx_enter(&fp->f_mtx); fp->f_offset = auio.uio_offset; mtx_leave(&fp->f_mtx); VOP_UNLOCK(vp); + KERNEL_UNLOCK(); if (error) goto bad; *retval = buflen - auio.uio_resid; Index: kern/vfs_vnops.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_vnops.c,v diff -u -p -r1.125 vfs_vnops.c --- kern/vfs_vnops.c 6 Jan 2025 08:57:23 -0000 1.125 +++ kern/vfs_vnops.c 27 Oct 2025 08:47:55 -0000 @@ -346,26 +346,20 @@ vn_read(struct file *fp, struct uio *uio off_t offset; int error; - KERNEL_LOCK(); - - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - if ((fflags & FO_POSITION) == 0) - offset = uio->uio_offset = fp->f_offset; + offset = uio->uio_offset = foffset(fp); else offset = uio->uio_offset; /* no wrap around of offsets except on character devices */ - if (vp->v_type != VCHR && count > LLONG_MAX - offset) { - error = EINVAL; - goto done; - } + if (vp->v_type != VCHR && count > LLONG_MAX - offset) + return (EINVAL); - if (vp->v_type == VDIR) { - error = EISDIR; - goto done; - } + if (vp->v_type == VDIR) + return (EISDIR); + KERNEL_LOCK(); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred); if ((fflags & FO_POSITION) == 0) { @@ -373,7 +367,6 @@ vn_read(struct file *fp, struct uio *uio fp->f_offset += count - uio->uio_resid; mtx_leave(&fp->f_mtx); } -done: VOP_UNLOCK(vp); KERNEL_UNLOCK(); return (error); Index: dev/dt/dt_dev.c =================================================================== RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v diff -u -p -r1.46 dt_dev.c --- dev/dt/dt_dev.c 22 Sep 2025 07:49:43 -0000 1.46 +++ dev/dt/dt_dev.c 27 Oct 2025 08:43:27 -0000 @@ -711,7 +711,9 @@ dt_ioctl_rd_vnode(struct dt_softc *sc, s fp->f_type = DTYPE_VNODE; fp->f_ops = &vnops; fp->f_data = vn; + mtx_enter(&fp->f_mtx); fp->f_offset = 0; + mtx_leave(&fp->f_mtx); dtrv->dtrv_fd = fd; fdplock(p->p_fd); fdinsert(p->p_fd, fd, UF_EXCLOSE, fp);