Download raw body.
Use foffset() to push the KERNEL_LOCK()
On Mon, Oct 27, 2025 at 10:08:08AM +0100, Martin Pieuchot wrote:
> Diff below uses foffset() in vn_read() and getdents(2) to push the
> KERNEL_LOCK() a bit further. This is not much in itself but is a
> requirement for upcoming changes.
>
> By reading the current offset before grabbing the vnode lock we
> accept a race: another thread could change it.
>
> While here, grab the corresponding mutex before writing `f_offset' in
> dt(4).
>
> ok?
>
As I understand the code, vn_lock() and VOP_LOCK() paths do not require
kernel lock to be held, so you can leave vn_lock() as is and avoid
race for `f_offset'.
> Index: kern/syscalls.master
> ===================================================================
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> diff -u -p -r1.270 syscalls.master
> --- kern/syscalls.master 24 May 2025 06:49:16 -0000 1.270
> +++ kern/syscalls.master 27 Oct 2025 08:49:58 -0000
> @@ -213,7 +213,7 @@
> 97 STD NOLOCK { int sys_socket(int domain, int type, int protocol); }
> 98 STD NOLOCK { int sys_connect(int s, const struct sockaddr *name, \
> socklen_t namelen); }
> -99 STD { int sys_getdents(int fd, void *buf, size_t buflen); }
> +99 STD NOLOCK { int sys_getdents(int fd, void *buf, size_t buflen); }
> 100 STD { int sys_getpriority(int which, id_t who); }
> 101 STD NOLOCK { int sys_pipe2(int *fdp, int flags); }
> 102 STD NOLOCK { int sys_dup3(int from, int to, int flags); }
> Index: kern/vfs_syscalls.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
> diff -u -p -r1.378 vfs_syscalls.c
> --- kern/vfs_syscalls.c 20 Sep 2025 13:53:36 -0000 1.378
> +++ kern/vfs_syscalls.c 27 Oct 2025 08:51:30 -0000
> @@ -3146,6 +3146,7 @@ sys_getdents(struct proc *p, void *v, re
> struct iovec aiov;
> size_t buflen;
> int error, eofflag;
> + off_t offset;
>
> buflen = SCARG(uap, buflen);
>
> @@ -3163,10 +3164,8 @@ sys_getdents(struct proc *p, void *v, re
> goto bad;
> }
>
> - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
> -
> - if (fp->f_offset < 0) {
> - VOP_UNLOCK(vp);
> + offset = foffset(fp);
> + if (offset < 0) {
> error = EINVAL;
> goto bad;
> }
> @@ -3179,12 +3178,15 @@ sys_getdents(struct proc *p, void *v, re
> auio.uio_segflg = UIO_USERSPACE;
> auio.uio_procp = p;
> auio.uio_resid = buflen;
> - auio.uio_offset = fp->f_offset;
> + auio.uio_offset = offset;
> + KERNEL_LOCK();
> + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
> error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag);
> mtx_enter(&fp->f_mtx);
> fp->f_offset = auio.uio_offset;
> mtx_leave(&fp->f_mtx);
> VOP_UNLOCK(vp);
> + KERNEL_UNLOCK();
> if (error)
> goto bad;
> *retval = buflen - auio.uio_resid;
> Index: kern/vfs_vnops.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/vfs_vnops.c,v
> diff -u -p -r1.125 vfs_vnops.c
> --- kern/vfs_vnops.c 6 Jan 2025 08:57:23 -0000 1.125
> +++ kern/vfs_vnops.c 27 Oct 2025 08:47:55 -0000
> @@ -346,26 +346,20 @@ vn_read(struct file *fp, struct uio *uio
> off_t offset;
> int error;
>
> - KERNEL_LOCK();
> -
> - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
> -
> if ((fflags & FO_POSITION) == 0)
> - offset = uio->uio_offset = fp->f_offset;
> + offset = uio->uio_offset = foffset(fp);
> else
> offset = uio->uio_offset;
>
> /* no wrap around of offsets except on character devices */
> - if (vp->v_type != VCHR && count > LLONG_MAX - offset) {
> - error = EINVAL;
> - goto done;
> - }
> + if (vp->v_type != VCHR && count > LLONG_MAX - offset)
> + return (EINVAL);
>
> - if (vp->v_type == VDIR) {
> - error = EISDIR;
> - goto done;
> - }
> + if (vp->v_type == VDIR)
> + return (EISDIR);
>
> + KERNEL_LOCK();
> + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
> error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0,
> cred);
> if ((fflags & FO_POSITION) == 0) {
> @@ -373,7 +367,6 @@ vn_read(struct file *fp, struct uio *uio
> fp->f_offset += count - uio->uio_resid;
> mtx_leave(&fp->f_mtx);
> }
> -done:
> VOP_UNLOCK(vp);
> KERNEL_UNLOCK();
> return (error);
> Index: dev/dt/dt_dev.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
> diff -u -p -r1.46 dt_dev.c
> --- dev/dt/dt_dev.c 22 Sep 2025 07:49:43 -0000 1.46
> +++ dev/dt/dt_dev.c 27 Oct 2025 08:43:27 -0000
> @@ -711,7 +711,9 @@ dt_ioctl_rd_vnode(struct dt_softc *sc, s
> fp->f_type = DTYPE_VNODE;
> fp->f_ops = &vnops;
> fp->f_data = vn;
> + mtx_enter(&fp->f_mtx);
> fp->f_offset = 0;
> + mtx_leave(&fp->f_mtx);
> dtrv->dtrv_fd = fd;
> fdplock(p->p_fd);
> fdinsert(p->p_fd, fd, UF_EXCLOSE, fp);
>
>
Use foffset() to push the KERNEL_LOCK()