From: Helg Subject: FUSE - read whole fusebuf v2 To: tech@openbsd.org Date: Wed, 3 Sep 2025 19:50:36 +0200 Hi tech@ I've taken Claudio's feedback onboard and updated my diff, and it's all the better for it. - no more #ifdef _Kernel in fusebuf.h - libfuse uses readv(2) and writev(2) to read the fusebuf and any io data into an allocated buffer - the libfuse buffer is 4MB since that's the max size that the kernel can send - additional checks in the kernel and libfuse - MIN(a,b) macro is gone This is the first of a series of changes required to make the fuse kernel interface compatible with other patforms. It updates /dev/fuse0 so that fusebufs must be read and written in their entirety. Currently, ioctls are needed to to read and write any io data associated with a syscall such as read(2), readdir(3), readlinki(2) and write(2). ok? Index: lib/libfuse/fuse.c =================================================================== RCS file: /cvs/src/lib/libfuse/fuse.c,v diff -u -p -r1.53 fuse.c --- lib/libfuse/fuse.c 2 Sep 2025 17:18:40 -0000 1.53 +++ lib/libfuse/fuse.c 4 Sep 2025 14:56:25 -0000 @@ -17,7 +17,7 @@ #include #include -#include +#include #include @@ -154,9 +154,10 @@ fuse_loop(struct fuse *fuse) { struct fusebuf fbuf; struct fuse_context ctx; - struct fb_ioctl_xch ioexch; struct kevent event[5]; struct kevent ev; + struct iovec iov[2]; + size_t fb_dat_size = FUSEBUFMAXSIZE; ssize_t n; int ret; @@ -180,6 +181,14 @@ fuse_loop(struct fuse *fuse) EV_SET(&event[4], SIGTERM, EVFILT_SIGNAL, EV_ADD | EV_ENABLE, 0, 0, 0); + /* prepare the read and write data buffer */ + fbuf.fb_dat = malloc(fb_dat_size); + if (fbuf.fb_dat == NULL) + return (-1); + iov[0].iov_base = &fbuf; + iov[0].iov_len = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD); + iov[1].iov_base = fbuf.fb_dat; + while (!fuse->fc->dead) { ret = kevent(fuse->fc->kq, &event[0], 5, &ev, 1, NULL); if (ret == -1) { @@ -201,27 +210,38 @@ fuse_loop(struct fuse *fuse) strsignal(signum)); } } else if (ret > 0) { - n = read(fuse->fc->fd, &fbuf, sizeof(fbuf)); - if (n != sizeof(fbuf)) { + iov[1].iov_len = fb_dat_size; + n = readv(fuse->fc->fd, iov, 2); + if (n == -1) { + perror("fuse_loop"); + fprintf(stderr, "%s: bad fusebuf read: %s\n", + __func__, strerror(errno)); + free(fbuf.fb_dat); + return (-1); + } + if (n < (ssize_t)sizeof(fbuf.fb_hdr)) { fprintf(stderr, "%s: bad fusebuf read\n", __func__); + free(fbuf.fb_dat); + return (-1); + } + if ((size_t)n != sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) + + fbuf.fb_len) { + fprintf(stderr, "%s: bad fusebuf read\n", + __func__); + free(fbuf.fb_dat); return (-1); } - /* check if there is data something present */ - if (fbuf.fb_len) { - fbuf.fb_dat = malloc(fbuf.fb_len); - if (fbuf.fb_dat == NULL) - return (-1); - ioexch.fbxch_uuid = fbuf.fb_uuid; - ioexch.fbxch_len = fbuf.fb_len; - ioexch.fbxch_data = fbuf.fb_dat; - - if (ioctl(fuse->fc->fd, FIOCGETFBDAT, - &ioexch) == -1) { - free(fbuf.fb_dat); - return (-1); - } + /* + * fuse_ops check that they do not write more than + * fb_io_len when writing to fb_dat + */ + if (fbuf.fb_io_len > fb_dat_size) { + fprintf(stderr, "%s: io exceeds buffer size\n", + __func__); + free(fbuf.fb_dat); + return (-1); } ctx.fuse = fuse; @@ -235,35 +255,25 @@ fuse_loop(struct fuse *fuse) ret = ifuse_exec_opcode(fuse, &fbuf); if (ret) { ictx = NULL; + free(fbuf.fb_dat); return (-1); } - n = write(fuse->fc->fd, &fbuf, sizeof(fbuf)); - if (fbuf.fb_len) { - if (fbuf.fb_dat == NULL) { - fprintf(stderr, "%s: fb_dat is Null\n", - __func__); - return (-1); - } - ioexch.fbxch_uuid = fbuf.fb_uuid; - ioexch.fbxch_len = fbuf.fb_len; - ioexch.fbxch_data = fbuf.fb_dat; - - if (ioctl(fuse->fc->fd, FIOCSETFBDAT, &ioexch) == -1) { - free(fbuf.fb_dat); - return (-1); - } - free(fbuf.fb_dat); - } + iov[1].iov_len = fbuf.fb_len; + n = writev(fuse->fc->fd, iov, 2); + ictx = NULL; - if (n != FUSEBUFSIZE) { + if (n == -1 || (size_t)n != sizeof(fbuf.fb_hdr) + + sizeof(fbuf.FD) + fbuf.fb_len) { errno = EINVAL; + free(fbuf.fb_dat); return (-1); } } } + free(fbuf.fb_dat); return (0); } DEF(fuse_loop); Index: lib/libfuse/fuse_ops.c =================================================================== RCS file: /cvs/src/lib/libfuse/fuse_ops.c,v diff -u -p -r1.37 fuse_ops.c --- lib/libfuse/fuse_ops.c 2 Sep 2025 17:18:40 -0000 1.37 +++ lib/libfuse/fuse_ops.c 4 Sep 2025 14:56:25 -0000 @@ -317,17 +317,9 @@ ifuse_ops_readdir(struct fuse *f, struct offset = fbuf->fb_io_off; size = fbuf->fb_io_len; - fbuf->fb_dat = calloc(1, size); - - if (fbuf->fb_dat == NULL) { - fbuf->fb_err = -errno; - return (0); - } - vn = tree_get(&f->vnode_tree, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } @@ -344,7 +336,6 @@ ifuse_ops_readdir(struct fuse *f, struct realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } @@ -362,9 +353,6 @@ ifuse_ops_readdir(struct fuse *f, struct else if (fd.full && fbuf->fb_len == 0) fbuf->fb_err = -ENOBUFS; - if (fbuf->fb_len == 0) - free(fbuf->fb_dat); - return (0); } @@ -526,7 +514,6 @@ ifuse_ops_lookup(struct fuse *f, struct fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } set_vn(f, vn); /*XXX*/ @@ -537,13 +524,11 @@ ifuse_ops_lookup(struct fuse *f, struct realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn); fbuf->fb_ino = vn->ino; - free(fbuf->fb_dat); free(realname); return (0); @@ -566,23 +551,15 @@ ifuse_ops_read(struct fuse *f, struct fu size = fbuf->fb_io_len; offset = fbuf->fb_io_off; - fbuf->fb_dat = malloc(size); - if (fbuf->fb_dat == NULL) { - fbuf->fb_err = -errno; - return (0); - } - vn = tree_get(&f->vnode_tree, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } @@ -593,9 +570,6 @@ ifuse_ops_read(struct fuse *f, struct fu else fbuf->fb_err = ret; - if (fbuf->fb_len == 0) - free(fbuf->fb_dat); - return (0); } @@ -621,20 +595,17 @@ ifuse_ops_write(struct fuse *f, struct f vn = tree_get(&f->vnode_tree, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } ret = f->op.write(realname, (char *)fbuf->fb_dat, size, offset, &ffi); free(realname); - free(fbuf->fb_dat); if (ret >= 0) fbuf->fb_io_len = ret; @@ -657,11 +628,9 @@ ifuse_ops_mkdir(struct fuse *f, struct f vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } - free(fbuf->fb_dat); realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; @@ -690,11 +659,9 @@ ifuse_ops_rmdir(struct fuse *f, struct f vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } - free(fbuf->fb_dat); realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; @@ -740,11 +707,6 @@ ifuse_ops_readlink(struct fuse *f, struc if (!ret) { len = strnlen(name, PATH_MAX); fbuf->fb_len = len; - fbuf->fb_dat = malloc(fbuf->fb_len); - if (fbuf->fb_dat == NULL) { - fbuf->fb_err = -errno; - return (0); - } memcpy(fbuf->fb_dat, name, len); } else fbuf->fb_len = 0; @@ -762,12 +724,10 @@ ifuse_ops_unlink(struct fuse *f, struct vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vn == NULL) { - free(fbuf->fb_dat); fbuf->fb_err = -errno; return (0); } - free(fbuf->fb_dat); realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; @@ -821,11 +781,9 @@ ifuse_ops_link(struct fuse *f, struct fu vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } - free(fbuf->fb_dat); realname = build_realname(f, oldnodeid); if (realname == NULL) { fbuf->fb_err = -errno; @@ -863,14 +821,12 @@ ifuse_ops_setattr(struct fuse *f, struct vn = tree_get(&f->vnode_tree, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } io = fbtod(fbuf, struct fb_io *); @@ -923,7 +879,6 @@ ifuse_ops_setattr(struct fuse *f, struct if (!fbuf->fb_err) fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn); free(realname); - free(fbuf->fb_dat); return (0); } @@ -940,7 +895,6 @@ ifuse_ops_symlink(unused struct fuse *f, vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } @@ -949,7 +903,6 @@ ifuse_ops_symlink(unused struct fuse *f, realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } @@ -957,7 +910,6 @@ ifuse_ops_symlink(unused struct fuse *f, fbuf->fb_err = f->op.symlink((const char *)&fbuf->fb_dat[len + 1], realname); fbuf->fb_ino = vn->ino; - free(fbuf->fb_dat); free(realname); return (0); @@ -978,7 +930,6 @@ ifuse_ops_rename(struct fuse *f, struct vnf = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vnf == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } @@ -986,12 +937,9 @@ ifuse_ops_rename(struct fuse *f, struct fbuf->fb_io_ino); if (vnt == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } - free(fbuf->fb_dat); - realnamef = build_realname(f, vnf->ino); if (realnamef == NULL) { fbuf->fb_err = -errno; @@ -1060,11 +1008,9 @@ ifuse_ops_mknod(struct fuse *f, struct f vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); if (vn == NULL) { fbuf->fb_err = -errno; - free(fbuf->fb_dat); return (0); } - free(fbuf->fb_dat); realname = build_realname(f, vn->ino); if (realname == NULL) { fbuf->fb_err = -errno; Index: sys/miscfs/fuse/fuse_device.c =================================================================== RCS file: /cvs/src/sys/miscfs/fuse/fuse_device.c,v diff -u -p -r1.42 fuse_device.c --- sys/miscfs/fuse/fuse_device.c 2 Sep 2025 17:18:40 -0000 1.42 +++ sys/miscfs/fuse/fuse_device.c 4 Sep 2025 14:56:44 -0000 @@ -404,13 +404,8 @@ fuseread(dev_t dev, struct uio *uio, int struct fuse_d *fd; struct fusebuf *fbuf; struct fb_hdr hdr; - void *tmpaddr; int error = 0; - /* We get the whole fusebuf or nothing */ - if (uio->uio_resid != FUSEBUFSIZE) - return (EINVAL); - fd = fuse_lookup(minor(dev)); if (fd == NULL) return (ENXIO); @@ -424,29 +419,43 @@ fuseread(dev_t dev, struct uio *uio, int } fbuf = SIMPLEQ_FIRST(&fd->fd_fbufs_in); + /* We get the whole fusebuf or nothing */ + if (uio->uio_resid < sizeof(fbuf->fb_hdr) + sizeof(fbuf->FD) + + fbuf->fb_len) { + error = EINVAL; + goto end; + } + /* Do not send kernel pointers */ memcpy(&hdr.fh_next, &fbuf->fb_next, sizeof(fbuf->fb_next)); memset(&fbuf->fb_next, 0, sizeof(fbuf->fb_next)); - tmpaddr = fbuf->fb_dat; - fbuf->fb_dat = NULL; - error = uiomove(fbuf, FUSEBUFSIZE, uio); + + error = uiomove(&fbuf->fb_hdr, sizeof(fbuf->fb_hdr), uio); if (error) goto end; + error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio); + if (error) + goto end; + if (fbuf->fb_len > 0) { + error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio); + if (error) + goto end; + } #ifdef FUSE_DEBUG - fuse_dump_buff((char *)fbuf, FUSEBUFSIZE); + fuse_dump_buff((char *)fbuf, sizeof(struct fusebuf)); #endif /* Restore kernel pointers */ memcpy(&fbuf->fb_next, &hdr.fh_next, sizeof(fbuf->fb_next)); - fbuf->fb_dat = tmpaddr; - /* Remove the fbuf if it does not contains data */ - if (fbuf->fb_len == 0) { - SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next); - stat_fbufs_in--; - SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next); - stat_fbufs_wait++; - } + free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len); + fbuf->fb_dat = NULL; + + /* Move the fbuf to the wait queue */ + SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next); + stat_fbufs_in--; + SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next); + stat_fbufs_wait++; end: rw_exit_write(&fd->fd_lock); @@ -466,13 +475,21 @@ fusewrite(dev_t dev, struct uio *uio, in if (fd == NULL) return (ENXIO); - /* We get the whole fusebuf or nothing */ - if (uio->uio_resid != FUSEBUFSIZE) + /* Check for sanity - must receive more than just the header */ + if (uio->uio_resid <= sizeof(hdr)) return (EINVAL); if ((error = uiomove(&hdr, sizeof(hdr), uio)) != 0) return (error); + /* Check for sanity */ + if (hdr.fh_len > FUSEBUFMAXSIZE) + return (EINVAL); + + /* We get the whole fusebuf or nothing */ + if (uio->uio_resid != sizeof(fbuf->FD) + hdr.fh_len) + return (EINVAL); + /* looking for uuid in fd_fbufs_wait */ SIMPLEQ_FOREACH(fbuf, &fd->fd_fbufs_wait, fb_next) { if (fbuf->fb_uuid == hdr.fh_uuid) @@ -497,12 +514,24 @@ fusewrite(dev_t dev, struct uio *uio, in } /* Get the missing data from the fbuf */ - error = uiomove(&fbuf->FD, uio->uio_resid, uio); + error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio); if (error) return error; + fbuf->fb_dat = NULL; + if (fbuf->fb_len > 0) { + fbuf->fb_dat = malloc(fbuf->fb_len, M_FUSEFS, + M_WAITOK | M_ZERO); + error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio); + if (error) { + free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len); + fbuf->fb_dat = NULL; + return (error); + } + } + #ifdef FUSE_DEBUG - fuse_dump_buff((char *)fbuf, FUSEBUFSIZE); + fuse_dump_buff((char *)fbuf, sizeof(struct fusebuf)); #endif switch (fbuf->fb_type) { @@ -514,19 +543,17 @@ fusewrite(dev_t dev, struct uio *uio, in break ; } end: - /* Remove the fbuf if it does not contains data */ - if (fbuf->fb_len == 0) { - if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait)) - SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next); - else - SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf, - fb_next); - stat_fbufs_wait--; - if (fbuf->fb_type == FBT_INIT) - fb_delete(fbuf); - else - wakeup(fbuf); - } + /* Remove the fbuf from the wait queue */ + if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait)) + SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next); + else + SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf, + fb_next); + stat_fbufs_wait--; + if (fbuf->fb_type == FBT_INIT) + fb_delete(fbuf); + else + wakeup(fbuf); return (error); } Index: sys/sys/fusebuf.h =================================================================== RCS file: /cvs/src/sys/sys/fusebuf.h,v diff -u -p -r1.15 fusebuf.h --- sys/sys/fusebuf.h 2 Sep 2025 17:18:40 -0000 1.15 +++ sys/sys/fusebuf.h 4 Sep 2025 14:56:44 -0000 @@ -20,9 +20,9 @@ #define _SYS_FUSEBUF_H_ /* - * Fusebufs are of a single size, 4096 bytes. + * Maximum size of the read or write buffer sent from the kernel for VFS + * syscalls: read, readdir, readlink, write. */ -#define FUSEBUFSIZE (sizeof(struct fusebuf)) #define FUSEBUFMAXSIZE (4096*1024) /* header at beginning of each fusebuf: */