From: Claudio Jeker Subject: Re: FUSE - read whole fusebuf v2 To: Helg Cc: tech@openbsd.org Date: Fri, 5 Sep 2025 22:57:27 +0200 On Wed, Sep 03, 2025 at 07:50:36PM +0200, Helg wrote: > Hi tech@ > > I've taken Claudio's feedback onboard and updated my diff, and it's all > the better for it. > > - no more #ifdef _Kernel in fusebuf.h > - libfuse uses readv(2) and writev(2) to read the fusebuf and any io data > into an allocated buffer > - the libfuse buffer is 4MB since that's the max size that the kernel > can send > - additional checks in the kernel and libfuse > - MIN(a,b) macro is gone > > This is the first of a series of changes required to make the fuse > kernel interface compatible with other patforms. It updates /dev/fuse0 so > that fusebufs must be read and written in their entirety. Currently, > ioctls are needed to to read and write any io data associated with a > syscall such as read(2), readdir(3), readlinki(2) and write(2). > > ok? I don't really use any fuse fs so I did test this. I reviewed it. OK claudio@ > Index: lib/libfuse/fuse.c > =================================================================== > RCS file: /cvs/src/lib/libfuse/fuse.c,v > diff -u -p -r1.53 fuse.c > --- lib/libfuse/fuse.c 2 Sep 2025 17:18:40 -0000 1.53 > +++ lib/libfuse/fuse.c 4 Sep 2025 14:56:25 -0000 > @@ -17,7 +17,7 @@ > > #include > #include > -#include > +#include > > #include > > @@ -154,9 +154,10 @@ fuse_loop(struct fuse *fuse) > { > struct fusebuf fbuf; > struct fuse_context ctx; > - struct fb_ioctl_xch ioexch; > struct kevent event[5]; > struct kevent ev; > + struct iovec iov[2]; > + size_t fb_dat_size = FUSEBUFMAXSIZE; > ssize_t n; > int ret; > > @@ -180,6 +181,14 @@ fuse_loop(struct fuse *fuse) > EV_SET(&event[4], SIGTERM, EVFILT_SIGNAL, EV_ADD | > EV_ENABLE, 0, 0, 0); > > + /* prepare the read and write data buffer */ > + fbuf.fb_dat = malloc(fb_dat_size); > + if (fbuf.fb_dat == NULL) > + return (-1); > + iov[0].iov_base = &fbuf; > + iov[0].iov_len = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD); > + iov[1].iov_base = fbuf.fb_dat; > + > while (!fuse->fc->dead) { > ret = kevent(fuse->fc->kq, &event[0], 5, &ev, 1, NULL); > if (ret == -1) { > @@ -201,27 +210,38 @@ fuse_loop(struct fuse *fuse) > strsignal(signum)); > } > } else if (ret > 0) { > - n = read(fuse->fc->fd, &fbuf, sizeof(fbuf)); > - if (n != sizeof(fbuf)) { > + iov[1].iov_len = fb_dat_size; > + n = readv(fuse->fc->fd, iov, 2); > + if (n == -1) { > + perror("fuse_loop"); > + fprintf(stderr, "%s: bad fusebuf read: %s\n", > + __func__, strerror(errno)); > + free(fbuf.fb_dat); > + return (-1); > + } > + if (n < (ssize_t)sizeof(fbuf.fb_hdr)) { > fprintf(stderr, "%s: bad fusebuf read\n", > __func__); > + free(fbuf.fb_dat); > + return (-1); > + } > + if ((size_t)n != sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) + > + fbuf.fb_len) { > + fprintf(stderr, "%s: bad fusebuf read\n", > + __func__); > + free(fbuf.fb_dat); > return (-1); > } > > - /* check if there is data something present */ > - if (fbuf.fb_len) { > - fbuf.fb_dat = malloc(fbuf.fb_len); > - if (fbuf.fb_dat == NULL) > - return (-1); > - ioexch.fbxch_uuid = fbuf.fb_uuid; > - ioexch.fbxch_len = fbuf.fb_len; > - ioexch.fbxch_data = fbuf.fb_dat; > - > - if (ioctl(fuse->fc->fd, FIOCGETFBDAT, > - &ioexch) == -1) { > - free(fbuf.fb_dat); > - return (-1); > - } > + /* > + * fuse_ops check that they do not write more than > + * fb_io_len when writing to fb_dat > + */ > + if (fbuf.fb_io_len > fb_dat_size) { > + fprintf(stderr, "%s: io exceeds buffer size\n", > + __func__); > + free(fbuf.fb_dat); > + return (-1); > } > > ctx.fuse = fuse; > @@ -235,35 +255,25 @@ fuse_loop(struct fuse *fuse) > ret = ifuse_exec_opcode(fuse, &fbuf); > if (ret) { > ictx = NULL; > + free(fbuf.fb_dat); > return (-1); > } > > - n = write(fuse->fc->fd, &fbuf, sizeof(fbuf)); > - if (fbuf.fb_len) { > - if (fbuf.fb_dat == NULL) { > - fprintf(stderr, "%s: fb_dat is Null\n", > - __func__); > - return (-1); > - } > - ioexch.fbxch_uuid = fbuf.fb_uuid; > - ioexch.fbxch_len = fbuf.fb_len; > - ioexch.fbxch_data = fbuf.fb_dat; > - > - if (ioctl(fuse->fc->fd, FIOCSETFBDAT, &ioexch) == -1) { > - free(fbuf.fb_dat); > - return (-1); > - } > - free(fbuf.fb_dat); > - } > + iov[1].iov_len = fbuf.fb_len; > + n = writev(fuse->fc->fd, iov, 2); > + > ictx = NULL; > > - if (n != FUSEBUFSIZE) { > + if (n == -1 || (size_t)n != sizeof(fbuf.fb_hdr) + > + sizeof(fbuf.FD) + fbuf.fb_len) { > errno = EINVAL; > + free(fbuf.fb_dat); > return (-1); > } > } > } > > + free(fbuf.fb_dat); > return (0); > } > DEF(fuse_loop); > Index: lib/libfuse/fuse_ops.c > =================================================================== > RCS file: /cvs/src/lib/libfuse/fuse_ops.c,v > diff -u -p -r1.37 fuse_ops.c > --- lib/libfuse/fuse_ops.c 2 Sep 2025 17:18:40 -0000 1.37 > +++ lib/libfuse/fuse_ops.c 4 Sep 2025 14:56:25 -0000 > @@ -317,17 +317,9 @@ ifuse_ops_readdir(struct fuse *f, struct > offset = fbuf->fb_io_off; > size = fbuf->fb_io_len; > > - fbuf->fb_dat = calloc(1, size); > - > - if (fbuf->fb_dat == NULL) { > - fbuf->fb_err = -errno; > - return (0); > - } > - > vn = tree_get(&f->vnode_tree, fbuf->fb_ino); > if (vn == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > @@ -344,7 +336,6 @@ ifuse_ops_readdir(struct fuse *f, struct > realname = build_realname(f, vn->ino); > if (realname == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > @@ -362,9 +353,6 @@ ifuse_ops_readdir(struct fuse *f, struct > else if (fd.full && fbuf->fb_len == 0) > fbuf->fb_err = -ENOBUFS; > > - if (fbuf->fb_len == 0) > - free(fbuf->fb_dat); > - > return (0); > } > > @@ -526,7 +514,6 @@ ifuse_ops_lookup(struct fuse *f, struct > fbuf->fb_ino); > if (vn == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > set_vn(f, vn); /*XXX*/ > @@ -537,13 +524,11 @@ ifuse_ops_lookup(struct fuse *f, struct > realname = build_realname(f, vn->ino); > if (realname == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn); > fbuf->fb_ino = vn->ino; > - free(fbuf->fb_dat); > free(realname); > > return (0); > @@ -566,23 +551,15 @@ ifuse_ops_read(struct fuse *f, struct fu > size = fbuf->fb_io_len; > offset = fbuf->fb_io_off; > > - fbuf->fb_dat = malloc(size); > - if (fbuf->fb_dat == NULL) { > - fbuf->fb_err = -errno; > - return (0); > - } > - > vn = tree_get(&f->vnode_tree, fbuf->fb_ino); > if (vn == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > realname = build_realname(f, vn->ino); > if (realname == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > @@ -593,9 +570,6 @@ ifuse_ops_read(struct fuse *f, struct fu > else > fbuf->fb_err = ret; > > - if (fbuf->fb_len == 0) > - free(fbuf->fb_dat); > - > return (0); > } > > @@ -621,20 +595,17 @@ ifuse_ops_write(struct fuse *f, struct f > vn = tree_get(&f->vnode_tree, fbuf->fb_ino); > if (vn == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > realname = build_realname(f, vn->ino); > if (realname == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > ret = f->op.write(realname, (char *)fbuf->fb_dat, size, offset, &ffi); > free(realname); > - free(fbuf->fb_dat); > > if (ret >= 0) > fbuf->fb_io_len = ret; > @@ -657,11 +628,9 @@ ifuse_ops_mkdir(struct fuse *f, struct f > vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); > if (vn == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > - free(fbuf->fb_dat); > realname = build_realname(f, vn->ino); > if (realname == NULL) { > fbuf->fb_err = -errno; > @@ -690,11 +659,9 @@ ifuse_ops_rmdir(struct fuse *f, struct f > vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); > if (vn == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > - free(fbuf->fb_dat); > realname = build_realname(f, vn->ino); > if (realname == NULL) { > fbuf->fb_err = -errno; > @@ -740,11 +707,6 @@ ifuse_ops_readlink(struct fuse *f, struc > if (!ret) { > len = strnlen(name, PATH_MAX); > fbuf->fb_len = len; > - fbuf->fb_dat = malloc(fbuf->fb_len); > - if (fbuf->fb_dat == NULL) { > - fbuf->fb_err = -errno; > - return (0); > - } > memcpy(fbuf->fb_dat, name, len); > } else > fbuf->fb_len = 0; > @@ -762,12 +724,10 @@ ifuse_ops_unlink(struct fuse *f, struct > > vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); > if (vn == NULL) { > - free(fbuf->fb_dat); > fbuf->fb_err = -errno; > return (0); > } > > - free(fbuf->fb_dat); > realname = build_realname(f, vn->ino); > if (realname == NULL) { > fbuf->fb_err = -errno; > @@ -821,11 +781,9 @@ ifuse_ops_link(struct fuse *f, struct fu > vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); > if (vn == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > - free(fbuf->fb_dat); > realname = build_realname(f, oldnodeid); > if (realname == NULL) { > fbuf->fb_err = -errno; > @@ -863,14 +821,12 @@ ifuse_ops_setattr(struct fuse *f, struct > vn = tree_get(&f->vnode_tree, fbuf->fb_ino); > if (vn == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > realname = build_realname(f, vn->ino); > if (realname == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > io = fbtod(fbuf, struct fb_io *); > @@ -923,7 +879,6 @@ ifuse_ops_setattr(struct fuse *f, struct > if (!fbuf->fb_err) > fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn); > free(realname); > - free(fbuf->fb_dat); > > return (0); > } > @@ -940,7 +895,6 @@ ifuse_ops_symlink(unused struct fuse *f, > vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); > if (vn == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > @@ -949,7 +903,6 @@ ifuse_ops_symlink(unused struct fuse *f, > realname = build_realname(f, vn->ino); > if (realname == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > @@ -957,7 +910,6 @@ ifuse_ops_symlink(unused struct fuse *f, > fbuf->fb_err = f->op.symlink((const char *)&fbuf->fb_dat[len + 1], > realname); > fbuf->fb_ino = vn->ino; > - free(fbuf->fb_dat); > free(realname); > > return (0); > @@ -978,7 +930,6 @@ ifuse_ops_rename(struct fuse *f, struct > vnf = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); > if (vnf == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > @@ -986,12 +937,9 @@ ifuse_ops_rename(struct fuse *f, struct > fbuf->fb_io_ino); > if (vnt == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > - free(fbuf->fb_dat); > - > realnamef = build_realname(f, vnf->ino); > if (realnamef == NULL) { > fbuf->fb_err = -errno; > @@ -1060,11 +1008,9 @@ ifuse_ops_mknod(struct fuse *f, struct f > vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino); > if (vn == NULL) { > fbuf->fb_err = -errno; > - free(fbuf->fb_dat); > return (0); > } > > - free(fbuf->fb_dat); > realname = build_realname(f, vn->ino); > if (realname == NULL) { > fbuf->fb_err = -errno; > Index: sys/miscfs/fuse/fuse_device.c > =================================================================== > RCS file: /cvs/src/sys/miscfs/fuse/fuse_device.c,v > diff -u -p -r1.42 fuse_device.c > --- sys/miscfs/fuse/fuse_device.c 2 Sep 2025 17:18:40 -0000 1.42 > +++ sys/miscfs/fuse/fuse_device.c 4 Sep 2025 14:56:44 -0000 > @@ -404,13 +404,8 @@ fuseread(dev_t dev, struct uio *uio, int > struct fuse_d *fd; > struct fusebuf *fbuf; > struct fb_hdr hdr; > - void *tmpaddr; > int error = 0; > > - /* We get the whole fusebuf or nothing */ > - if (uio->uio_resid != FUSEBUFSIZE) > - return (EINVAL); > - > fd = fuse_lookup(minor(dev)); > if (fd == NULL) > return (ENXIO); > @@ -424,29 +419,43 @@ fuseread(dev_t dev, struct uio *uio, int > } > fbuf = SIMPLEQ_FIRST(&fd->fd_fbufs_in); > > + /* We get the whole fusebuf or nothing */ > + if (uio->uio_resid < sizeof(fbuf->fb_hdr) + sizeof(fbuf->FD) + > + fbuf->fb_len) { > + error = EINVAL; > + goto end; > + } > + > /* Do not send kernel pointers */ > memcpy(&hdr.fh_next, &fbuf->fb_next, sizeof(fbuf->fb_next)); > memset(&fbuf->fb_next, 0, sizeof(fbuf->fb_next)); > - tmpaddr = fbuf->fb_dat; > - fbuf->fb_dat = NULL; > - error = uiomove(fbuf, FUSEBUFSIZE, uio); > + > + error = uiomove(&fbuf->fb_hdr, sizeof(fbuf->fb_hdr), uio); > if (error) > goto end; > + error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio); > + if (error) > + goto end; > + if (fbuf->fb_len > 0) { > + error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio); > + if (error) > + goto end; > + } > > #ifdef FUSE_DEBUG > - fuse_dump_buff((char *)fbuf, FUSEBUFSIZE); > + fuse_dump_buff((char *)fbuf, sizeof(struct fusebuf)); > #endif > /* Restore kernel pointers */ > memcpy(&fbuf->fb_next, &hdr.fh_next, sizeof(fbuf->fb_next)); > - fbuf->fb_dat = tmpaddr; > > - /* Remove the fbuf if it does not contains data */ > - if (fbuf->fb_len == 0) { > - SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next); > - stat_fbufs_in--; > - SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next); > - stat_fbufs_wait++; > - } > + free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len); > + fbuf->fb_dat = NULL; > + > + /* Move the fbuf to the wait queue */ > + SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next); > + stat_fbufs_in--; > + SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next); > + stat_fbufs_wait++; > > end: > rw_exit_write(&fd->fd_lock); > @@ -466,13 +475,21 @@ fusewrite(dev_t dev, struct uio *uio, in > if (fd == NULL) > return (ENXIO); > > - /* We get the whole fusebuf or nothing */ > - if (uio->uio_resid != FUSEBUFSIZE) > + /* Check for sanity - must receive more than just the header */ > + if (uio->uio_resid <= sizeof(hdr)) > return (EINVAL); > > if ((error = uiomove(&hdr, sizeof(hdr), uio)) != 0) > return (error); > > + /* Check for sanity */ > + if (hdr.fh_len > FUSEBUFMAXSIZE) > + return (EINVAL); > + > + /* We get the whole fusebuf or nothing */ > + if (uio->uio_resid != sizeof(fbuf->FD) + hdr.fh_len) > + return (EINVAL); > + > /* looking for uuid in fd_fbufs_wait */ > SIMPLEQ_FOREACH(fbuf, &fd->fd_fbufs_wait, fb_next) { > if (fbuf->fb_uuid == hdr.fh_uuid) > @@ -497,12 +514,24 @@ fusewrite(dev_t dev, struct uio *uio, in > } > > /* Get the missing data from the fbuf */ > - error = uiomove(&fbuf->FD, uio->uio_resid, uio); > + error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio); > if (error) > return error; > + > fbuf->fb_dat = NULL; > + if (fbuf->fb_len > 0) { > + fbuf->fb_dat = malloc(fbuf->fb_len, M_FUSEFS, > + M_WAITOK | M_ZERO); > + error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio); > + if (error) { > + free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len); > + fbuf->fb_dat = NULL; > + return (error); > + } > + } > + > #ifdef FUSE_DEBUG > - fuse_dump_buff((char *)fbuf, FUSEBUFSIZE); > + fuse_dump_buff((char *)fbuf, sizeof(struct fusebuf)); > #endif > > switch (fbuf->fb_type) { > @@ -514,19 +543,17 @@ fusewrite(dev_t dev, struct uio *uio, in > break ; > } > end: > - /* Remove the fbuf if it does not contains data */ > - if (fbuf->fb_len == 0) { > - if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait)) > - SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next); > - else > - SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf, > - fb_next); > - stat_fbufs_wait--; > - if (fbuf->fb_type == FBT_INIT) > - fb_delete(fbuf); > - else > - wakeup(fbuf); > - } > + /* Remove the fbuf from the wait queue */ > + if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait)) > + SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next); > + else > + SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf, > + fb_next); > + stat_fbufs_wait--; > + if (fbuf->fb_type == FBT_INIT) > + fb_delete(fbuf); > + else > + wakeup(fbuf); > > return (error); > } > Index: sys/sys/fusebuf.h > =================================================================== > RCS file: /cvs/src/sys/sys/fusebuf.h,v > diff -u -p -r1.15 fusebuf.h > --- sys/sys/fusebuf.h 2 Sep 2025 17:18:40 -0000 1.15 > +++ sys/sys/fusebuf.h 4 Sep 2025 14:56:44 -0000 > @@ -20,9 +20,9 @@ > #define _SYS_FUSEBUF_H_ > > /* > - * Fusebufs are of a single size, 4096 bytes. > + * Maximum size of the read or write buffer sent from the kernel for VFS > + * syscalls: read, readdir, readlink, write. > */ > -#define FUSEBUFSIZE (sizeof(struct fusebuf)) > #define FUSEBUFMAXSIZE (4096*1024) > > /* header at beginning of each fusebuf: */ > -- :wq Claudio