Download raw body.
FUSE - read whole fusebuf v2
On Wed, Sep 03, 2025 at 07:50:36PM +0200, Helg wrote:
> Hi tech@
>
> I've taken Claudio's feedback onboard and updated my diff, and it's all
> the better for it.
>
> - no more #ifdef _Kernel in fusebuf.h
> - libfuse uses readv(2) and writev(2) to read the fusebuf and any io data
> into an allocated buffer
> - the libfuse buffer is 4MB since that's the max size that the kernel
> can send
> - additional checks in the kernel and libfuse
> - MIN(a,b) macro is gone
>
> This is the first of a series of changes required to make the fuse
> kernel interface compatible with other patforms. It updates /dev/fuse0 so
> that fusebufs must be read and written in their entirety. Currently,
> ioctls are needed to to read and write any io data associated with a
> syscall such as read(2), readdir(3), readlinki(2) and write(2).
>
> ok?
I don't really use any fuse fs so I did test this. I reviewed it.
OK claudio@
> Index: lib/libfuse/fuse.c
> ===================================================================
> RCS file: /cvs/src/lib/libfuse/fuse.c,v
> diff -u -p -r1.53 fuse.c
> --- lib/libfuse/fuse.c 2 Sep 2025 17:18:40 -0000 1.53
> +++ lib/libfuse/fuse.c 4 Sep 2025 14:56:25 -0000
> @@ -17,7 +17,7 @@
>
> #include <sys/wait.h>
> #include <sys/types.h>
> -#include <sys/ioctl.h>
> +#include <sys/uio.h>
>
> #include <miscfs/fuse/fusefs.h>
>
> @@ -154,9 +154,10 @@ fuse_loop(struct fuse *fuse)
> {
> struct fusebuf fbuf;
> struct fuse_context ctx;
> - struct fb_ioctl_xch ioexch;
> struct kevent event[5];
> struct kevent ev;
> + struct iovec iov[2];
> + size_t fb_dat_size = FUSEBUFMAXSIZE;
> ssize_t n;
> int ret;
>
> @@ -180,6 +181,14 @@ fuse_loop(struct fuse *fuse)
> EV_SET(&event[4], SIGTERM, EVFILT_SIGNAL, EV_ADD |
> EV_ENABLE, 0, 0, 0);
>
> + /* prepare the read and write data buffer */
> + fbuf.fb_dat = malloc(fb_dat_size);
> + if (fbuf.fb_dat == NULL)
> + return (-1);
> + iov[0].iov_base = &fbuf;
> + iov[0].iov_len = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD);
> + iov[1].iov_base = fbuf.fb_dat;
> +
> while (!fuse->fc->dead) {
> ret = kevent(fuse->fc->kq, &event[0], 5, &ev, 1, NULL);
> if (ret == -1) {
> @@ -201,27 +210,38 @@ fuse_loop(struct fuse *fuse)
> strsignal(signum));
> }
> } else if (ret > 0) {
> - n = read(fuse->fc->fd, &fbuf, sizeof(fbuf));
> - if (n != sizeof(fbuf)) {
> + iov[1].iov_len = fb_dat_size;
> + n = readv(fuse->fc->fd, iov, 2);
> + if (n == -1) {
> + perror("fuse_loop");
> + fprintf(stderr, "%s: bad fusebuf read: %s\n",
> + __func__, strerror(errno));
> + free(fbuf.fb_dat);
> + return (-1);
> + }
> + if (n < (ssize_t)sizeof(fbuf.fb_hdr)) {
> fprintf(stderr, "%s: bad fusebuf read\n",
> __func__);
> + free(fbuf.fb_dat);
> + return (-1);
> + }
> + if ((size_t)n != sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) +
> + fbuf.fb_len) {
> + fprintf(stderr, "%s: bad fusebuf read\n",
> + __func__);
> + free(fbuf.fb_dat);
> return (-1);
> }
>
> - /* check if there is data something present */
> - if (fbuf.fb_len) {
> - fbuf.fb_dat = malloc(fbuf.fb_len);
> - if (fbuf.fb_dat == NULL)
> - return (-1);
> - ioexch.fbxch_uuid = fbuf.fb_uuid;
> - ioexch.fbxch_len = fbuf.fb_len;
> - ioexch.fbxch_data = fbuf.fb_dat;
> -
> - if (ioctl(fuse->fc->fd, FIOCGETFBDAT,
> - &ioexch) == -1) {
> - free(fbuf.fb_dat);
> - return (-1);
> - }
> + /*
> + * fuse_ops check that they do not write more than
> + * fb_io_len when writing to fb_dat
> + */
> + if (fbuf.fb_io_len > fb_dat_size) {
> + fprintf(stderr, "%s: io exceeds buffer size\n",
> + __func__);
> + free(fbuf.fb_dat);
> + return (-1);
> }
>
> ctx.fuse = fuse;
> @@ -235,35 +255,25 @@ fuse_loop(struct fuse *fuse)
> ret = ifuse_exec_opcode(fuse, &fbuf);
> if (ret) {
> ictx = NULL;
> + free(fbuf.fb_dat);
> return (-1);
> }
>
> - n = write(fuse->fc->fd, &fbuf, sizeof(fbuf));
> - if (fbuf.fb_len) {
> - if (fbuf.fb_dat == NULL) {
> - fprintf(stderr, "%s: fb_dat is Null\n",
> - __func__);
> - return (-1);
> - }
> - ioexch.fbxch_uuid = fbuf.fb_uuid;
> - ioexch.fbxch_len = fbuf.fb_len;
> - ioexch.fbxch_data = fbuf.fb_dat;
> -
> - if (ioctl(fuse->fc->fd, FIOCSETFBDAT, &ioexch) == -1) {
> - free(fbuf.fb_dat);
> - return (-1);
> - }
> - free(fbuf.fb_dat);
> - }
> + iov[1].iov_len = fbuf.fb_len;
> + n = writev(fuse->fc->fd, iov, 2);
> +
> ictx = NULL;
>
> - if (n != FUSEBUFSIZE) {
> + if (n == -1 || (size_t)n != sizeof(fbuf.fb_hdr) +
> + sizeof(fbuf.FD) + fbuf.fb_len) {
> errno = EINVAL;
> + free(fbuf.fb_dat);
> return (-1);
> }
> }
> }
>
> + free(fbuf.fb_dat);
> return (0);
> }
> DEF(fuse_loop);
> Index: lib/libfuse/fuse_ops.c
> ===================================================================
> RCS file: /cvs/src/lib/libfuse/fuse_ops.c,v
> diff -u -p -r1.37 fuse_ops.c
> --- lib/libfuse/fuse_ops.c 2 Sep 2025 17:18:40 -0000 1.37
> +++ lib/libfuse/fuse_ops.c 4 Sep 2025 14:56:25 -0000
> @@ -317,17 +317,9 @@ ifuse_ops_readdir(struct fuse *f, struct
> offset = fbuf->fb_io_off;
> size = fbuf->fb_io_len;
>
> - fbuf->fb_dat = calloc(1, size);
> -
> - if (fbuf->fb_dat == NULL) {
> - fbuf->fb_err = -errno;
> - return (0);
> - }
> -
> vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> @@ -344,7 +336,6 @@ ifuse_ops_readdir(struct fuse *f, struct
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> @@ -362,9 +353,6 @@ ifuse_ops_readdir(struct fuse *f, struct
> else if (fd.full && fbuf->fb_len == 0)
> fbuf->fb_err = -ENOBUFS;
>
> - if (fbuf->fb_len == 0)
> - free(fbuf->fb_dat);
> -
> return (0);
> }
>
> @@ -526,7 +514,6 @@ ifuse_ops_lookup(struct fuse *f, struct
> fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
> set_vn(f, vn); /*XXX*/
> @@ -537,13 +524,11 @@ ifuse_ops_lookup(struct fuse *f, struct
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn);
> fbuf->fb_ino = vn->ino;
> - free(fbuf->fb_dat);
> free(realname);
>
> return (0);
> @@ -566,23 +551,15 @@ ifuse_ops_read(struct fuse *f, struct fu
> size = fbuf->fb_io_len;
> offset = fbuf->fb_io_off;
>
> - fbuf->fb_dat = malloc(size);
> - if (fbuf->fb_dat == NULL) {
> - fbuf->fb_err = -errno;
> - return (0);
> - }
> -
> vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> @@ -593,9 +570,6 @@ ifuse_ops_read(struct fuse *f, struct fu
> else
> fbuf->fb_err = ret;
>
> - if (fbuf->fb_len == 0)
> - free(fbuf->fb_dat);
> -
> return (0);
> }
>
> @@ -621,20 +595,17 @@ ifuse_ops_write(struct fuse *f, struct f
> vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> ret = f->op.write(realname, (char *)fbuf->fb_dat, size, offset, &ffi);
> free(realname);
> - free(fbuf->fb_dat);
>
> if (ret >= 0)
> fbuf->fb_io_len = ret;
> @@ -657,11 +628,9 @@ ifuse_ops_mkdir(struct fuse *f, struct f
> vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> - free(fbuf->fb_dat);
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> @@ -690,11 +659,9 @@ ifuse_ops_rmdir(struct fuse *f, struct f
> vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> - free(fbuf->fb_dat);
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> @@ -740,11 +707,6 @@ ifuse_ops_readlink(struct fuse *f, struc
> if (!ret) {
> len = strnlen(name, PATH_MAX);
> fbuf->fb_len = len;
> - fbuf->fb_dat = malloc(fbuf->fb_len);
> - if (fbuf->fb_dat == NULL) {
> - fbuf->fb_err = -errno;
> - return (0);
> - }
> memcpy(fbuf->fb_dat, name, len);
> } else
> fbuf->fb_len = 0;
> @@ -762,12 +724,10 @@ ifuse_ops_unlink(struct fuse *f, struct
>
> vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vn == NULL) {
> - free(fbuf->fb_dat);
> fbuf->fb_err = -errno;
> return (0);
> }
>
> - free(fbuf->fb_dat);
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> @@ -821,11 +781,9 @@ ifuse_ops_link(struct fuse *f, struct fu
> vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> - free(fbuf->fb_dat);
> realname = build_realname(f, oldnodeid);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> @@ -863,14 +821,12 @@ ifuse_ops_setattr(struct fuse *f, struct
> vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
> io = fbtod(fbuf, struct fb_io *);
> @@ -923,7 +879,6 @@ ifuse_ops_setattr(struct fuse *f, struct
> if (!fbuf->fb_err)
> fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn);
> free(realname);
> - free(fbuf->fb_dat);
>
> return (0);
> }
> @@ -940,7 +895,6 @@ ifuse_ops_symlink(unused struct fuse *f,
> vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> @@ -949,7 +903,6 @@ ifuse_ops_symlink(unused struct fuse *f,
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> @@ -957,7 +910,6 @@ ifuse_ops_symlink(unused struct fuse *f,
> fbuf->fb_err = f->op.symlink((const char *)&fbuf->fb_dat[len + 1],
> realname);
> fbuf->fb_ino = vn->ino;
> - free(fbuf->fb_dat);
> free(realname);
>
> return (0);
> @@ -978,7 +930,6 @@ ifuse_ops_rename(struct fuse *f, struct
> vnf = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vnf == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> @@ -986,12 +937,9 @@ ifuse_ops_rename(struct fuse *f, struct
> fbuf->fb_io_ino);
> if (vnt == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> - free(fbuf->fb_dat);
> -
> realnamef = build_realname(f, vnf->ino);
> if (realnamef == NULL) {
> fbuf->fb_err = -errno;
> @@ -1060,11 +1008,9 @@ ifuse_ops_mknod(struct fuse *f, struct f
> vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> - free(fbuf->fb_dat);
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> Index: sys/miscfs/fuse/fuse_device.c
> ===================================================================
> RCS file: /cvs/src/sys/miscfs/fuse/fuse_device.c,v
> diff -u -p -r1.42 fuse_device.c
> --- sys/miscfs/fuse/fuse_device.c 2 Sep 2025 17:18:40 -0000 1.42
> +++ sys/miscfs/fuse/fuse_device.c 4 Sep 2025 14:56:44 -0000
> @@ -404,13 +404,8 @@ fuseread(dev_t dev, struct uio *uio, int
> struct fuse_d *fd;
> struct fusebuf *fbuf;
> struct fb_hdr hdr;
> - void *tmpaddr;
> int error = 0;
>
> - /* We get the whole fusebuf or nothing */
> - if (uio->uio_resid != FUSEBUFSIZE)
> - return (EINVAL);
> -
> fd = fuse_lookup(minor(dev));
> if (fd == NULL)
> return (ENXIO);
> @@ -424,29 +419,43 @@ fuseread(dev_t dev, struct uio *uio, int
> }
> fbuf = SIMPLEQ_FIRST(&fd->fd_fbufs_in);
>
> + /* We get the whole fusebuf or nothing */
> + if (uio->uio_resid < sizeof(fbuf->fb_hdr) + sizeof(fbuf->FD) +
> + fbuf->fb_len) {
> + error = EINVAL;
> + goto end;
> + }
> +
> /* Do not send kernel pointers */
> memcpy(&hdr.fh_next, &fbuf->fb_next, sizeof(fbuf->fb_next));
> memset(&fbuf->fb_next, 0, sizeof(fbuf->fb_next));
> - tmpaddr = fbuf->fb_dat;
> - fbuf->fb_dat = NULL;
> - error = uiomove(fbuf, FUSEBUFSIZE, uio);
> +
> + error = uiomove(&fbuf->fb_hdr, sizeof(fbuf->fb_hdr), uio);
> if (error)
> goto end;
> + error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio);
> + if (error)
> + goto end;
> + if (fbuf->fb_len > 0) {
> + error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio);
> + if (error)
> + goto end;
> + }
>
> #ifdef FUSE_DEBUG
> - fuse_dump_buff((char *)fbuf, FUSEBUFSIZE);
> + fuse_dump_buff((char *)fbuf, sizeof(struct fusebuf));
> #endif
> /* Restore kernel pointers */
> memcpy(&fbuf->fb_next, &hdr.fh_next, sizeof(fbuf->fb_next));
> - fbuf->fb_dat = tmpaddr;
>
> - /* Remove the fbuf if it does not contains data */
> - if (fbuf->fb_len == 0) {
> - SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next);
> - stat_fbufs_in--;
> - SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next);
> - stat_fbufs_wait++;
> - }
> + free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len);
> + fbuf->fb_dat = NULL;
> +
> + /* Move the fbuf to the wait queue */
> + SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next);
> + stat_fbufs_in--;
> + SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next);
> + stat_fbufs_wait++;
>
> end:
> rw_exit_write(&fd->fd_lock);
> @@ -466,13 +475,21 @@ fusewrite(dev_t dev, struct uio *uio, in
> if (fd == NULL)
> return (ENXIO);
>
> - /* We get the whole fusebuf or nothing */
> - if (uio->uio_resid != FUSEBUFSIZE)
> + /* Check for sanity - must receive more than just the header */
> + if (uio->uio_resid <= sizeof(hdr))
> return (EINVAL);
>
> if ((error = uiomove(&hdr, sizeof(hdr), uio)) != 0)
> return (error);
>
> + /* Check for sanity */
> + if (hdr.fh_len > FUSEBUFMAXSIZE)
> + return (EINVAL);
> +
> + /* We get the whole fusebuf or nothing */
> + if (uio->uio_resid != sizeof(fbuf->FD) + hdr.fh_len)
> + return (EINVAL);
> +
> /* looking for uuid in fd_fbufs_wait */
> SIMPLEQ_FOREACH(fbuf, &fd->fd_fbufs_wait, fb_next) {
> if (fbuf->fb_uuid == hdr.fh_uuid)
> @@ -497,12 +514,24 @@ fusewrite(dev_t dev, struct uio *uio, in
> }
>
> /* Get the missing data from the fbuf */
> - error = uiomove(&fbuf->FD, uio->uio_resid, uio);
> + error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio);
> if (error)
> return error;
> +
> fbuf->fb_dat = NULL;
> + if (fbuf->fb_len > 0) {
> + fbuf->fb_dat = malloc(fbuf->fb_len, M_FUSEFS,
> + M_WAITOK | M_ZERO);
> + error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio);
> + if (error) {
> + free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len);
> + fbuf->fb_dat = NULL;
> + return (error);
> + }
> + }
> +
> #ifdef FUSE_DEBUG
> - fuse_dump_buff((char *)fbuf, FUSEBUFSIZE);
> + fuse_dump_buff((char *)fbuf, sizeof(struct fusebuf));
> #endif
>
> switch (fbuf->fb_type) {
> @@ -514,19 +543,17 @@ fusewrite(dev_t dev, struct uio *uio, in
> break ;
> }
> end:
> - /* Remove the fbuf if it does not contains data */
> - if (fbuf->fb_len == 0) {
> - if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait))
> - SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next);
> - else
> - SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf,
> - fb_next);
> - stat_fbufs_wait--;
> - if (fbuf->fb_type == FBT_INIT)
> - fb_delete(fbuf);
> - else
> - wakeup(fbuf);
> - }
> + /* Remove the fbuf from the wait queue */
> + if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait))
> + SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next);
> + else
> + SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf,
> + fb_next);
> + stat_fbufs_wait--;
> + if (fbuf->fb_type == FBT_INIT)
> + fb_delete(fbuf);
> + else
> + wakeup(fbuf);
>
> return (error);
> }
> Index: sys/sys/fusebuf.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/fusebuf.h,v
> diff -u -p -r1.15 fusebuf.h
> --- sys/sys/fusebuf.h 2 Sep 2025 17:18:40 -0000 1.15
> +++ sys/sys/fusebuf.h 4 Sep 2025 14:56:44 -0000
> @@ -20,9 +20,9 @@
> #define _SYS_FUSEBUF_H_
>
> /*
> - * Fusebufs are of a single size, 4096 bytes.
> + * Maximum size of the read or write buffer sent from the kernel for VFS
> + * syscalls: read, readdir, readlink, write.
> */
> -#define FUSEBUFSIZE (sizeof(struct fusebuf))
> #define FUSEBUFMAXSIZE (4096*1024)
>
> /* header at beginning of each fusebuf: */
>
--
:wq Claudio
FUSE - read whole fusebuf v2