Download raw body.
FUSE - read whole fusebuf
On Sun, Aug 31, 2025 at 04:25:20PM +0200, Helg wrote:
> Hi tech@
>
> I'd like to make the OpenBSD fuse implementation more compatible with other
> patforms so that we can support more fuse file systems in ports. This is
> going to take some fundamental changes in the kernel interface.
>
> Here's why:
>
> FUSE supports 3 different interfaces.
>
> 1. Reading and writing directly to the /dev/fuse0 device
> 2. A low-level API via libfuse
> 3. A high-level API via libfuse
>
> OpenBSD is currently only compatible with 3. To get from where we are
> today to supporting all 3 interfaces, we need to do the following.
>
> a. Update the kernel fuse device and libfuse to read and write entire
> fusebufs. Currently, ioctls are needed to to read and write the data
> for a fusebuf.
> b. Update the kernel fuse device and libfuse to block on read until a
> new fusebuf is available. Currently, the kernel sends an event when a
> new fusebuf is available.
> c. Change the fusebuf binary format to match other platforms.
> d. Implement the low-level fuse API in libfuse.
>
> This diff is the first part of a.
>
> What do you think? Is this OK?
>
> helg
>
Let me start with the fusebuf.h change:
> Index: sys/sys/fusebuf.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/fusebuf.h,v
> diff -u -p -r1.13 fusebuf.h
> --- sys/sys/fusebuf.h 19 Jun 2018 11:27:54 -0000 1.13
> +++ sys/sys/fusebuf.h 31 Aug 2025 14:26:04 -0000
> @@ -69,7 +69,11 @@ struct fusebuf {
> struct stat FD_attr; /* for attr vnops */
> struct fb_io FD_io; /* for file io vnops */
> } FD;
> +#ifdef _KERNEL
> uint8_t *fb_dat; /* data's */
> +#else
> + uint8_t fb_dat[FUSEBUFMAXSIZE]; /* userland does i/o with fixed size buffer */
> +#endif
> };
>
> #define fb_next fb_hdr.fh_next
So struct fusebuf will be a 4MB+ struct. Which only needs to use a small
part of that memory most of the time.
Also FUSEBUFSIZE becomes a big trap. Since the value for the _KERNEL is
wrong. Not sure if it should be used in userland (instead of just using
sizeof().
Wouldn't it make more sense to just use writev() and readv() in userland
and allocate the buffer like it is done now?
I understand that for the read call (data from the kernel) you need to
pass in a FUSEBUFMAXSIZE data buffer since you don't know how much data
you may get.
Now all of this depends on your further steps. I only glanced at the
FreeBSD and Linux code and realized that their low level API is a fair bit
different (and especially there is no struct fusebuf).
... now the kernel bits:
> Index: sys/miscfs/fuse/fuse_device.c
> ===================================================================
> RCS file: /cvs/src/sys/miscfs/fuse/fuse_device.c,v
> diff -u -p -r1.40 fuse_device.c
> --- sys/miscfs/fuse/fuse_device.c 16 Dec 2023 22:17:08 -0000 1.40
> +++ sys/miscfs/fuse/fuse_device.c 31 Aug 2025 14:26:04 -0000
> @@ -404,13 +404,8 @@ fuseread(dev_t dev, struct uio *uio, int
> struct fuse_d *fd;
> struct fusebuf *fbuf;
> struct fb_hdr hdr;
> - void *tmpaddr;
> int error = 0;
>
> - /* We get the whole fusebuf or nothing */
> - if (uio->uio_resid != FUSEBUFSIZE)
> - return (EINVAL);
> -
> fd = fuse_lookup(minor(dev));
> if (fd == NULL)
> return (ENXIO);
> @@ -424,29 +419,42 @@ fuseread(dev_t dev, struct uio *uio, int
> }
> fbuf = SIMPLEQ_FIRST(&fd->fd_fbufs_in);
>
> + /* We get the whole fusebuf or nothing */
> + if (uio->uio_resid < FUSEBUFSIZE + fbuf->fb_len) {
This check is not quite right. FUSEBUFSIZE is a bit too large.
Either FUSEBUFSIZE needs to change or this should use:
sizeof(fbuf->fb_hdr) + sizeof(fbuf->FD) + fbuf->fb_len
> + error = EINVAL;
> + goto end;
> + }
> +
> /* Do not send kernel pointers */
> memcpy(&hdr.fh_next, &fbuf->fb_next, sizeof(fbuf->fb_next));
> memset(&fbuf->fb_next, 0, sizeof(fbuf->fb_next));
> - tmpaddr = fbuf->fb_dat;
> - fbuf->fb_dat = NULL;
> - error = uiomove(fbuf, FUSEBUFSIZE, uio);
> +
> + error = uiomove(&fbuf->fb_hdr, sizeof(fbuf->fb_hdr), uio);
> + if (error)
> + goto end;
> + error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio);
> if (error)
> goto end;
> + if (fbuf->fb_len > 0) {
> + error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio);
> + if (error)
> + goto end;
> + }
>
> #ifdef FUSE_DEBUG
> fuse_dump_buff((char *)fbuf, FUSEBUFSIZE);
> #endif
> /* Restore kernel pointers */
> memcpy(&fbuf->fb_next, &hdr.fh_next, sizeof(fbuf->fb_next));
> - fbuf->fb_dat = tmpaddr;
>
> - /* Remove the fbuf if it does not contains data */
> - if (fbuf->fb_len == 0) {
> - SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next);
> - stat_fbufs_in--;
> - SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next);
> - stat_fbufs_wait++;
> - }
> + free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len);
> + fbuf->fb_dat = NULL;
> +
> + /* Move the fbuf to the wait queue */
> + SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next);
> + stat_fbufs_in--;
> + SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next);
> + stat_fbufs_wait++;
>
> end:
> rw_exit_write(&fd->fd_lock);
> @@ -466,13 +474,17 @@ fusewrite(dev_t dev, struct uio *uio, in
> if (fd == NULL)
> return (ENXIO);
>
> - /* We get the whole fusebuf or nothing */
> - if (uio->uio_resid != FUSEBUFSIZE)
> - return (EINVAL);
Don't you need to check here first that the uio->uio_resid > sizeof(hdr)
Else you may short read below and then the hdr.fh_len check could access
uninitalized memory.
> -
> if ((error = uiomove(&hdr, sizeof(hdr), uio)) != 0)
> return (error);
>
> + /* Check for sanity */
> + if (hdr.fh_len > FUSEBUFMAXSIZE)
> + return (EINVAL);
> +
> + /* We get the whole fusebuf or nothing */
> + if (uio->uio_resid != sizeof(fbuf->FD) + hdr.fh_len)
> + return (EINVAL);
> +
> /* looking for uuid in fd_fbufs_wait */
> SIMPLEQ_FOREACH(fbuf, &fd->fd_fbufs_wait, fb_next) {
> if (fbuf->fb_uuid == hdr.fh_uuid)
> @@ -497,10 +509,19 @@ fusewrite(dev_t dev, struct uio *uio, in
> }
>
> /* Get the missing data from the fbuf */
> - error = uiomove(&fbuf->FD, uio->uio_resid, uio);
> + error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio);
> if (error)
> return error;
> - fbuf->fb_dat = NULL;
> +
> + fbuf->fb_dat = malloc(fbuf->fb_len, M_FUSEFS,
> + M_WAITOK | M_ZERO);
Are you sure fb_len can't be 0 here? Because I can't really see how that
is checked. If fh_err is != 0 then I think fh_len has to be 0.
In that case this would do a malloc with 0 size which is not ideal.
> + error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio);
> + if (error) {
> + free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len);
> + fbuf->fb_dat = NULL;
> + return (error);
> + }
> +
> #ifdef FUSE_DEBUG
> fuse_dump_buff((char *)fbuf, FUSEBUFSIZE);
> #endif
> @@ -514,19 +535,17 @@ fusewrite(dev_t dev, struct uio *uio, in
> break ;
> }
> end:
> - /* Remove the fbuf if it does not contains data */
> - if (fbuf->fb_len == 0) {
> - if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait))
> - SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next);
> - else
> - SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf,
> - fb_next);
> - stat_fbufs_wait--;
> - if (fbuf->fb_type == FBT_INIT)
> - fb_delete(fbuf);
> - else
> - wakeup(fbuf);
> - }
> + /* Remove the fbuf from the wait queue */
> + if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait))
> + SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next);
> + else
> + SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf,
> + fb_next);
> + stat_fbufs_wait--;
> + if (fbuf->fb_type == FBT_INIT)
> + fb_delete(fbuf);
> + else
> + wakeup(fbuf);
>
> return (error);
> }
... and finally libfuse
> Index: lib/libfuse/fuse.c
> ===================================================================
> RCS file: /cvs/src/lib/libfuse/fuse.c,v
> diff -u -p -r1.51 fuse.c
> --- lib/libfuse/fuse.c 28 Jun 2019 13:32:42 -0000 1.51
> +++ lib/libfuse/fuse.c 31 Aug 2025 14:26:02 -0000
> @@ -17,7 +17,6 @@
>
> #include <sys/wait.h>
> #include <sys/types.h>
> -#include <sys/ioctl.h>
>
> #include <miscfs/fuse/fusefs.h>
>
> @@ -154,9 +153,9 @@ fuse_loop(struct fuse *fuse)
> {
> struct fusebuf fbuf;
> struct fuse_context ctx;
> - struct fb_ioctl_xch ioexch;
> struct kevent event[5];
> struct kevent ev;
> + ssize_t fbuf_size;
> ssize_t n;
> int ret;
>
> @@ -201,29 +200,15 @@ fuse_loop(struct fuse *fuse)
> strsignal(signum));
> }
> } else if (ret > 0) {
> - n = read(fuse->fc->fd, &fbuf, sizeof(fbuf));
> - if (n != sizeof(fbuf)) {
> + n = read(fuse->fc->fd, &fbuf, FUSEBUFSIZE);
> + fbuf_size = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) +
> + fbuf.fb_len;
You should not access fbuf.fb_len before checking that n is >
sizeof(fbuf.fb_hdr).
I think there should be a check for n == -1 and n < sizeof(fbuf.fd_hdr)
before fbuf_size can be calculated.
> + if (n != fbuf_size) {
> fprintf(stderr, "%s: bad fusebuf read\n",
> __func__);
> return (-1);
> }
>
> - /* check if there is data something present */
> - if (fbuf.fb_len) {
> - fbuf.fb_dat = malloc(fbuf.fb_len);
> - if (fbuf.fb_dat == NULL)
> - return (-1);
> - ioexch.fbxch_uuid = fbuf.fb_uuid;
> - ioexch.fbxch_len = fbuf.fb_len;
> - ioexch.fbxch_data = fbuf.fb_dat;
> -
> - if (ioctl(fuse->fc->fd, FIOCGETFBDAT,
> - &ioexch) == -1) {
> - free(fbuf.fb_dat);
> - return (-1);
> - }
> - }
> -
> ctx.fuse = fuse;
> ctx.uid = fbuf.fb_uid;
> ctx.gid = fbuf.fb_gid;
> @@ -238,26 +223,13 @@ fuse_loop(struct fuse *fuse)
> return (-1);
> }
>
> - n = write(fuse->fc->fd, &fbuf, sizeof(fbuf));
> - if (fbuf.fb_len) {
> - if (fbuf.fb_dat == NULL) {
> - fprintf(stderr, "%s: fb_dat is Null\n",
> - __func__);
> - return (-1);
> - }
> - ioexch.fbxch_uuid = fbuf.fb_uuid;
> - ioexch.fbxch_len = fbuf.fb_len;
> - ioexch.fbxch_data = fbuf.fb_dat;
> -
> - if (ioctl(fuse->fc->fd, FIOCSETFBDAT, &ioexch) == -1) {
> - free(fbuf.fb_dat);
> - return (-1);
> - }
> - free(fbuf.fb_dat);
> - }
> + fbuf_size = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) +
> + fbuf.fb_len;
> + n = write(fuse->fc->fd, &fbuf, fbuf_size);
> +
> ictx = NULL;
>
> - if (n != FUSEBUFSIZE) {
> + if (n != fbuf_size) {
> errno = EINVAL;
> return (-1);
> }
> Index: lib/libfuse/fuse_ops.c
> ===================================================================
> RCS file: /cvs/src/lib/libfuse/fuse_ops.c,v
> diff -u -p -r1.35 fuse_ops.c
> --- lib/libfuse/fuse_ops.c 16 Jul 2018 13:10:53 -0000 1.35
> +++ lib/libfuse/fuse_ops.c 31 Aug 2025 14:26:02 -0000
> @@ -22,6 +22,8 @@
> #include "fuse_private.h"
> #include "debug.h"
>
> +#define MIN(a,b) (((a)<(b))?(a):(b))
> +
I dislike these macros. They are not quite safe when one of the passed
expression has side-effects since values are evaluated twice.
This is currently only used on one place with a const expression.
So right now this is fine. Still I prefer to have less of them in the
tree.
> #define CHECK_OPT(opname) DPRINTF("Opcode: %s\t", #opname); \
> DPRINTF("Inode: %llu\t", \
> (unsigned long long)fbuf->fb_ino); \
> @@ -317,17 +319,9 @@ ifuse_ops_readdir(struct fuse *f, struct
> offset = fbuf->fb_io_off;
> size = fbuf->fb_io_len;
>
> - fbuf->fb_dat = calloc(1, size);
> -
> - if (fbuf->fb_dat == NULL) {
> - fbuf->fb_err = -errno;
> - return (0);
> - }
> -
> vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> @@ -344,7 +338,6 @@ ifuse_ops_readdir(struct fuse *f, struct
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> @@ -362,9 +355,6 @@ ifuse_ops_readdir(struct fuse *f, struct
> else if (fd.full && fbuf->fb_len == 0)
> fbuf->fb_err = -ENOBUFS;
>
> - if (fbuf->fb_len == 0)
> - free(fbuf->fb_dat);
> -
> return (0);
> }
>
> @@ -526,7 +516,6 @@ ifuse_ops_lookup(struct fuse *f, struct
> fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
> set_vn(f, vn); /*XXX*/
> @@ -537,13 +526,11 @@ ifuse_ops_lookup(struct fuse *f, struct
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn);
> fbuf->fb_ino = vn->ino;
> - free(fbuf->fb_dat);
> free(realname);
>
> return (0);
> @@ -566,23 +553,15 @@ ifuse_ops_read(struct fuse *f, struct fu
> size = fbuf->fb_io_len;
> offset = fbuf->fb_io_off;
>
> - fbuf->fb_dat = malloc(size);
> - if (fbuf->fb_dat == NULL) {
> - fbuf->fb_err = -errno;
> - return (0);
> - }
> -
> vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> @@ -593,9 +572,6 @@ ifuse_ops_read(struct fuse *f, struct fu
> else
> fbuf->fb_err = ret;
>
> - if (fbuf->fb_len == 0)
> - free(fbuf->fb_dat);
> -
> return (0);
> }
>
> @@ -621,20 +597,17 @@ ifuse_ops_write(struct fuse *f, struct f
> vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> ret = f->op.write(realname, (char *)fbuf->fb_dat, size, offset, &ffi);
> free(realname);
> - free(fbuf->fb_dat);
>
> if (ret >= 0)
> fbuf->fb_io_len = ret;
> @@ -657,11 +630,9 @@ ifuse_ops_mkdir(struct fuse *f, struct f
> vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> - free(fbuf->fb_dat);
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> @@ -690,11 +661,9 @@ ifuse_ops_rmdir(struct fuse *f, struct f
> vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> - free(fbuf->fb_dat);
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> @@ -738,13 +707,8 @@ ifuse_ops_readlink(struct fuse *f, struc
>
> fbuf->fb_err = ret;
> if (!ret) {
> - len = strnlen(name, PATH_MAX);
> + len = strnlen(name, MIN(PATH_MAX, FUSEBUFMAXSIZE));
> fbuf->fb_len = len;
> - fbuf->fb_dat = malloc(fbuf->fb_len);
> - if (fbuf->fb_dat == NULL) {
> - fbuf->fb_err = -errno;
> - return (0);
> - }
> memcpy(fbuf->fb_dat, name, len);
> } else
> fbuf->fb_len = 0;
> @@ -762,12 +726,10 @@ ifuse_ops_unlink(struct fuse *f, struct
>
> vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vn == NULL) {
> - free(fbuf->fb_dat);
> fbuf->fb_err = -errno;
> return (0);
> }
>
> - free(fbuf->fb_dat);
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> @@ -821,11 +783,9 @@ ifuse_ops_link(struct fuse *f, struct fu
> vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> - free(fbuf->fb_dat);
> realname = build_realname(f, oldnodeid);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> @@ -863,14 +823,12 @@ ifuse_ops_setattr(struct fuse *f, struct
> vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
> io = fbtod(fbuf, struct fb_io *);
> @@ -923,7 +881,6 @@ ifuse_ops_setattr(struct fuse *f, struct
> if (!fbuf->fb_err)
> fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn);
> free(realname);
> - free(fbuf->fb_dat);
>
> return (0);
> }
> @@ -940,7 +897,6 @@ ifuse_ops_symlink(unused struct fuse *f,
> vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> @@ -949,7 +905,6 @@ ifuse_ops_symlink(unused struct fuse *f,
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> @@ -957,7 +912,6 @@ ifuse_ops_symlink(unused struct fuse *f,
> fbuf->fb_err = f->op.symlink((const char *)&fbuf->fb_dat[len + 1],
> realname);
> fbuf->fb_ino = vn->ino;
> - free(fbuf->fb_dat);
> free(realname);
>
> return (0);
> @@ -978,7 +932,6 @@ ifuse_ops_rename(struct fuse *f, struct
> vnf = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vnf == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> @@ -986,12 +939,9 @@ ifuse_ops_rename(struct fuse *f, struct
> fbuf->fb_io_ino);
> if (vnt == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> - free(fbuf->fb_dat);
> -
> realnamef = build_realname(f, vnf->ino);
> if (realnamef == NULL) {
> fbuf->fb_err = -errno;
> @@ -1060,11 +1010,9 @@ ifuse_ops_mknod(struct fuse *f, struct f
> vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
> if (vn == NULL) {
> fbuf->fb_err = -errno;
> - free(fbuf->fb_dat);
> return (0);
> }
>
> - free(fbuf->fb_dat);
> realname = build_realname(f, vn->ino);
> if (realname == NULL) {
> fbuf->fb_err = -errno;
--
:wq Claudio
FUSE - read whole fusebuf