Index | Thread | Search

From:
Martin Pieuchot <mpi@grenadille.net>
Subject:
Re: FUSE - read whole fusebuf
To:
Helg <helg-openbsd@gmx.de>
Cc:
tech@openbsd.org
Date:
Tue, 2 Sep 2025 10:12:39 +0200

Download raw body.

Thread
On 31/08/25(Sun) 16:25, Helg wrote:
> Hi tech@
> 
> I'd like to make the OpenBSD fuse implementation more compatible with other
> platforms so that we can support more fuse file systems in ports. This is
> going to take some fundamental changes in the kernel interface.

Makes sense.  I appreciate the work you're doing especially with looking
at filesystem regression tests.  This is very valuable to me.

> Here's why:
> 
> FUSE supports 3 different interfaces.
> 
> 1. Reading and writing directly to the /dev/fuse0 device
> 2. A low-level API via libfuse
> 3. A high-level API via libfuse
> 
> OpenBSD is currently only compatible with 3. To get from where we are
> today to supporting all 3 interfaces, we need to do the following.
> 
> a. Update the kernel fuse device and libfuse to read and write entire
>    fusebufs. Currently, ioctls are needed to read and write the data
>    for a fusebuf.

If all data need to be written & read to and from the kernel by design,
then go for it.  I hoped we could reduce the amount of copying...  Anyway
compatibility and stability are more important ATM.

> b. Update the kernel fuse device and libfuse to block on read until a
>    new fusebuf is available. Currently, the kernel sends an event when a
>    new fusebuf is available.
> c. Change the fusebuf binary format to match other platforms.
> d. Implement the low-level fuse API in libfuse.
> 
> This diff is the first part of a.
> 
> What do you think? Is this OK?

ok mpi@

> Index: lib/libfuse/fuse.c
> ===================================================================
> RCS file: /cvs/src/lib/libfuse/fuse.c,v
> diff -u -p -r1.51 fuse.c
> --- lib/libfuse/fuse.c	28 Jun 2019 13:32:42 -0000	1.51
> +++ lib/libfuse/fuse.c	31 Aug 2025 14:26:02 -0000
> @@ -17,7 +17,6 @@
>  
>  #include <sys/wait.h>
>  #include <sys/types.h>
> -#include <sys/ioctl.h>
>  
>  #include <miscfs/fuse/fusefs.h>
>  
> @@ -154,9 +153,9 @@ fuse_loop(struct fuse *fuse)
>  {
>  	struct fusebuf fbuf;
>  	struct fuse_context ctx;
> -	struct fb_ioctl_xch ioexch;
>  	struct kevent event[5];
>  	struct kevent ev;
> +	ssize_t fbuf_size;
>  	ssize_t n;
>  	int ret;
>  
> @@ -201,29 +200,15 @@ fuse_loop(struct fuse *fuse)
>  					strsignal(signum));
>  			}
>  		} else if (ret > 0) {
> -			n = read(fuse->fc->fd, &fbuf, sizeof(fbuf));
> -			if (n != sizeof(fbuf)) {
> +			n = read(fuse->fc->fd, &fbuf, FUSEBUFSIZE);
> +			fbuf_size = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) +
> +			    fbuf.fb_len;
> +			if (n != fbuf_size) {
>  				fprintf(stderr, "%s: bad fusebuf read\n",
>  				    __func__);
>  				return (-1);
>  			}
>  
> -			/* check if there is data something present */
> -			if (fbuf.fb_len) {
> -				fbuf.fb_dat = malloc(fbuf.fb_len);
> -				if (fbuf.fb_dat == NULL)
> -					return (-1);
> -				ioexch.fbxch_uuid = fbuf.fb_uuid;
> -				ioexch.fbxch_len = fbuf.fb_len;
> -				ioexch.fbxch_data = fbuf.fb_dat;
> -
> -				if (ioctl(fuse->fc->fd, FIOCGETFBDAT,
> -				    &ioexch) == -1) {
> -					free(fbuf.fb_dat);
> -					return (-1);
> -				}
> -			}
> -
>  			ctx.fuse = fuse;
>  			ctx.uid = fbuf.fb_uid;
>  			ctx.gid = fbuf.fb_gid;
> @@ -238,26 +223,13 @@ fuse_loop(struct fuse *fuse)
>  				return (-1);
>  			}
>  
> -			n = write(fuse->fc->fd, &fbuf, sizeof(fbuf));
> -			if (fbuf.fb_len) {
> -				if (fbuf.fb_dat == NULL) {
> -					fprintf(stderr, "%s: fb_dat is Null\n",
> -					    __func__);
> -					return (-1);
> -				}
> -				ioexch.fbxch_uuid = fbuf.fb_uuid;
> -				ioexch.fbxch_len = fbuf.fb_len;
> -				ioexch.fbxch_data = fbuf.fb_dat;
> -
> -				if (ioctl(fuse->fc->fd, FIOCSETFBDAT, &ioexch) == -1) {
> -					free(fbuf.fb_dat);
> -					return (-1);
> -				}
> -				free(fbuf.fb_dat);
> -			}
> +			fbuf_size = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) +
> +			    fbuf.fb_len;
> +			n = write(fuse->fc->fd, &fbuf, fbuf_size);
> +
>  			ictx = NULL;
>  
> -			if (n != FUSEBUFSIZE) {
> +			if (n != fbuf_size) {
>  				errno = EINVAL;
>  				return (-1);
>  			}
> Index: lib/libfuse/fuse_ops.c
> ===================================================================
> RCS file: /cvs/src/lib/libfuse/fuse_ops.c,v
> diff -u -p -r1.35 fuse_ops.c
> --- lib/libfuse/fuse_ops.c	16 Jul 2018 13:10:53 -0000	1.35
> +++ lib/libfuse/fuse_ops.c	31 Aug 2025 14:26:02 -0000
> @@ -22,6 +22,8 @@
>  #include "fuse_private.h"
>  #include "debug.h"
>  
> +#define MIN(a,b) (((a)<(b))?(a):(b))
> +
>  #define CHECK_OPT(opname)	DPRINTF("Opcode: %s\t", #opname);	\
>  				DPRINTF("Inode: %llu\t",		\
>  				    (unsigned long long)fbuf->fb_ino);	\
> @@ -317,17 +319,9 @@ ifuse_ops_readdir(struct fuse *f, struct
>  	offset = fbuf->fb_io_off;
>  	size = fbuf->fb_io_len;
>  
> -	fbuf->fb_dat = calloc(1, size);
> -
> -	if (fbuf->fb_dat == NULL) {
> -		fbuf->fb_err = -errno;
> -		return (0);
> -	}
> -
>  	vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
>  	if (vn == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
> @@ -344,7 +338,6 @@ ifuse_ops_readdir(struct fuse *f, struct
>  	realname = build_realname(f, vn->ino);
>  	if (realname == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
> @@ -362,9 +355,6 @@ ifuse_ops_readdir(struct fuse *f, struct
>  	else if (fd.full && fbuf->fb_len == 0)
>  		fbuf->fb_err = -ENOBUFS;
>  
> -	if (fbuf->fb_len == 0)
> -		free(fbuf->fb_dat);
> -
>  	return (0);
>  }
>  
> @@ -526,7 +516,6 @@ ifuse_ops_lookup(struct fuse *f, struct 
>  			    fbuf->fb_ino);
>  			if (vn == NULL) {
>  				fbuf->fb_err = -errno;
> -				free(fbuf->fb_dat);
>  				return (0);
>  			}
>  			set_vn(f, vn); /*XXX*/
> @@ -537,13 +526,11 @@ ifuse_ops_lookup(struct fuse *f, struct 
>  	realname = build_realname(f, vn->ino);
>  	if (realname == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
>  	fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn);
>  	fbuf->fb_ino = vn->ino;
> -	free(fbuf->fb_dat);
>  	free(realname);
>  
>  	return (0);
> @@ -566,23 +553,15 @@ ifuse_ops_read(struct fuse *f, struct fu
>  	size = fbuf->fb_io_len;
>  	offset = fbuf->fb_io_off;
>  
> -	fbuf->fb_dat = malloc(size);
> -	if (fbuf->fb_dat == NULL) {
> -		fbuf->fb_err = -errno;
> -		return (0);
> -	}
> -
>  	vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
>  	if (vn == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
>  	realname = build_realname(f, vn->ino);
>  	if (realname == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
> @@ -593,9 +572,6 @@ ifuse_ops_read(struct fuse *f, struct fu
>  	else
>  		fbuf->fb_err = ret;
>  
> -	if (fbuf->fb_len == 0)
> -		free(fbuf->fb_dat);
> -
>  	return (0);
>  }
>  
> @@ -621,20 +597,17 @@ ifuse_ops_write(struct fuse *f, struct f
>  	vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
>  	if (vn == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
>  	realname = build_realname(f, vn->ino);
>  	if (realname == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
>  	ret = f->op.write(realname, (char *)fbuf->fb_dat, size, offset, &ffi);
>  	free(realname);
> -	free(fbuf->fb_dat);
>  
>  	if (ret >= 0)
>  		fbuf->fb_io_len = ret;
> @@ -657,11 +630,9 @@ ifuse_ops_mkdir(struct fuse *f, struct f
>  	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
>  	if (vn == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
> -	free(fbuf->fb_dat);
>  	realname = build_realname(f, vn->ino);
>  	if (realname == NULL) {
>  		fbuf->fb_err = -errno;
> @@ -690,11 +661,9 @@ ifuse_ops_rmdir(struct fuse *f, struct f
>  	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
>  	if (vn == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
> -	free(fbuf->fb_dat);
>  	realname = build_realname(f, vn->ino);
>  	if (realname == NULL) {
>  		fbuf->fb_err = -errno;
> @@ -738,13 +707,8 @@ ifuse_ops_readlink(struct fuse *f, struc
>  
>  	fbuf->fb_err = ret;
>  	if (!ret) {
> -		len = strnlen(name, PATH_MAX);
> +		len = strnlen(name, MIN(PATH_MAX, FUSEBUFMAXSIZE));
>  		fbuf->fb_len = len;
> -		fbuf->fb_dat = malloc(fbuf->fb_len);
> -		if (fbuf->fb_dat == NULL) {
> -			fbuf->fb_err = -errno;
> -			return (0);
> -		}
>  		memcpy(fbuf->fb_dat, name, len);
>  	} else
>  		fbuf->fb_len = 0;
> @@ -762,12 +726,10 @@ ifuse_ops_unlink(struct fuse *f, struct 
>  
>  	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
>  	if (vn == NULL) {
> -		free(fbuf->fb_dat);
>  		fbuf->fb_err = -errno;
>  		return (0);
>  	}
>  
> -	free(fbuf->fb_dat);
>  	realname = build_realname(f, vn->ino);
>  	if (realname == NULL) {
>  		fbuf->fb_err = -errno;
> @@ -821,11 +783,9 @@ ifuse_ops_link(struct fuse *f, struct fu
>  	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
>  	if (vn == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
> -	free(fbuf->fb_dat);
>  	realname = build_realname(f, oldnodeid);
>  	if (realname == NULL) {
>  		fbuf->fb_err = -errno;
> @@ -863,14 +823,12 @@ ifuse_ops_setattr(struct fuse *f, struct
>  	vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
>  	if (vn == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
>  	realname = build_realname(f, vn->ino);
>  	if (realname == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  	io = fbtod(fbuf, struct fb_io *);
> @@ -923,7 +881,6 @@ ifuse_ops_setattr(struct fuse *f, struct
>  	if (!fbuf->fb_err)
>  		fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn);
>  	free(realname);
> -	free(fbuf->fb_dat);
>  
>  	return (0);
>  }
> @@ -940,7 +897,6 @@ ifuse_ops_symlink(unused struct fuse *f,
>  	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
>  	if (vn == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
> @@ -949,7 +905,6 @@ ifuse_ops_symlink(unused struct fuse *f,
>  	realname = build_realname(f, vn->ino);
>  	if (realname == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
> @@ -957,7 +912,6 @@ ifuse_ops_symlink(unused struct fuse *f,
>  	fbuf->fb_err = f->op.symlink((const char *)&fbuf->fb_dat[len + 1],
>  	    realname);
>  	fbuf->fb_ino = vn->ino;
> -	free(fbuf->fb_dat);
>  	free(realname);
>  
>  	return (0);
> @@ -978,7 +932,6 @@ ifuse_ops_rename(struct fuse *f, struct 
>  	vnf = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
>  	if (vnf == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
> @@ -986,12 +939,9 @@ ifuse_ops_rename(struct fuse *f, struct 
>  	    fbuf->fb_io_ino);
>  	if (vnt == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
> -	free(fbuf->fb_dat);
> -
>  	realnamef = build_realname(f, vnf->ino);
>  	if (realnamef == NULL) {
>  		fbuf->fb_err = -errno;
> @@ -1060,11 +1010,9 @@ ifuse_ops_mknod(struct fuse *f, struct f
>  	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
>  	if (vn == NULL) {
>  		fbuf->fb_err = -errno;
> -		free(fbuf->fb_dat);
>  		return (0);
>  	}
>  
> -	free(fbuf->fb_dat);
>  	realname = build_realname(f, vn->ino);
>  	if (realname == NULL) {
>  		fbuf->fb_err = -errno;
> Index: sys/miscfs/fuse/fuse_device.c
> ===================================================================
> RCS file: /cvs/src/sys/miscfs/fuse/fuse_device.c,v
> diff -u -p -r1.40 fuse_device.c
> --- sys/miscfs/fuse/fuse_device.c	16 Dec 2023 22:17:08 -0000	1.40
> +++ sys/miscfs/fuse/fuse_device.c	31 Aug 2025 14:26:04 -0000
> @@ -404,13 +404,8 @@ fuseread(dev_t dev, struct uio *uio, int
>  	struct fuse_d *fd;
>  	struct fusebuf *fbuf;
>  	struct fb_hdr hdr;
> -	void *tmpaddr;
>  	int error = 0;
>  
> -	/* We get the whole fusebuf or nothing */
> -	if (uio->uio_resid != FUSEBUFSIZE)
> -		return (EINVAL);
> -
>  	fd = fuse_lookup(minor(dev));
>  	if (fd == NULL)
>  		return (ENXIO);
> @@ -424,29 +419,42 @@ fuseread(dev_t dev, struct uio *uio, int
>  	}
>  	fbuf = SIMPLEQ_FIRST(&fd->fd_fbufs_in);
>  
> +	/* We get the whole fusebuf or nothing */
> +	if (uio->uio_resid < FUSEBUFSIZE + fbuf->fb_len) {
> +		error = EINVAL;
> +		goto end;
> +	}
> +
>  	/* Do not send kernel pointers */
>  	memcpy(&hdr.fh_next, &fbuf->fb_next, sizeof(fbuf->fb_next));
>  	memset(&fbuf->fb_next, 0, sizeof(fbuf->fb_next));
> -	tmpaddr = fbuf->fb_dat;
> -	fbuf->fb_dat = NULL;
> -	error = uiomove(fbuf, FUSEBUFSIZE, uio);
> +
> +	error = uiomove(&fbuf->fb_hdr, sizeof(fbuf->fb_hdr), uio);
> +	if (error)
> +		goto end;
> +	error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio);
>  	if (error)
>  		goto end;
> +	if (fbuf->fb_len > 0) {
> +		error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio);
> +		if (error)
> +			goto end;
> +	}
>  
>  #ifdef FUSE_DEBUG
>  	fuse_dump_buff((char *)fbuf, FUSEBUFSIZE);
>  #endif
>  	/* Restore kernel pointers */
>  	memcpy(&fbuf->fb_next, &hdr.fh_next, sizeof(fbuf->fb_next));
> -	fbuf->fb_dat = tmpaddr;
>  
> -	/* Remove the fbuf if it does not contains data */
> -	if (fbuf->fb_len == 0) {
> -		SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next);
> -		stat_fbufs_in--;
> -		SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next);
> -		stat_fbufs_wait++;
> -	}
> +	free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len);
> +	fbuf->fb_dat = NULL;
> + 
> +	/* Move the fbuf to the wait queue */
> +	SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next);
> +	stat_fbufs_in--;
> +	SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next);
> +	stat_fbufs_wait++;
>  
>  end:
>  	rw_exit_write(&fd->fd_lock);
> @@ -466,13 +474,17 @@ fusewrite(dev_t dev, struct uio *uio, in
>  	if (fd == NULL)
>  		return (ENXIO);
>  
> -	/* We get the whole fusebuf or nothing */
> -	if (uio->uio_resid != FUSEBUFSIZE)
> -		return (EINVAL);
> -
>  	if ((error = uiomove(&hdr, sizeof(hdr), uio)) != 0)
>  		return (error);
>  
> +	/* Check for sanity */
> +	if (hdr.fh_len > FUSEBUFMAXSIZE)
> +		return (EINVAL);
> +
> +	/* We get the whole fusebuf or nothing */
> +	if (uio->uio_resid != sizeof(fbuf->FD) + hdr.fh_len)
> +		return (EINVAL);
> +
>  	/* looking for uuid in fd_fbufs_wait */
>  	SIMPLEQ_FOREACH(fbuf, &fd->fd_fbufs_wait, fb_next) {
>  		if (fbuf->fb_uuid == hdr.fh_uuid)
> @@ -497,10 +509,19 @@ fusewrite(dev_t dev, struct uio *uio, in
>  	}
>  
>  	/* Get the missing data from the fbuf */
> -	error = uiomove(&fbuf->FD, uio->uio_resid, uio);
> +	error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio);
>  	if (error)
>  		return error;
> -	fbuf->fb_dat = NULL;
> +
> +	fbuf->fb_dat = malloc(fbuf->fb_len, M_FUSEFS,
> +	    M_WAITOK | M_ZERO);
> +	error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio);
> +	if (error) {
> +		free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len);
> +		fbuf->fb_dat = NULL;
> +		return (error);
> +	}
> +
>  #ifdef FUSE_DEBUG
>  	fuse_dump_buff((char *)fbuf, FUSEBUFSIZE);
>  #endif
> @@ -514,19 +535,17 @@ fusewrite(dev_t dev, struct uio *uio, in
>  		break ;
>  	}
>  end:
> -	/* Remove the fbuf if it does not contains data */
> -	if (fbuf->fb_len == 0) {
> -		if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait))
> -			SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next);
> -		else
> -			SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf,
> -			    fb_next);
> -		stat_fbufs_wait--;
> -		if (fbuf->fb_type == FBT_INIT)
> -			fb_delete(fbuf);
> -		else
> -			wakeup(fbuf);
> -	}
> +	/* Remove the fbuf from the wait queue */
> +	if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait))
> +		SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next);
> +	else
> +		SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf,
> +		    fb_next);
> +	stat_fbufs_wait--;
> +	if (fbuf->fb_type == FBT_INIT)
> +		fb_delete(fbuf);
> +	else
> +		wakeup(fbuf);
>  
>  	return (error);
>  }
> Index: sys/sys/fusebuf.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/fusebuf.h,v
> diff -u -p -r1.13 fusebuf.h
> --- sys/sys/fusebuf.h	19 Jun 2018 11:27:54 -0000	1.13
> +++ sys/sys/fusebuf.h	31 Aug 2025 14:26:04 -0000
> @@ -69,7 +69,11 @@ struct fusebuf {
>  		struct stat	FD_attr;	/* for attr vnops */
>  		struct fb_io	FD_io;		/* for file io vnops */
>  	} FD;
> +#ifdef _KERNEL
>  	uint8_t *fb_dat;			/* data's */
> +#else
> +	uint8_t fb_dat[FUSEBUFMAXSIZE];		/* userland does i/o with fixed size buffer */
> +#endif
>  };
>  
>  #define fb_next		fb_hdr.fh_next
>