Index | Thread | Search

From:
Helg <helg-openbsd@gmx.de>
Subject:
FUSE - read whole fusebuf
To:
tech@openbsd.org
Date:
Sun, 31 Aug 2025 16:25:20 +0200

Download raw body.

Thread
Hi tech@

I'd like to make the OpenBSD fuse implementation more compatible with other
patforms so that we can support more fuse file systems in ports. This is
going to take some fundamental changes in the kernel interface.

Here's why:

FUSE supports 3 different interfaces.

1. Reading and writing directly to the /dev/fuse0 device
2. A low-level API via libfuse
3. A high-level API via libfuse

OpenBSD is currently only compatible with 3. To get from where we are
today to supporting all 3 interfaces, we need to do the following.

a. Update the kernel fuse device and libfuse to read and write entire
   fusebufs. Currently, ioctls are needed to to read and write the data
   for a fusebuf.
b. Update the kernel fuse device and libfuse to block on read until a
   new fusebuf is available. Currently, the kernel sends an event when a
   new fusebuf is available.
c. Change the fusebuf binary format to match other platforms.
d. Implement the low-level fuse API in libfuse.

This diff is the first part of a.

What do you think? Is this OK?

helg


Index: lib/libfuse/fuse.c
===================================================================
RCS file: /cvs/src/lib/libfuse/fuse.c,v
diff -u -p -r1.51 fuse.c
--- lib/libfuse/fuse.c	28 Jun 2019 13:32:42 -0000	1.51
+++ lib/libfuse/fuse.c	31 Aug 2025 14:26:02 -0000
@@ -17,7 +17,6 @@
 
 #include <sys/wait.h>
 #include <sys/types.h>
-#include <sys/ioctl.h>
 
 #include <miscfs/fuse/fusefs.h>
 
@@ -154,9 +153,9 @@ fuse_loop(struct fuse *fuse)
 {
 	struct fusebuf fbuf;
 	struct fuse_context ctx;
-	struct fb_ioctl_xch ioexch;
 	struct kevent event[5];
 	struct kevent ev;
+	ssize_t fbuf_size;
 	ssize_t n;
 	int ret;
 
@@ -201,29 +200,15 @@ fuse_loop(struct fuse *fuse)
 					strsignal(signum));
 			}
 		} else if (ret > 0) {
-			n = read(fuse->fc->fd, &fbuf, sizeof(fbuf));
-			if (n != sizeof(fbuf)) {
+			n = read(fuse->fc->fd, &fbuf, FUSEBUFSIZE);
+			fbuf_size = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) +
+			    fbuf.fb_len;
+			if (n != fbuf_size) {
 				fprintf(stderr, "%s: bad fusebuf read\n",
 				    __func__);
 				return (-1);
 			}
 
-			/* check if there is data something present */
-			if (fbuf.fb_len) {
-				fbuf.fb_dat = malloc(fbuf.fb_len);
-				if (fbuf.fb_dat == NULL)
-					return (-1);
-				ioexch.fbxch_uuid = fbuf.fb_uuid;
-				ioexch.fbxch_len = fbuf.fb_len;
-				ioexch.fbxch_data = fbuf.fb_dat;
-
-				if (ioctl(fuse->fc->fd, FIOCGETFBDAT,
-				    &ioexch) == -1) {
-					free(fbuf.fb_dat);
-					return (-1);
-				}
-			}
-
 			ctx.fuse = fuse;
 			ctx.uid = fbuf.fb_uid;
 			ctx.gid = fbuf.fb_gid;
@@ -238,26 +223,13 @@ fuse_loop(struct fuse *fuse)
 				return (-1);
 			}
 
-			n = write(fuse->fc->fd, &fbuf, sizeof(fbuf));
-			if (fbuf.fb_len) {
-				if (fbuf.fb_dat == NULL) {
-					fprintf(stderr, "%s: fb_dat is Null\n",
-					    __func__);
-					return (-1);
-				}
-				ioexch.fbxch_uuid = fbuf.fb_uuid;
-				ioexch.fbxch_len = fbuf.fb_len;
-				ioexch.fbxch_data = fbuf.fb_dat;
-
-				if (ioctl(fuse->fc->fd, FIOCSETFBDAT, &ioexch) == -1) {
-					free(fbuf.fb_dat);
-					return (-1);
-				}
-				free(fbuf.fb_dat);
-			}
+			fbuf_size = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) +
+			    fbuf.fb_len;
+			n = write(fuse->fc->fd, &fbuf, fbuf_size);
+
 			ictx = NULL;
 
-			if (n != FUSEBUFSIZE) {
+			if (n != fbuf_size) {
 				errno = EINVAL;
 				return (-1);
 			}
Index: lib/libfuse/fuse_ops.c
===================================================================
RCS file: /cvs/src/lib/libfuse/fuse_ops.c,v
diff -u -p -r1.35 fuse_ops.c
--- lib/libfuse/fuse_ops.c	16 Jul 2018 13:10:53 -0000	1.35
+++ lib/libfuse/fuse_ops.c	31 Aug 2025 14:26:02 -0000
@@ -22,6 +22,8 @@
 #include "fuse_private.h"
 #include "debug.h"
 
+#define MIN(a,b) (((a)<(b))?(a):(b))
+
 #define CHECK_OPT(opname)	DPRINTF("Opcode: %s\t", #opname);	\
 				DPRINTF("Inode: %llu\t",		\
 				    (unsigned long long)fbuf->fb_ino);	\
@@ -317,17 +319,9 @@ ifuse_ops_readdir(struct fuse *f, struct
 	offset = fbuf->fb_io_off;
 	size = fbuf->fb_io_len;
 
-	fbuf->fb_dat = calloc(1, size);
-
-	if (fbuf->fb_dat == NULL) {
-		fbuf->fb_err = -errno;
-		return (0);
-	}
-
 	vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
@@ -344,7 +338,6 @@ ifuse_ops_readdir(struct fuse *f, struct
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
@@ -362,9 +355,6 @@ ifuse_ops_readdir(struct fuse *f, struct
 	else if (fd.full && fbuf->fb_len == 0)
 		fbuf->fb_err = -ENOBUFS;
 
-	if (fbuf->fb_len == 0)
-		free(fbuf->fb_dat);
-
 	return (0);
 }
 
@@ -526,7 +516,6 @@ ifuse_ops_lookup(struct fuse *f, struct 
 			    fbuf->fb_ino);
 			if (vn == NULL) {
 				fbuf->fb_err = -errno;
-				free(fbuf->fb_dat);
 				return (0);
 			}
 			set_vn(f, vn); /*XXX*/
@@ -537,13 +526,11 @@ ifuse_ops_lookup(struct fuse *f, struct 
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
 	fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn);
 	fbuf->fb_ino = vn->ino;
-	free(fbuf->fb_dat);
 	free(realname);
 
 	return (0);
@@ -566,23 +553,15 @@ ifuse_ops_read(struct fuse *f, struct fu
 	size = fbuf->fb_io_len;
 	offset = fbuf->fb_io_off;
 
-	fbuf->fb_dat = malloc(size);
-	if (fbuf->fb_dat == NULL) {
-		fbuf->fb_err = -errno;
-		return (0);
-	}
-
 	vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
@@ -593,9 +572,6 @@ ifuse_ops_read(struct fuse *f, struct fu
 	else
 		fbuf->fb_err = ret;
 
-	if (fbuf->fb_len == 0)
-		free(fbuf->fb_dat);
-
 	return (0);
 }
 
@@ -621,20 +597,17 @@ ifuse_ops_write(struct fuse *f, struct f
 	vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
 	ret = f->op.write(realname, (char *)fbuf->fb_dat, size, offset, &ffi);
 	free(realname);
-	free(fbuf->fb_dat);
 
 	if (ret >= 0)
 		fbuf->fb_io_len = ret;
@@ -657,11 +630,9 @@ ifuse_ops_mkdir(struct fuse *f, struct f
 	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
-	free(fbuf->fb_dat);
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
@@ -690,11 +661,9 @@ ifuse_ops_rmdir(struct fuse *f, struct f
 	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
-	free(fbuf->fb_dat);
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
@@ -738,13 +707,8 @@ ifuse_ops_readlink(struct fuse *f, struc
 
 	fbuf->fb_err = ret;
 	if (!ret) {
-		len = strnlen(name, PATH_MAX);
+		len = strnlen(name, MIN(PATH_MAX, FUSEBUFMAXSIZE));
 		fbuf->fb_len = len;
-		fbuf->fb_dat = malloc(fbuf->fb_len);
-		if (fbuf->fb_dat == NULL) {
-			fbuf->fb_err = -errno;
-			return (0);
-		}
 		memcpy(fbuf->fb_dat, name, len);
 	} else
 		fbuf->fb_len = 0;
@@ -762,12 +726,10 @@ ifuse_ops_unlink(struct fuse *f, struct 
 
 	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vn == NULL) {
-		free(fbuf->fb_dat);
 		fbuf->fb_err = -errno;
 		return (0);
 	}
 
-	free(fbuf->fb_dat);
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
@@ -821,11 +783,9 @@ ifuse_ops_link(struct fuse *f, struct fu
 	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
-	free(fbuf->fb_dat);
 	realname = build_realname(f, oldnodeid);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
@@ -863,14 +823,12 @@ ifuse_ops_setattr(struct fuse *f, struct
 	vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 	io = fbtod(fbuf, struct fb_io *);
@@ -923,7 +881,6 @@ ifuse_ops_setattr(struct fuse *f, struct
 	if (!fbuf->fb_err)
 		fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn);
 	free(realname);
-	free(fbuf->fb_dat);
 
 	return (0);
 }
@@ -940,7 +897,6 @@ ifuse_ops_symlink(unused struct fuse *f,
 	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
@@ -949,7 +905,6 @@ ifuse_ops_symlink(unused struct fuse *f,
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
@@ -957,7 +912,6 @@ ifuse_ops_symlink(unused struct fuse *f,
 	fbuf->fb_err = f->op.symlink((const char *)&fbuf->fb_dat[len + 1],
 	    realname);
 	fbuf->fb_ino = vn->ino;
-	free(fbuf->fb_dat);
 	free(realname);
 
 	return (0);
@@ -978,7 +932,6 @@ ifuse_ops_rename(struct fuse *f, struct 
 	vnf = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vnf == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
@@ -986,12 +939,9 @@ ifuse_ops_rename(struct fuse *f, struct 
 	    fbuf->fb_io_ino);
 	if (vnt == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
-	free(fbuf->fb_dat);
-
 	realnamef = build_realname(f, vnf->ino);
 	if (realnamef == NULL) {
 		fbuf->fb_err = -errno;
@@ -1060,11 +1010,9 @@ ifuse_ops_mknod(struct fuse *f, struct f
 	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
-	free(fbuf->fb_dat);
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
Index: sys/miscfs/fuse/fuse_device.c
===================================================================
RCS file: /cvs/src/sys/miscfs/fuse/fuse_device.c,v
diff -u -p -r1.40 fuse_device.c
--- sys/miscfs/fuse/fuse_device.c	16 Dec 2023 22:17:08 -0000	1.40
+++ sys/miscfs/fuse/fuse_device.c	31 Aug 2025 14:26:04 -0000
@@ -404,13 +404,8 @@ fuseread(dev_t dev, struct uio *uio, int
 	struct fuse_d *fd;
 	struct fusebuf *fbuf;
 	struct fb_hdr hdr;
-	void *tmpaddr;
 	int error = 0;
 
-	/* We get the whole fusebuf or nothing */
-	if (uio->uio_resid != FUSEBUFSIZE)
-		return (EINVAL);
-
 	fd = fuse_lookup(minor(dev));
 	if (fd == NULL)
 		return (ENXIO);
@@ -424,29 +419,42 @@ fuseread(dev_t dev, struct uio *uio, int
 	}
 	fbuf = SIMPLEQ_FIRST(&fd->fd_fbufs_in);
 
+	/* We get the whole fusebuf or nothing */
+	if (uio->uio_resid < FUSEBUFSIZE + fbuf->fb_len) {
+		error = EINVAL;
+		goto end;
+	}
+
 	/* Do not send kernel pointers */
 	memcpy(&hdr.fh_next, &fbuf->fb_next, sizeof(fbuf->fb_next));
 	memset(&fbuf->fb_next, 0, sizeof(fbuf->fb_next));
-	tmpaddr = fbuf->fb_dat;
-	fbuf->fb_dat = NULL;
-	error = uiomove(fbuf, FUSEBUFSIZE, uio);
+
+	error = uiomove(&fbuf->fb_hdr, sizeof(fbuf->fb_hdr), uio);
+	if (error)
+		goto end;
+	error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio);
 	if (error)
 		goto end;
+	if (fbuf->fb_len > 0) {
+		error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio);
+		if (error)
+			goto end;
+	}
 
 #ifdef FUSE_DEBUG
 	fuse_dump_buff((char *)fbuf, FUSEBUFSIZE);
 #endif
 	/* Restore kernel pointers */
 	memcpy(&fbuf->fb_next, &hdr.fh_next, sizeof(fbuf->fb_next));
-	fbuf->fb_dat = tmpaddr;
 
-	/* Remove the fbuf if it does not contains data */
-	if (fbuf->fb_len == 0) {
-		SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next);
-		stat_fbufs_in--;
-		SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next);
-		stat_fbufs_wait++;
-	}
+	free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len);
+	fbuf->fb_dat = NULL;
+ 
+	/* Move the fbuf to the wait queue */
+	SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next);
+	stat_fbufs_in--;
+	SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next);
+	stat_fbufs_wait++;
 
 end:
 	rw_exit_write(&fd->fd_lock);
@@ -466,13 +474,17 @@ fusewrite(dev_t dev, struct uio *uio, in
 	if (fd == NULL)
 		return (ENXIO);
 
-	/* We get the whole fusebuf or nothing */
-	if (uio->uio_resid != FUSEBUFSIZE)
-		return (EINVAL);
-
 	if ((error = uiomove(&hdr, sizeof(hdr), uio)) != 0)
 		return (error);
 
+	/* Check for sanity */
+	if (hdr.fh_len > FUSEBUFMAXSIZE)
+		return (EINVAL);
+
+	/* We get the whole fusebuf or nothing */
+	if (uio->uio_resid != sizeof(fbuf->FD) + hdr.fh_len)
+		return (EINVAL);
+
 	/* looking for uuid in fd_fbufs_wait */
 	SIMPLEQ_FOREACH(fbuf, &fd->fd_fbufs_wait, fb_next) {
 		if (fbuf->fb_uuid == hdr.fh_uuid)
@@ -497,10 +509,19 @@ fusewrite(dev_t dev, struct uio *uio, in
 	}
 
 	/* Get the missing data from the fbuf */
-	error = uiomove(&fbuf->FD, uio->uio_resid, uio);
+	error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio);
 	if (error)
 		return error;
-	fbuf->fb_dat = NULL;
+
+	fbuf->fb_dat = malloc(fbuf->fb_len, M_FUSEFS,
+	    M_WAITOK | M_ZERO);
+	error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio);
+	if (error) {
+		free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len);
+		fbuf->fb_dat = NULL;
+		return (error);
+	}
+
 #ifdef FUSE_DEBUG
 	fuse_dump_buff((char *)fbuf, FUSEBUFSIZE);
 #endif
@@ -514,19 +535,17 @@ fusewrite(dev_t dev, struct uio *uio, in
 		break ;
 	}
 end:
-	/* Remove the fbuf if it does not contains data */
-	if (fbuf->fb_len == 0) {
-		if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait))
-			SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next);
-		else
-			SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf,
-			    fb_next);
-		stat_fbufs_wait--;
-		if (fbuf->fb_type == FBT_INIT)
-			fb_delete(fbuf);
-		else
-			wakeup(fbuf);
-	}
+	/* Remove the fbuf from the wait queue */
+	if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait))
+		SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next);
+	else
+		SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf,
+		    fb_next);
+	stat_fbufs_wait--;
+	if (fbuf->fb_type == FBT_INIT)
+		fb_delete(fbuf);
+	else
+		wakeup(fbuf);
 
 	return (error);
 }
Index: sys/sys/fusebuf.h
===================================================================
RCS file: /cvs/src/sys/sys/fusebuf.h,v
diff -u -p -r1.13 fusebuf.h
--- sys/sys/fusebuf.h	19 Jun 2018 11:27:54 -0000	1.13
+++ sys/sys/fusebuf.h	31 Aug 2025 14:26:04 -0000
@@ -69,7 +69,11 @@ struct fusebuf {
 		struct stat	FD_attr;	/* for attr vnops */
 		struct fb_io	FD_io;		/* for file io vnops */
 	} FD;
+#ifdef _KERNEL
 	uint8_t *fb_dat;			/* data's */
+#else
+	uint8_t fb_dat[FUSEBUFMAXSIZE];		/* userland does i/o with fixed size buffer */
+#endif
 };
 
 #define fb_next		fb_hdr.fh_next