Index | Thread | Search

From:
Helg <helg-openbsd@gmx.de>
Subject:
FUSE - read whole fusebuf v2
To:
tech@openbsd.org
Date:
Wed, 3 Sep 2025 19:50:36 +0200

Download raw body.

Thread
Hi tech@

I've taken Claudio's feedback onboard and updated my diff, and it's all
the better for it.

- no more #ifdef _Kernel in fusebuf.h
- libfuse uses readv(2) and writev(2) to read the fusebuf and any io data
  into an allocated buffer
- the libfuse buffer is 4MB since that's the max size that the kernel
  can send
- additional checks in the kernel and libfuse
- MIN(a,b) macro is gone

This is the first of a series of changes required to make the fuse
kernel interface compatible with other patforms. It updates /dev/fuse0 so
that fusebufs must be read and written in their entirety. Currently,
ioctls are needed to to read and write any io data associated with a
syscall such as read(2), readdir(3), readlinki(2) and write(2).

ok?


Index: lib/libfuse/fuse.c
===================================================================
RCS file: /cvs/src/lib/libfuse/fuse.c,v
diff -u -p -r1.53 fuse.c
--- lib/libfuse/fuse.c	2 Sep 2025 17:18:40 -0000	1.53
+++ lib/libfuse/fuse.c	4 Sep 2025 14:56:25 -0000
@@ -17,7 +17,7 @@
 
 #include <sys/wait.h>
 #include <sys/types.h>
-#include <sys/ioctl.h>
+#include <sys/uio.h>
 
 #include <miscfs/fuse/fusefs.h>
 
@@ -154,9 +154,10 @@ fuse_loop(struct fuse *fuse)
 {
 	struct fusebuf fbuf;
 	struct fuse_context ctx;
-	struct fb_ioctl_xch ioexch;
 	struct kevent event[5];
 	struct kevent ev;
+	struct iovec iov[2];
+	size_t fb_dat_size = FUSEBUFMAXSIZE;
 	ssize_t n;
 	int ret;
 
@@ -180,6 +181,14 @@ fuse_loop(struct fuse *fuse)
 	EV_SET(&event[4], SIGTERM, EVFILT_SIGNAL, EV_ADD |
 	    EV_ENABLE, 0, 0, 0);
 
+	/* prepare the read and write data buffer */
+	fbuf.fb_dat = malloc(fb_dat_size);
+	if (fbuf.fb_dat == NULL)
+		return (-1);
+	iov[0].iov_base = &fbuf;
+	iov[0].iov_len  = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD);
+	iov[1].iov_base = fbuf.fb_dat;
+
 	while (!fuse->fc->dead) {
 		ret = kevent(fuse->fc->kq, &event[0], 5, &ev, 1, NULL);
 		if (ret == -1) {
@@ -201,27 +210,38 @@ fuse_loop(struct fuse *fuse)
 					strsignal(signum));
 			}
 		} else if (ret > 0) {
-			n = read(fuse->fc->fd, &fbuf, sizeof(fbuf));
-			if (n != sizeof(fbuf)) {
+			iov[1].iov_len = fb_dat_size;
+			n = readv(fuse->fc->fd, iov, 2);
+			if (n == -1) {
+				perror("fuse_loop");
+				fprintf(stderr, "%s: bad fusebuf read: %s\n",
+				    __func__, strerror(errno));
+				free(fbuf.fb_dat);
+				return (-1);
+			}
+			if (n < (ssize_t)sizeof(fbuf.fb_hdr)) {
 				fprintf(stderr, "%s: bad fusebuf read\n",
 				    __func__);
+				free(fbuf.fb_dat);
+				return (-1);
+			}
+			if ((size_t)n != sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) +
+                            fbuf.fb_len) {
+				fprintf(stderr, "%s: bad fusebuf read\n",
+				    __func__);
+				free(fbuf.fb_dat);
 				return (-1);
 			}
 
-			/* check if there is data something present */
-			if (fbuf.fb_len) {
-				fbuf.fb_dat = malloc(fbuf.fb_len);
-				if (fbuf.fb_dat == NULL)
-					return (-1);
-				ioexch.fbxch_uuid = fbuf.fb_uuid;
-				ioexch.fbxch_len = fbuf.fb_len;
-				ioexch.fbxch_data = fbuf.fb_dat;
-
-				if (ioctl(fuse->fc->fd, FIOCGETFBDAT,
-				    &ioexch) == -1) {
-					free(fbuf.fb_dat);
-					return (-1);
-				}
+			/*
+			 * fuse_ops check that they do not write more than
+			 * fb_io_len when writing to fb_dat
+			 */
+			if (fbuf.fb_io_len > fb_dat_size) {
+				fprintf(stderr, "%s: io exceeds buffer size\n",
+				    __func__);
+				free(fbuf.fb_dat);
+				return (-1);
 			}
 
 			ctx.fuse = fuse;
@@ -235,35 +255,25 @@ fuse_loop(struct fuse *fuse)
 			ret = ifuse_exec_opcode(fuse, &fbuf);
 			if (ret) {
 				ictx = NULL;
+				free(fbuf.fb_dat);
 				return (-1);
 			}
 
-			n = write(fuse->fc->fd, &fbuf, sizeof(fbuf));
-			if (fbuf.fb_len) {
-				if (fbuf.fb_dat == NULL) {
-					fprintf(stderr, "%s: fb_dat is Null\n",
-					    __func__);
-					return (-1);
-				}
-				ioexch.fbxch_uuid = fbuf.fb_uuid;
-				ioexch.fbxch_len = fbuf.fb_len;
-				ioexch.fbxch_data = fbuf.fb_dat;
-
-				if (ioctl(fuse->fc->fd, FIOCSETFBDAT, &ioexch) == -1) {
-					free(fbuf.fb_dat);
-					return (-1);
-				}
-				free(fbuf.fb_dat);
-			}
+			iov[1].iov_len = fbuf.fb_len;
+			n = writev(fuse->fc->fd, iov, 2);
+
 			ictx = NULL;
 
-			if (n != FUSEBUFSIZE) {
+			if (n == -1 || (size_t)n != sizeof(fbuf.fb_hdr) +
+			    sizeof(fbuf.FD) + fbuf.fb_len) {
 				errno = EINVAL;
+				free(fbuf.fb_dat);
 				return (-1);
 			}
 		}
 	}
 
+	free(fbuf.fb_dat);
 	return (0);
 }
 DEF(fuse_loop);
Index: lib/libfuse/fuse_ops.c
===================================================================
RCS file: /cvs/src/lib/libfuse/fuse_ops.c,v
diff -u -p -r1.37 fuse_ops.c
--- lib/libfuse/fuse_ops.c	2 Sep 2025 17:18:40 -0000	1.37
+++ lib/libfuse/fuse_ops.c	4 Sep 2025 14:56:25 -0000
@@ -317,17 +317,9 @@ ifuse_ops_readdir(struct fuse *f, struct
 	offset = fbuf->fb_io_off;
 	size = fbuf->fb_io_len;
 
-	fbuf->fb_dat = calloc(1, size);
-
-	if (fbuf->fb_dat == NULL) {
-		fbuf->fb_err = -errno;
-		return (0);
-	}
-
 	vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
@@ -344,7 +336,6 @@ ifuse_ops_readdir(struct fuse *f, struct
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
@@ -362,9 +353,6 @@ ifuse_ops_readdir(struct fuse *f, struct
 	else if (fd.full && fbuf->fb_len == 0)
 		fbuf->fb_err = -ENOBUFS;
 
-	if (fbuf->fb_len == 0)
-		free(fbuf->fb_dat);
-
 	return (0);
 }
 
@@ -526,7 +514,6 @@ ifuse_ops_lookup(struct fuse *f, struct 
 			    fbuf->fb_ino);
 			if (vn == NULL) {
 				fbuf->fb_err = -errno;
-				free(fbuf->fb_dat);
 				return (0);
 			}
 			set_vn(f, vn); /*XXX*/
@@ -537,13 +524,11 @@ ifuse_ops_lookup(struct fuse *f, struct 
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
 	fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn);
 	fbuf->fb_ino = vn->ino;
-	free(fbuf->fb_dat);
 	free(realname);
 
 	return (0);
@@ -566,23 +551,15 @@ ifuse_ops_read(struct fuse *f, struct fu
 	size = fbuf->fb_io_len;
 	offset = fbuf->fb_io_off;
 
-	fbuf->fb_dat = malloc(size);
-	if (fbuf->fb_dat == NULL) {
-		fbuf->fb_err = -errno;
-		return (0);
-	}
-
 	vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
@@ -593,9 +570,6 @@ ifuse_ops_read(struct fuse *f, struct fu
 	else
 		fbuf->fb_err = ret;
 
-	if (fbuf->fb_len == 0)
-		free(fbuf->fb_dat);
-
 	return (0);
 }
 
@@ -621,20 +595,17 @@ ifuse_ops_write(struct fuse *f, struct f
 	vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
 	ret = f->op.write(realname, (char *)fbuf->fb_dat, size, offset, &ffi);
 	free(realname);
-	free(fbuf->fb_dat);
 
 	if (ret >= 0)
 		fbuf->fb_io_len = ret;
@@ -657,11 +628,9 @@ ifuse_ops_mkdir(struct fuse *f, struct f
 	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
-	free(fbuf->fb_dat);
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
@@ -690,11 +659,9 @@ ifuse_ops_rmdir(struct fuse *f, struct f
 	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
-	free(fbuf->fb_dat);
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
@@ -740,11 +707,6 @@ ifuse_ops_readlink(struct fuse *f, struc
 	if (!ret) {
 		len = strnlen(name, PATH_MAX);
 		fbuf->fb_len = len;
-		fbuf->fb_dat = malloc(fbuf->fb_len);
-		if (fbuf->fb_dat == NULL) {
-			fbuf->fb_err = -errno;
-			return (0);
-		}
 		memcpy(fbuf->fb_dat, name, len);
 	} else
 		fbuf->fb_len = 0;
@@ -762,12 +724,10 @@ ifuse_ops_unlink(struct fuse *f, struct 
 
 	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vn == NULL) {
-		free(fbuf->fb_dat);
 		fbuf->fb_err = -errno;
 		return (0);
 	}
 
-	free(fbuf->fb_dat);
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
@@ -821,11 +781,9 @@ ifuse_ops_link(struct fuse *f, struct fu
 	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
-	free(fbuf->fb_dat);
 	realname = build_realname(f, oldnodeid);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
@@ -863,14 +821,12 @@ ifuse_ops_setattr(struct fuse *f, struct
 	vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 	io = fbtod(fbuf, struct fb_io *);
@@ -923,7 +879,6 @@ ifuse_ops_setattr(struct fuse *f, struct
 	if (!fbuf->fb_err)
 		fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn);
 	free(realname);
-	free(fbuf->fb_dat);
 
 	return (0);
 }
@@ -940,7 +895,6 @@ ifuse_ops_symlink(unused struct fuse *f,
 	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
@@ -949,7 +903,6 @@ ifuse_ops_symlink(unused struct fuse *f,
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
@@ -957,7 +910,6 @@ ifuse_ops_symlink(unused struct fuse *f,
 	fbuf->fb_err = f->op.symlink((const char *)&fbuf->fb_dat[len + 1],
 	    realname);
 	fbuf->fb_ino = vn->ino;
-	free(fbuf->fb_dat);
 	free(realname);
 
 	return (0);
@@ -978,7 +930,6 @@ ifuse_ops_rename(struct fuse *f, struct 
 	vnf = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vnf == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
@@ -986,12 +937,9 @@ ifuse_ops_rename(struct fuse *f, struct 
 	    fbuf->fb_io_ino);
 	if (vnt == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
-	free(fbuf->fb_dat);
-
 	realnamef = build_realname(f, vnf->ino);
 	if (realnamef == NULL) {
 		fbuf->fb_err = -errno;
@@ -1060,11 +1008,9 @@ ifuse_ops_mknod(struct fuse *f, struct f
 	vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
 	if (vn == NULL) {
 		fbuf->fb_err = -errno;
-		free(fbuf->fb_dat);
 		return (0);
 	}
 
-	free(fbuf->fb_dat);
 	realname = build_realname(f, vn->ino);
 	if (realname == NULL) {
 		fbuf->fb_err = -errno;
Index: sys/miscfs/fuse/fuse_device.c
===================================================================
RCS file: /cvs/src/sys/miscfs/fuse/fuse_device.c,v
diff -u -p -r1.42 fuse_device.c
--- sys/miscfs/fuse/fuse_device.c	2 Sep 2025 17:18:40 -0000	1.42
+++ sys/miscfs/fuse/fuse_device.c	4 Sep 2025 14:56:44 -0000
@@ -404,13 +404,8 @@ fuseread(dev_t dev, struct uio *uio, int
 	struct fuse_d *fd;
 	struct fusebuf *fbuf;
 	struct fb_hdr hdr;
-	void *tmpaddr;
 	int error = 0;
 
-	/* We get the whole fusebuf or nothing */
-	if (uio->uio_resid != FUSEBUFSIZE)
-		return (EINVAL);
-
 	fd = fuse_lookup(minor(dev));
 	if (fd == NULL)
 		return (ENXIO);
@@ -424,29 +419,43 @@ fuseread(dev_t dev, struct uio *uio, int
 	}
 	fbuf = SIMPLEQ_FIRST(&fd->fd_fbufs_in);
 
+	/* We get the whole fusebuf or nothing */
+	if (uio->uio_resid < sizeof(fbuf->fb_hdr) + sizeof(fbuf->FD) +
+	    fbuf->fb_len) {
+		error = EINVAL;
+		goto end;
+	}
+
 	/* Do not send kernel pointers */
 	memcpy(&hdr.fh_next, &fbuf->fb_next, sizeof(fbuf->fb_next));
 	memset(&fbuf->fb_next, 0, sizeof(fbuf->fb_next));
-	tmpaddr = fbuf->fb_dat;
-	fbuf->fb_dat = NULL;
-	error = uiomove(fbuf, FUSEBUFSIZE, uio);
+
+	error = uiomove(&fbuf->fb_hdr, sizeof(fbuf->fb_hdr), uio);
 	if (error)
 		goto end;
+	error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio);
+	if (error)
+		goto end;
+	if (fbuf->fb_len > 0) {
+		error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio);
+		if (error)
+			goto end;
+	}
 
 #ifdef FUSE_DEBUG
-	fuse_dump_buff((char *)fbuf, FUSEBUFSIZE);
+	fuse_dump_buff((char *)fbuf, sizeof(struct fusebuf));
 #endif
 	/* Restore kernel pointers */
 	memcpy(&fbuf->fb_next, &hdr.fh_next, sizeof(fbuf->fb_next));
-	fbuf->fb_dat = tmpaddr;
 
-	/* Remove the fbuf if it does not contains data */
-	if (fbuf->fb_len == 0) {
-		SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next);
-		stat_fbufs_in--;
-		SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next);
-		stat_fbufs_wait++;
-	}
+	free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len);
+	fbuf->fb_dat = NULL;
+
+	/* Move the fbuf to the wait queue */
+	SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next);
+	stat_fbufs_in--;
+	SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next);
+	stat_fbufs_wait++;
 
 end:
 	rw_exit_write(&fd->fd_lock);
@@ -466,13 +475,21 @@ fusewrite(dev_t dev, struct uio *uio, in
 	if (fd == NULL)
 		return (ENXIO);
 
-	/* We get the whole fusebuf or nothing */
-	if (uio->uio_resid != FUSEBUFSIZE)
+	/* Check for sanity - must receive more than just the header */
+	if (uio->uio_resid <= sizeof(hdr))
 		return (EINVAL);
 
 	if ((error = uiomove(&hdr, sizeof(hdr), uio)) != 0)
 		return (error);
 
+	/* Check for sanity */
+	if (hdr.fh_len > FUSEBUFMAXSIZE)
+		return (EINVAL);
+
+	/* We get the whole fusebuf or nothing */
+	if (uio->uio_resid != sizeof(fbuf->FD) + hdr.fh_len)
+		return (EINVAL);
+
 	/* looking for uuid in fd_fbufs_wait */
 	SIMPLEQ_FOREACH(fbuf, &fd->fd_fbufs_wait, fb_next) {
 		if (fbuf->fb_uuid == hdr.fh_uuid)
@@ -497,12 +514,24 @@ fusewrite(dev_t dev, struct uio *uio, in
 	}
 
 	/* Get the missing data from the fbuf */
-	error = uiomove(&fbuf->FD, uio->uio_resid, uio);
+	error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio);
 	if (error)
 		return error;
+
 	fbuf->fb_dat = NULL;
+	if (fbuf->fb_len > 0) {
+		fbuf->fb_dat = malloc(fbuf->fb_len, M_FUSEFS,
+		    M_WAITOK | M_ZERO);
+		error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio);
+		if (error) {
+			free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len);
+			fbuf->fb_dat = NULL;
+			return (error);
+		}
+	}
+
 #ifdef FUSE_DEBUG
-	fuse_dump_buff((char *)fbuf, FUSEBUFSIZE);
+	fuse_dump_buff((char *)fbuf, sizeof(struct fusebuf));
 #endif
 
 	switch (fbuf->fb_type) {
@@ -514,19 +543,17 @@ fusewrite(dev_t dev, struct uio *uio, in
 		break ;
 	}
 end:
-	/* Remove the fbuf if it does not contains data */
-	if (fbuf->fb_len == 0) {
-		if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait))
-			SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next);
-		else
-			SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf,
-			    fb_next);
-		stat_fbufs_wait--;
-		if (fbuf->fb_type == FBT_INIT)
-			fb_delete(fbuf);
-		else
-			wakeup(fbuf);
-	}
+	/* Remove the fbuf from the wait queue */
+	if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait))
+		SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next);
+	else
+		SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf,
+		    fb_next);
+	stat_fbufs_wait--;
+	if (fbuf->fb_type == FBT_INIT)
+		fb_delete(fbuf);
+	else
+		wakeup(fbuf);
 
 	return (error);
 }
Index: sys/sys/fusebuf.h
===================================================================
RCS file: /cvs/src/sys/sys/fusebuf.h,v
diff -u -p -r1.15 fusebuf.h
--- sys/sys/fusebuf.h	2 Sep 2025 17:18:40 -0000	1.15
+++ sys/sys/fusebuf.h	4 Sep 2025 14:56:44 -0000
@@ -20,9 +20,9 @@
 #define _SYS_FUSEBUF_H_
 
 /*
- * Fusebufs are of a single size, 4096 bytes.
+ * Maximum size of the read or write buffer sent from the kernel for VFS
+ * syscalls: read, readdir, readlink, write.
  */
-#define	FUSEBUFSIZE	(sizeof(struct fusebuf))
 #define FUSEBUFMAXSIZE	(4096*1024)
 
 /* header at beginning of each fusebuf: */