Download raw body.
FUSE - read whole fusebuf v2
Hi tech@
I've taken Claudio's feedback onboard and updated my diff, and it's all
the better for it.
- no more #ifdef _Kernel in fusebuf.h
- libfuse uses readv(2) and writev(2) to read the fusebuf and any io data
into an allocated buffer
- the libfuse buffer is 4MB since that's the max size that the kernel
can send
- additional checks in the kernel and libfuse
- MIN(a,b) macro is gone
This is the first of a series of changes required to make the fuse
kernel interface compatible with other patforms. It updates /dev/fuse0 so
that fusebufs must be read and written in their entirety. Currently,
ioctls are needed to to read and write any io data associated with a
syscall such as read(2), readdir(3), readlinki(2) and write(2).
ok?
Index: lib/libfuse/fuse.c
===================================================================
RCS file: /cvs/src/lib/libfuse/fuse.c,v
diff -u -p -r1.53 fuse.c
--- lib/libfuse/fuse.c 2 Sep 2025 17:18:40 -0000 1.53
+++ lib/libfuse/fuse.c 4 Sep 2025 14:56:25 -0000
@@ -17,7 +17,7 @@
#include <sys/wait.h>
#include <sys/types.h>
-#include <sys/ioctl.h>
+#include <sys/uio.h>
#include <miscfs/fuse/fusefs.h>
@@ -154,9 +154,10 @@ fuse_loop(struct fuse *fuse)
{
struct fusebuf fbuf;
struct fuse_context ctx;
- struct fb_ioctl_xch ioexch;
struct kevent event[5];
struct kevent ev;
+ struct iovec iov[2];
+ size_t fb_dat_size = FUSEBUFMAXSIZE;
ssize_t n;
int ret;
@@ -180,6 +181,14 @@ fuse_loop(struct fuse *fuse)
EV_SET(&event[4], SIGTERM, EVFILT_SIGNAL, EV_ADD |
EV_ENABLE, 0, 0, 0);
+ /* prepare the read and write data buffer */
+ fbuf.fb_dat = malloc(fb_dat_size);
+ if (fbuf.fb_dat == NULL)
+ return (-1);
+ iov[0].iov_base = &fbuf;
+ iov[0].iov_len = sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD);
+ iov[1].iov_base = fbuf.fb_dat;
+
while (!fuse->fc->dead) {
ret = kevent(fuse->fc->kq, &event[0], 5, &ev, 1, NULL);
if (ret == -1) {
@@ -201,27 +210,38 @@ fuse_loop(struct fuse *fuse)
strsignal(signum));
}
} else if (ret > 0) {
- n = read(fuse->fc->fd, &fbuf, sizeof(fbuf));
- if (n != sizeof(fbuf)) {
+ iov[1].iov_len = fb_dat_size;
+ n = readv(fuse->fc->fd, iov, 2);
+ if (n == -1) {
+ perror("fuse_loop");
+ fprintf(stderr, "%s: bad fusebuf read: %s\n",
+ __func__, strerror(errno));
+ free(fbuf.fb_dat);
+ return (-1);
+ }
+ if (n < (ssize_t)sizeof(fbuf.fb_hdr)) {
fprintf(stderr, "%s: bad fusebuf read\n",
__func__);
+ free(fbuf.fb_dat);
+ return (-1);
+ }
+ if ((size_t)n != sizeof(fbuf.fb_hdr) + sizeof(fbuf.FD) +
+ fbuf.fb_len) {
+ fprintf(stderr, "%s: bad fusebuf read\n",
+ __func__);
+ free(fbuf.fb_dat);
return (-1);
}
- /* check if there is data something present */
- if (fbuf.fb_len) {
- fbuf.fb_dat = malloc(fbuf.fb_len);
- if (fbuf.fb_dat == NULL)
- return (-1);
- ioexch.fbxch_uuid = fbuf.fb_uuid;
- ioexch.fbxch_len = fbuf.fb_len;
- ioexch.fbxch_data = fbuf.fb_dat;
-
- if (ioctl(fuse->fc->fd, FIOCGETFBDAT,
- &ioexch) == -1) {
- free(fbuf.fb_dat);
- return (-1);
- }
+ /*
+ * fuse_ops check that they do not write more than
+ * fb_io_len when writing to fb_dat
+ */
+ if (fbuf.fb_io_len > fb_dat_size) {
+ fprintf(stderr, "%s: io exceeds buffer size\n",
+ __func__);
+ free(fbuf.fb_dat);
+ return (-1);
}
ctx.fuse = fuse;
@@ -235,35 +255,25 @@ fuse_loop(struct fuse *fuse)
ret = ifuse_exec_opcode(fuse, &fbuf);
if (ret) {
ictx = NULL;
+ free(fbuf.fb_dat);
return (-1);
}
- n = write(fuse->fc->fd, &fbuf, sizeof(fbuf));
- if (fbuf.fb_len) {
- if (fbuf.fb_dat == NULL) {
- fprintf(stderr, "%s: fb_dat is Null\n",
- __func__);
- return (-1);
- }
- ioexch.fbxch_uuid = fbuf.fb_uuid;
- ioexch.fbxch_len = fbuf.fb_len;
- ioexch.fbxch_data = fbuf.fb_dat;
-
- if (ioctl(fuse->fc->fd, FIOCSETFBDAT, &ioexch) == -1) {
- free(fbuf.fb_dat);
- return (-1);
- }
- free(fbuf.fb_dat);
- }
+ iov[1].iov_len = fbuf.fb_len;
+ n = writev(fuse->fc->fd, iov, 2);
+
ictx = NULL;
- if (n != FUSEBUFSIZE) {
+ if (n == -1 || (size_t)n != sizeof(fbuf.fb_hdr) +
+ sizeof(fbuf.FD) + fbuf.fb_len) {
errno = EINVAL;
+ free(fbuf.fb_dat);
return (-1);
}
}
}
+ free(fbuf.fb_dat);
return (0);
}
DEF(fuse_loop);
Index: lib/libfuse/fuse_ops.c
===================================================================
RCS file: /cvs/src/lib/libfuse/fuse_ops.c,v
diff -u -p -r1.37 fuse_ops.c
--- lib/libfuse/fuse_ops.c 2 Sep 2025 17:18:40 -0000 1.37
+++ lib/libfuse/fuse_ops.c 4 Sep 2025 14:56:25 -0000
@@ -317,17 +317,9 @@ ifuse_ops_readdir(struct fuse *f, struct
offset = fbuf->fb_io_off;
size = fbuf->fb_io_len;
- fbuf->fb_dat = calloc(1, size);
-
- if (fbuf->fb_dat == NULL) {
- fbuf->fb_err = -errno;
- return (0);
- }
-
vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
if (vn == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
@@ -344,7 +336,6 @@ ifuse_ops_readdir(struct fuse *f, struct
realname = build_realname(f, vn->ino);
if (realname == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
@@ -362,9 +353,6 @@ ifuse_ops_readdir(struct fuse *f, struct
else if (fd.full && fbuf->fb_len == 0)
fbuf->fb_err = -ENOBUFS;
- if (fbuf->fb_len == 0)
- free(fbuf->fb_dat);
-
return (0);
}
@@ -526,7 +514,6 @@ ifuse_ops_lookup(struct fuse *f, struct
fbuf->fb_ino);
if (vn == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
set_vn(f, vn); /*XXX*/
@@ -537,13 +524,11 @@ ifuse_ops_lookup(struct fuse *f, struct
realname = build_realname(f, vn->ino);
if (realname == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn);
fbuf->fb_ino = vn->ino;
- free(fbuf->fb_dat);
free(realname);
return (0);
@@ -566,23 +551,15 @@ ifuse_ops_read(struct fuse *f, struct fu
size = fbuf->fb_io_len;
offset = fbuf->fb_io_off;
- fbuf->fb_dat = malloc(size);
- if (fbuf->fb_dat == NULL) {
- fbuf->fb_err = -errno;
- return (0);
- }
-
vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
if (vn == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
realname = build_realname(f, vn->ino);
if (realname == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
@@ -593,9 +570,6 @@ ifuse_ops_read(struct fuse *f, struct fu
else
fbuf->fb_err = ret;
- if (fbuf->fb_len == 0)
- free(fbuf->fb_dat);
-
return (0);
}
@@ -621,20 +595,17 @@ ifuse_ops_write(struct fuse *f, struct f
vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
if (vn == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
realname = build_realname(f, vn->ino);
if (realname == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
ret = f->op.write(realname, (char *)fbuf->fb_dat, size, offset, &ffi);
free(realname);
- free(fbuf->fb_dat);
if (ret >= 0)
fbuf->fb_io_len = ret;
@@ -657,11 +628,9 @@ ifuse_ops_mkdir(struct fuse *f, struct f
vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
if (vn == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
- free(fbuf->fb_dat);
realname = build_realname(f, vn->ino);
if (realname == NULL) {
fbuf->fb_err = -errno;
@@ -690,11 +659,9 @@ ifuse_ops_rmdir(struct fuse *f, struct f
vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
if (vn == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
- free(fbuf->fb_dat);
realname = build_realname(f, vn->ino);
if (realname == NULL) {
fbuf->fb_err = -errno;
@@ -740,11 +707,6 @@ ifuse_ops_readlink(struct fuse *f, struc
if (!ret) {
len = strnlen(name, PATH_MAX);
fbuf->fb_len = len;
- fbuf->fb_dat = malloc(fbuf->fb_len);
- if (fbuf->fb_dat == NULL) {
- fbuf->fb_err = -errno;
- return (0);
- }
memcpy(fbuf->fb_dat, name, len);
} else
fbuf->fb_len = 0;
@@ -762,12 +724,10 @@ ifuse_ops_unlink(struct fuse *f, struct
vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
if (vn == NULL) {
- free(fbuf->fb_dat);
fbuf->fb_err = -errno;
return (0);
}
- free(fbuf->fb_dat);
realname = build_realname(f, vn->ino);
if (realname == NULL) {
fbuf->fb_err = -errno;
@@ -821,11 +781,9 @@ ifuse_ops_link(struct fuse *f, struct fu
vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
if (vn == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
- free(fbuf->fb_dat);
realname = build_realname(f, oldnodeid);
if (realname == NULL) {
fbuf->fb_err = -errno;
@@ -863,14 +821,12 @@ ifuse_ops_setattr(struct fuse *f, struct
vn = tree_get(&f->vnode_tree, fbuf->fb_ino);
if (vn == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
realname = build_realname(f, vn->ino);
if (realname == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
io = fbtod(fbuf, struct fb_io *);
@@ -923,7 +879,6 @@ ifuse_ops_setattr(struct fuse *f, struct
if (!fbuf->fb_err)
fbuf->fb_err = update_attr(f, &fbuf->fb_attr, realname, vn);
free(realname);
- free(fbuf->fb_dat);
return (0);
}
@@ -940,7 +895,6 @@ ifuse_ops_symlink(unused struct fuse *f,
vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
if (vn == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
@@ -949,7 +903,6 @@ ifuse_ops_symlink(unused struct fuse *f,
realname = build_realname(f, vn->ino);
if (realname == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
@@ -957,7 +910,6 @@ ifuse_ops_symlink(unused struct fuse *f,
fbuf->fb_err = f->op.symlink((const char *)&fbuf->fb_dat[len + 1],
realname);
fbuf->fb_ino = vn->ino;
- free(fbuf->fb_dat);
free(realname);
return (0);
@@ -978,7 +930,6 @@ ifuse_ops_rename(struct fuse *f, struct
vnf = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
if (vnf == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
@@ -986,12 +937,9 @@ ifuse_ops_rename(struct fuse *f, struct
fbuf->fb_io_ino);
if (vnt == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
- free(fbuf->fb_dat);
-
realnamef = build_realname(f, vnf->ino);
if (realnamef == NULL) {
fbuf->fb_err = -errno;
@@ -1060,11 +1008,9 @@ ifuse_ops_mknod(struct fuse *f, struct f
vn = get_vn_by_name_and_parent(f, fbuf->fb_dat, fbuf->fb_ino);
if (vn == NULL) {
fbuf->fb_err = -errno;
- free(fbuf->fb_dat);
return (0);
}
- free(fbuf->fb_dat);
realname = build_realname(f, vn->ino);
if (realname == NULL) {
fbuf->fb_err = -errno;
Index: sys/miscfs/fuse/fuse_device.c
===================================================================
RCS file: /cvs/src/sys/miscfs/fuse/fuse_device.c,v
diff -u -p -r1.42 fuse_device.c
--- sys/miscfs/fuse/fuse_device.c 2 Sep 2025 17:18:40 -0000 1.42
+++ sys/miscfs/fuse/fuse_device.c 4 Sep 2025 14:56:44 -0000
@@ -404,13 +404,8 @@ fuseread(dev_t dev, struct uio *uio, int
struct fuse_d *fd;
struct fusebuf *fbuf;
struct fb_hdr hdr;
- void *tmpaddr;
int error = 0;
- /* We get the whole fusebuf or nothing */
- if (uio->uio_resid != FUSEBUFSIZE)
- return (EINVAL);
-
fd = fuse_lookup(minor(dev));
if (fd == NULL)
return (ENXIO);
@@ -424,29 +419,43 @@ fuseread(dev_t dev, struct uio *uio, int
}
fbuf = SIMPLEQ_FIRST(&fd->fd_fbufs_in);
+ /* We get the whole fusebuf or nothing */
+ if (uio->uio_resid < sizeof(fbuf->fb_hdr) + sizeof(fbuf->FD) +
+ fbuf->fb_len) {
+ error = EINVAL;
+ goto end;
+ }
+
/* Do not send kernel pointers */
memcpy(&hdr.fh_next, &fbuf->fb_next, sizeof(fbuf->fb_next));
memset(&fbuf->fb_next, 0, sizeof(fbuf->fb_next));
- tmpaddr = fbuf->fb_dat;
- fbuf->fb_dat = NULL;
- error = uiomove(fbuf, FUSEBUFSIZE, uio);
+
+ error = uiomove(&fbuf->fb_hdr, sizeof(fbuf->fb_hdr), uio);
if (error)
goto end;
+ error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio);
+ if (error)
+ goto end;
+ if (fbuf->fb_len > 0) {
+ error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio);
+ if (error)
+ goto end;
+ }
#ifdef FUSE_DEBUG
- fuse_dump_buff((char *)fbuf, FUSEBUFSIZE);
+ fuse_dump_buff((char *)fbuf, sizeof(struct fusebuf));
#endif
/* Restore kernel pointers */
memcpy(&fbuf->fb_next, &hdr.fh_next, sizeof(fbuf->fb_next));
- fbuf->fb_dat = tmpaddr;
- /* Remove the fbuf if it does not contains data */
- if (fbuf->fb_len == 0) {
- SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next);
- stat_fbufs_in--;
- SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next);
- stat_fbufs_wait++;
- }
+ free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len);
+ fbuf->fb_dat = NULL;
+
+ /* Move the fbuf to the wait queue */
+ SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_in, fb_next);
+ stat_fbufs_in--;
+ SIMPLEQ_INSERT_TAIL(&fd->fd_fbufs_wait, fbuf, fb_next);
+ stat_fbufs_wait++;
end:
rw_exit_write(&fd->fd_lock);
@@ -466,13 +475,21 @@ fusewrite(dev_t dev, struct uio *uio, in
if (fd == NULL)
return (ENXIO);
- /* We get the whole fusebuf or nothing */
- if (uio->uio_resid != FUSEBUFSIZE)
+ /* Check for sanity - must receive more than just the header */
+ if (uio->uio_resid <= sizeof(hdr))
return (EINVAL);
if ((error = uiomove(&hdr, sizeof(hdr), uio)) != 0)
return (error);
+ /* Check for sanity */
+ if (hdr.fh_len > FUSEBUFMAXSIZE)
+ return (EINVAL);
+
+ /* We get the whole fusebuf or nothing */
+ if (uio->uio_resid != sizeof(fbuf->FD) + hdr.fh_len)
+ return (EINVAL);
+
/* looking for uuid in fd_fbufs_wait */
SIMPLEQ_FOREACH(fbuf, &fd->fd_fbufs_wait, fb_next) {
if (fbuf->fb_uuid == hdr.fh_uuid)
@@ -497,12 +514,24 @@ fusewrite(dev_t dev, struct uio *uio, in
}
/* Get the missing data from the fbuf */
- error = uiomove(&fbuf->FD, uio->uio_resid, uio);
+ error = uiomove(&fbuf->FD, sizeof(fbuf->FD), uio);
if (error)
return error;
+
fbuf->fb_dat = NULL;
+ if (fbuf->fb_len > 0) {
+ fbuf->fb_dat = malloc(fbuf->fb_len, M_FUSEFS,
+ M_WAITOK | M_ZERO);
+ error = uiomove(fbuf->fb_dat, fbuf->fb_len, uio);
+ if (error) {
+ free(fbuf->fb_dat, M_FUSEFS, fbuf->fb_len);
+ fbuf->fb_dat = NULL;
+ return (error);
+ }
+ }
+
#ifdef FUSE_DEBUG
- fuse_dump_buff((char *)fbuf, FUSEBUFSIZE);
+ fuse_dump_buff((char *)fbuf, sizeof(struct fusebuf));
#endif
switch (fbuf->fb_type) {
@@ -514,19 +543,17 @@ fusewrite(dev_t dev, struct uio *uio, in
break ;
}
end:
- /* Remove the fbuf if it does not contains data */
- if (fbuf->fb_len == 0) {
- if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait))
- SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next);
- else
- SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf,
- fb_next);
- stat_fbufs_wait--;
- if (fbuf->fb_type == FBT_INIT)
- fb_delete(fbuf);
- else
- wakeup(fbuf);
- }
+ /* Remove the fbuf from the wait queue */
+ if (fbuf == SIMPLEQ_FIRST(&fd->fd_fbufs_wait))
+ SIMPLEQ_REMOVE_HEAD(&fd->fd_fbufs_wait, fb_next);
+ else
+ SIMPLEQ_REMOVE_AFTER(&fd->fd_fbufs_wait, lastfbuf,
+ fb_next);
+ stat_fbufs_wait--;
+ if (fbuf->fb_type == FBT_INIT)
+ fb_delete(fbuf);
+ else
+ wakeup(fbuf);
return (error);
}
Index: sys/sys/fusebuf.h
===================================================================
RCS file: /cvs/src/sys/sys/fusebuf.h,v
diff -u -p -r1.15 fusebuf.h
--- sys/sys/fusebuf.h 2 Sep 2025 17:18:40 -0000 1.15
+++ sys/sys/fusebuf.h 4 Sep 2025 14:56:44 -0000
@@ -20,9 +20,9 @@
#define _SYS_FUSEBUF_H_
/*
- * Fusebufs are of a single size, 4096 bytes.
+ * Maximum size of the read or write buffer sent from the kernel for VFS
+ * syscalls: read, readdir, readlink, write.
*/
-#define FUSEBUFSIZE (sizeof(struct fusebuf))
#define FUSEBUFMAXSIZE (4096*1024)
/* header at beginning of each fusebuf: */
FUSE - read whole fusebuf v2