Index | Thread | Search

From:
Kirill A. Korinsky <kirill@korins.ky>
Subject:
ffs_readdir
To:
OpenBSD tech <tech@openbsd.org>
Date:
Mon, 06 Apr 2026 14:57:21 +0200

Download raw body.

Thread
  • Kirill A. Korinsky:

    ffs_readdir

tech@,

I'd like to share with you a draft of ffs_readdir()

Idea is using lower and cheaper internal API to make listing of
directories via ffs_readdir() faster / cheaper than ufs_readdir()

This code may be wrong or it can contain bugs. Anyway, it boots, and I
already use it without any noticeble issue so far.

Performance impact?

ufs_readdir()

~ $ time find /home/cvs >/dev/null
    0m54.19s real     0m01.90s user     0m22.82s system
~ $ time find /home/cvs >/dev/null
    0m05.91s real     0m00.44s user     0m05.51s system
~ $ time find /home/cvs >/dev/null             
    0m05.91s real     0m00.58s user     0m05.30s system
~ $ 

ffs_readdir()

~ $ time find /home/cvs >/dev/null 
    0m42.88s real     0m01.84s user     0m11.29s system
~ $ time find /home/cvs >/dev/null 
    0m02.45s real     0m00.40s user     0m02.07s system
~ $ time find /home/cvs >/dev/null 
    0m02.34s real     0m00.52s user     0m01.81s system
~ $ 

Anyone willing to test it?


Index: sys/ufs/ffs/ffs_extern.h
===================================================================
RCS file: /home/cvs/src/sys/ufs/ffs/ffs_extern.h,v
diff -u -p -r1.51 ffs_extern.h
--- sys/ufs/ffs/ffs_extern.h	8 Oct 2024 02:58:26 -0000	1.51
+++ sys/ufs/ffs/ffs_extern.h	6 Apr 2026 12:49:45 -0000
@@ -150,6 +150,7 @@ int ffs_sysctl(int *, u_int, void *, siz
 int ffs_sbupdate(struct ufsmount *, int);
 
 /* ffs_vnops.c */
+int ffs_readdir(void *);
 int ffs_read(void *);
 int ffs_write(void *);
 int ffs_fsync(void *);
Index: sys/ufs/ffs/ffs_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/ufs/ffs/ffs_vnops.c,v
diff -u -p -r1.103 ffs_vnops.c
--- sys/ufs/ffs/ffs_vnops.c	27 Mar 2025 23:30:54 -0000	1.103
+++ sys/ufs/ffs/ffs_vnops.c	6 Apr 2026 12:49:45 -0000
@@ -38,6 +38,7 @@
 #include <sys/kernel.h>
 #include <sys/stat.h>
 #include <sys/buf.h>
+#include <sys/dirent.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/malloc.h>
@@ -78,7 +79,7 @@ const struct vops ffs_vops = {
 	.vop_mkdir	= ufs_mkdir,
 	.vop_rmdir	= ufs_rmdir,
 	.vop_symlink	= ufs_symlink,
-	.vop_readdir	= ufs_readdir,
+	.vop_readdir	= ffs_readdir,
 	.vop_readlink	= ufs_readlink,
 	.vop_abortop	= vop_generic_abortop,
 	.vop_inactive	= ufs_inactive,
@@ -173,6 +174,134 @@ const struct vops ffs_fifovops = {
 	.vop_advlock	= fifo_advlock
 };
 #endif /* FIFO */
+
+/*
+ * Vnode op for reading directories.
+ */
+int
+ffs_readdir(void *v)
+{
+	struct vop_readdir_args *ap = v;
+	struct uio *uio = ap->a_uio;
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
+	struct fs *fs = ip->i_fs;
+	union {
+		struct dirent dn;
+		char __pad[roundup(sizeof(struct dirent), 8)];
+	} u;
+	struct direct *dp;
+	struct buf *bp;
+	char *edp;
+	off_t bytesinfile, off;
+	daddr_t lbn, nextlbn;
+	size_t count, entries;
+	int blkoffset, bsize, done, error, readcnt, size, xfersize;
+
+	if (uio->uio_rw != UIO_READ)
+		return (EINVAL);
+
+	count = uio->uio_resid;
+	entries = (uio->uio_offset + count) & (DIRBLKSIZ - 1);
+
+	if (count <= entries)
+		return (EINVAL);
+
+	readcnt = (int)(min(count, (size_t)64 * 1024) - entries);
+	off = uio->uio_offset;
+	done = 0;
+	error = 0;
+	bp = NULL;
+
+	memset(&u, 0, sizeof(u));
+
+	while (error == 0 && done == 0 && readcnt > 0) {
+		bp = NULL;
+		if ((bytesinfile = DIP(ip, size) - off) <= 0)
+			break;
+		lbn = lblkno(fs, off);
+		nextlbn = lbn + 1;
+		bsize = blksize(fs, ip, lbn);
+		blkoffset = blkoff(fs, off);
+		xfersize = bsize - blkoffset;
+		if (readcnt < xfersize)
+			xfersize = readcnt;
+		if (bytesinfile < xfersize)
+			xfersize = bytesinfile;
+
+		if (lblktosize(fs, nextlbn) >= DIP(ip, size))
+			error = bread(vp, lbn, fs->fs_bsize, &bp);
+		else if (lbn - 1 == ip->i_ci.ci_lastr || readcnt > xfersize)
+			error = bread_cluster(vp, lbn, fs->fs_bsize, &bp);
+		else
+			error = bread(vp, lbn, fs->fs_bsize, &bp);
+		if (error)
+			break;
+		ip->i_ci.ci_lastr = lbn;
+
+		buf_adjcnt(bp, bsize);
+		size = bsize - bp->b_resid;
+		if (size <= blkoffset) {
+			brelse(bp);
+			bp = NULL;
+			break;
+		}
+		if (size < blkoffset + xfersize)
+			xfersize = size - blkoffset;
+
+		dp = (struct direct *)(bp->b_data + blkoffset);
+		edp = (char *)dp + xfersize;
+
+		while (error == 0 &&
+		    (char *)dp + offsetof(struct direct, d_name) < edp &&
+		    dp->d_reclen > offsetof(struct direct, d_name) &&
+		    (char *)dp + dp->d_reclen <= edp) {
+			u.dn.d_reclen = roundup(dp->d_namlen + 1 +
+			    offsetof(struct dirent, d_name), 8);
+			if (u.dn.d_reclen > uio->uio_resid) {
+				done = 1;
+				break;
+			}
+			off += dp->d_reclen;
+			u.dn.d_off = off;
+			u.dn.d_fileno = dp->d_ino;
+			u.dn.d_type = dp->d_type;
+			u.dn.d_namlen = dp->d_namlen;
+			memcpy(u.dn.d_name, dp->d_name, u.dn.d_namlen);
+			memset(u.dn.d_name + u.dn.d_namlen, 0,
+			    u.dn.d_reclen - u.dn.d_namlen -
+			    offsetof(struct dirent, d_name));
+
+			if (memchr(u.dn.d_name, '/', u.dn.d_namlen) != NULL) {
+				error = EINVAL;
+				break;
+			}
+
+			error = uiomove(&u.dn, u.dn.d_reclen, uio);
+			dp = (struct direct *)((char *)dp + dp->d_reclen);
+		}
+
+		if (error == 0 &&
+		    (char *)dp + offsetof(struct direct, d_name) < edp &&
+		    dp->d_reclen <= offsetof(struct direct, d_name))
+			error = EIO;
+
+		brelse(bp);
+		bp = NULL;
+		readcnt -= xfersize;
+	}
+	if (bp != NULL)
+		brelse(bp);
+
+	uio->uio_offset = off;
+	*ap->a_eofflag = DIP(ip, size) <= off;
+
+	if (!(vp->v_mount->mnt_flag & MNT_NOATIME) ||
+	    (ip->i_flag & (IN_CHANGE | IN_UPDATE)))
+		ip->i_flag |= IN_ACCESS;
+
+	return (error);
+}
 
 /*
  * Vnode op for reading.


-- 
wbr, Kirill