From: Kirill A. Korinsky Subject: ffs_readdir To: OpenBSD tech Date: Mon, 06 Apr 2026 14:57:21 +0200 tech@, I'd like to share with you a draft of ffs_readdir() Idea is using lower and cheaper internal API to make listing of directories via ffs_readdir() faster / cheaper than ufs_readdir() This code may be wrong or it can contain bugs. Anyway, it boots, and I already use it without any noticeble issue so far. Performance impact? ufs_readdir() ~ $ time find /home/cvs >/dev/null 0m54.19s real 0m01.90s user 0m22.82s system ~ $ time find /home/cvs >/dev/null 0m05.91s real 0m00.44s user 0m05.51s system ~ $ time find /home/cvs >/dev/null 0m05.91s real 0m00.58s user 0m05.30s system ~ $ ffs_readdir() ~ $ time find /home/cvs >/dev/null 0m42.88s real 0m01.84s user 0m11.29s system ~ $ time find /home/cvs >/dev/null 0m02.45s real 0m00.40s user 0m02.07s system ~ $ time find /home/cvs >/dev/null 0m02.34s real 0m00.52s user 0m01.81s system ~ $ Anyone willing to test it? Index: sys/ufs/ffs/ffs_extern.h =================================================================== RCS file: /home/cvs/src/sys/ufs/ffs/ffs_extern.h,v diff -u -p -r1.51 ffs_extern.h --- sys/ufs/ffs/ffs_extern.h 8 Oct 2024 02:58:26 -0000 1.51 +++ sys/ufs/ffs/ffs_extern.h 6 Apr 2026 12:49:45 -0000 @@ -150,6 +150,7 @@ int ffs_sysctl(int *, u_int, void *, siz int ffs_sbupdate(struct ufsmount *, int); /* ffs_vnops.c */ +int ffs_readdir(void *); int ffs_read(void *); int ffs_write(void *); int ffs_fsync(void *); Index: sys/ufs/ffs/ffs_vnops.c =================================================================== RCS file: /home/cvs/src/sys/ufs/ffs/ffs_vnops.c,v diff -u -p -r1.103 ffs_vnops.c --- sys/ufs/ffs/ffs_vnops.c 27 Mar 2025 23:30:54 -0000 1.103 +++ sys/ufs/ffs/ffs_vnops.c 6 Apr 2026 12:49:45 -0000 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -78,7 +79,7 @@ const struct vops ffs_vops = { .vop_mkdir = ufs_mkdir, .vop_rmdir = ufs_rmdir, .vop_symlink = ufs_symlink, - .vop_readdir = ufs_readdir, + .vop_readdir = ffs_readdir, .vop_readlink = ufs_readlink, .vop_abortop = vop_generic_abortop, .vop_inactive = ufs_inactive, @@ -173,6 +174,134 @@ const struct vops ffs_fifovops = { .vop_advlock = fifo_advlock }; #endif /* FIFO */ + +/* + * Vnode op for reading directories. + */ +int +ffs_readdir(void *v) +{ + struct vop_readdir_args *ap = v; + struct uio *uio = ap->a_uio; + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct fs *fs = ip->i_fs; + union { + struct dirent dn; + char __pad[roundup(sizeof(struct dirent), 8)]; + } u; + struct direct *dp; + struct buf *bp; + char *edp; + off_t bytesinfile, off; + daddr_t lbn, nextlbn; + size_t count, entries; + int blkoffset, bsize, done, error, readcnt, size, xfersize; + + if (uio->uio_rw != UIO_READ) + return (EINVAL); + + count = uio->uio_resid; + entries = (uio->uio_offset + count) & (DIRBLKSIZ - 1); + + if (count <= entries) + return (EINVAL); + + readcnt = (int)(min(count, (size_t)64 * 1024) - entries); + off = uio->uio_offset; + done = 0; + error = 0; + bp = NULL; + + memset(&u, 0, sizeof(u)); + + while (error == 0 && done == 0 && readcnt > 0) { + bp = NULL; + if ((bytesinfile = DIP(ip, size) - off) <= 0) + break; + lbn = lblkno(fs, off); + nextlbn = lbn + 1; + bsize = blksize(fs, ip, lbn); + blkoffset = blkoff(fs, off); + xfersize = bsize - blkoffset; + if (readcnt < xfersize) + xfersize = readcnt; + if (bytesinfile < xfersize) + xfersize = bytesinfile; + + if (lblktosize(fs, nextlbn) >= DIP(ip, size)) + error = bread(vp, lbn, fs->fs_bsize, &bp); + else if (lbn - 1 == ip->i_ci.ci_lastr || readcnt > xfersize) + error = bread_cluster(vp, lbn, fs->fs_bsize, &bp); + else + error = bread(vp, lbn, fs->fs_bsize, &bp); + if (error) + break; + ip->i_ci.ci_lastr = lbn; + + buf_adjcnt(bp, bsize); + size = bsize - bp->b_resid; + if (size <= blkoffset) { + brelse(bp); + bp = NULL; + break; + } + if (size < blkoffset + xfersize) + xfersize = size - blkoffset; + + dp = (struct direct *)(bp->b_data + blkoffset); + edp = (char *)dp + xfersize; + + while (error == 0 && + (char *)dp + offsetof(struct direct, d_name) < edp && + dp->d_reclen > offsetof(struct direct, d_name) && + (char *)dp + dp->d_reclen <= edp) { + u.dn.d_reclen = roundup(dp->d_namlen + 1 + + offsetof(struct dirent, d_name), 8); + if (u.dn.d_reclen > uio->uio_resid) { + done = 1; + break; + } + off += dp->d_reclen; + u.dn.d_off = off; + u.dn.d_fileno = dp->d_ino; + u.dn.d_type = dp->d_type; + u.dn.d_namlen = dp->d_namlen; + memcpy(u.dn.d_name, dp->d_name, u.dn.d_namlen); + memset(u.dn.d_name + u.dn.d_namlen, 0, + u.dn.d_reclen - u.dn.d_namlen - + offsetof(struct dirent, d_name)); + + if (memchr(u.dn.d_name, '/', u.dn.d_namlen) != NULL) { + error = EINVAL; + break; + } + + error = uiomove(&u.dn, u.dn.d_reclen, uio); + dp = (struct direct *)((char *)dp + dp->d_reclen); + } + + if (error == 0 && + (char *)dp + offsetof(struct direct, d_name) < edp && + dp->d_reclen <= offsetof(struct direct, d_name)) + error = EIO; + + brelse(bp); + bp = NULL; + readcnt -= xfersize; + } + if (bp != NULL) + brelse(bp); + + uio->uio_offset = off; + *ap->a_eofflag = DIP(ip, size) <= off; + + if (!(vp->v_mount->mnt_flag & MNT_NOATIME) || + (ip->i_flag & (IN_CHANGE | IN_UPDATE))) + ip->i_flag |= IN_ACCESS; + + return (error); +} /* * Vnode op for reading. -- wbr, Kirill