Download raw body.
ffs_readdir
On Tue, 07 Apr 2026 09:18:11 +0200,
Helg <helg-openbsd@gmx.de> wrote:
>
> Are ffs and ufs so fundamentally different that ufs_readdir can't
> also benefit from this optimisation?
>
Thanks for challenge it.
I, actually implemented it and with UFS_BUFATOFF() it looks cleaner.
Benchamrks, the base line:
~ $ time find /home/cvs | md5
e9bcc64014befa712a396051d2696703
0m53.46s real 0m01.62s user 0m22.16s system
~ $ time find /home/cvs | md5
e9bcc64014befa712a396051d2696703
0m06.03s real 0m00.65s user 0m05.52s system
~ $ time find /home/cvs | md5
e9bcc64014befa712a396051d2696703
0m06.67s real 0m00.77s user 0m06.16s system
~ $
new ufs_readdir()
~ $ time find /home/cvs | md5
e9bcc64014befa712a396051d2696703
0m44.67s real 0m01.94s user 0m11.99s system
~ $ time find /home/cvs | md5
e9bcc64014befa712a396051d2696703
0m02.29s real 0m00.55s user 0m01.81s system
~ $ time find /home/cvs | md5
e9bcc64014befa712a396051d2696703
0m02.50s real 0m00.58s user 0m01.99s system
~ $
ffs_readdir():
~ $ time find /home/cvs | md5
e9bcc64014befa712a396051d2696703
0m45.20s real 0m02.13s user 0m12.84s system
~ $ time find /home/cvs | md5
e9bcc64014befa712a396051d2696703
0m02.27s real 0m00.47s user 0m01.89s system
~ $ time find /home/cvs | md5
e9bcc64014befa712a396051d2696703
0m02.27s real 0m00.53s user 0m01.85s system
~ $
I think it is clear that ffs_readdir() is a wrong way and the right
direction is ufs_readdir().
Here the diff:
Index: sys/ufs/ufs/ufs_vnops.c
===================================================================
RCS file: /home/cvs/src/sys/ufs/ufs/ufs_vnops.c,v
diff -u -p -r1.164 ufs_vnops.c
--- sys/ufs/ufs/ufs_vnops.c 18 Oct 2024 05:52:33 -0000 1.164
+++ sys/ufs/ufs/ufs_vnops.c 8 Apr 2026 00:04:04 -0000
@@ -1323,18 +1323,22 @@ int
ufs_readdir(void *v)
{
struct vop_readdir_args *ap = v;
- struct uio auio, *uio = ap->a_uio;
- struct iovec aiov;
+ struct uio *uio = ap->a_uio;
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
union {
struct dirent dn;
char __pad[roundup(sizeof(struct dirent), 8)];
} u;
off_t off = uio->uio_offset;
+ off_t bytesinfile;
struct direct *dp;
+ struct buf *bp;
char *edp;
- caddr_t diskbuf;
+ char *dirbuf;
size_t count, entries;
- int bufsize, readcnt, error;
+ int done, error, readcnt, xfersize;
+ long size;
if (uio->uio_rw != UIO_READ)
return (EINVAL);
@@ -1346,75 +1350,102 @@ ufs_readdir(void *v)
if (count <= entries)
return (EINVAL);
- /*
- * Convert and copy back the on-disk struct direct format to
- * the user-space struct dirent format, one entry at a time
- */
-
/* read from disk, stopping on a block boundary, max 64kB */
readcnt = min(count, 64*1024) - entries;
- auio = *uio;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_resid = readcnt;
- auio.uio_segflg = UIO_SYSSPACE;
- aiov.iov_len = readcnt;
- bufsize = readcnt;
- diskbuf = malloc(bufsize, M_TEMP, M_WAITOK);
- aiov.iov_base = diskbuf;
- error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred);
- readcnt -= auio.uio_resid;
- dp = (struct direct *)diskbuf;
- edp = &diskbuf[readcnt];
+ off = uio->uio_offset;
+ done = 0;
+ error = 0;
+ bp = NULL;
memset(&u, 0, sizeof(u));
- /*
- * While
- * - we haven't failed to VOP_READ or uiomove()
- * - there's space in the read buf for the head of an entry
- * - that entry has a valid d_reclen, and
- * - there's space for the *entire* entry
- * then we're good to process this one.
- */
- while (error == 0 &&
- (char *)dp + offsetof(struct direct, d_name) < edp &&
- dp->d_reclen > offsetof(struct direct, d_name) &&
- (char *)dp + dp->d_reclen <= edp) {
- u.dn.d_reclen = roundup(dp->d_namlen+1 +
- offsetof(struct dirent, d_name), 8);
- if (u.dn.d_reclen > uio->uio_resid)
+ while (error == 0 && done == 0 && readcnt > 0) {
+ bp = NULL;
+ if ((bytesinfile = DIP(ip, size) - off) <= 0)
break;
- off += dp->d_reclen;
- u.dn.d_off = off;
- u.dn.d_fileno = dp->d_ino;
- u.dn.d_type = dp->d_type;
- u.dn.d_namlen = dp->d_namlen;
- memcpy(u.dn.d_name, dp->d_name, u.dn.d_namlen);
- memset(u.dn.d_name + u.dn.d_namlen, 0, u.dn.d_reclen
- - u.dn.d_namlen - offsetof(struct dirent, d_name));
-
- if (memchr(u.dn.d_name, '/', u.dn.d_namlen) != NULL) {
- error = EINVAL;
+ if ((error = UFS_BUFATOFF(ip, off, &dirbuf, &bp)) != 0)
+ break;
+ size = bp->b_bcount - bp->b_resid -
+ (dirbuf - (char *)bp->b_data);
+ if (size <= 0) {
+ brelse(bp);
+ bp = NULL;
break;
}
- error = uiomove(&u.dn, u.dn.d_reclen, uio);
- dp = (struct direct *)((char *)dp + dp->d_reclen);
- }
+ xfersize = readcnt;
+ if (size < xfersize)
+ xfersize = size;
+ if (bytesinfile < xfersize)
+ xfersize = bytesinfile;
+
+ /*
+ * Convert and copy back the on-disk struct direct format to
+ * the user-space struct dirent format, one entry at a time
+ */
+ dp = (struct direct *)dirbuf;
+ edp = dirbuf + xfersize;
+
+ /*
+ * While
+ * - we haven't failed to read or uiomove()
+ * - there's space in the read buf for the head of an entry
+ * - that entry has a valid d_reclen, and
+ * - there's space for the *entire* entry
+ * then we're good to process this one.
+ */
+ while (error == 0 &&
+ (char *)dp + offsetof(struct direct, d_name) < edp &&
+ dp->d_reclen > offsetof(struct direct, d_name) &&
+ (char *)dp + dp->d_reclen <= edp) {
+ u.dn.d_reclen = roundup(dp->d_namlen + 1 +
+ offsetof(struct dirent, d_name), 8);
+ if (u.dn.d_reclen > uio->uio_resid) {
+ done = 1;
+ break;
+ }
+ off += dp->d_reclen;
+ u.dn.d_off = off;
+ u.dn.d_fileno = dp->d_ino;
+ u.dn.d_type = dp->d_type;
+ u.dn.d_namlen = dp->d_namlen;
+ memcpy(u.dn.d_name, dp->d_name, u.dn.d_namlen);
+ memset(u.dn.d_name + u.dn.d_namlen, 0,
+ u.dn.d_reclen - u.dn.d_namlen -
+ offsetof(struct dirent, d_name));
+
+ if (memchr(u.dn.d_name, '/', u.dn.d_namlen) != NULL) {
+ error = EINVAL;
+ break;
+ }
- /*
- * If there was room for an entry in what we read but its
- * d_reclen is bogus, fail
- */
- if ((char *)dp + offsetof(struct direct, d_name) < edp &&
- dp->d_reclen <= offsetof(struct direct, d_name))
- error = EIO;
- free(diskbuf, M_TEMP, bufsize);
+ error = uiomove(&u.dn, u.dn.d_reclen, uio);
+ dp = (struct direct *)((char *)dp + dp->d_reclen);
+ }
+
+ /*
+ * If there was room for an entry in what we read but its
+ * d_reclen is bogus, fail
+ */
+ if (error == 0 &&
+ (char *)dp + offsetof(struct direct, d_name) < edp &&
+ dp->d_reclen <= offsetof(struct direct, d_name))
+ error = EIO;
+
+ brelse(bp);
+ bp = NULL;
+ readcnt -= xfersize;
+ }
+ if (bp != NULL)
+ brelse(bp);
uio->uio_offset = off;
- *ap->a_eofflag = DIP(VTOI(ap->a_vp), size) <= off;
+ *ap->a_eofflag = DIP(ip, size) <= off;
+
+ if (!(vp->v_mount->mnt_flag & MNT_NOATIME) ||
+ (ip->i_flag & (IN_CHANGE | IN_UPDATE)))
+ ip->i_flag |= IN_ACCESS;
return (error);
}
--
wbr, Kirill
ffs_readdir