From: Kirill A. Korinsky Subject: Re: ffs_readdir To: tech@openbsd.org Date: Sat, 16 May 2026 21:27:44 +0200 On Thu, 07 May 2026 18:25:32 +0200, Kirill A. Korinsky wrote: > > On Wed, 08 Apr 2026 02:37:27 +0200, > "Kirill A. Korinsky" wrote: > > > > On Tue, 07 Apr 2026 09:18:11 +0200, > > Helg wrote: > > > > > > Are ffs and ufs so fundamentally different that ufs_readdir can't > > > also benefit from this optimisation? > > > > > > > Thanks for challenge it. > > > > I, actually implemented it and with UFS_BUFATOFF() it looks cleaner. > > > > Benchamrks, the base line: > > > > ~ $ time find /home/cvs | md5 > > e9bcc64014befa712a396051d2696703 > > 0m53.46s real 0m01.62s user 0m22.16s system > > ~ $ time find /home/cvs | md5 > > e9bcc64014befa712a396051d2696703 > > 0m06.03s real 0m00.65s user 0m05.52s system > > ~ $ time find /home/cvs | md5 > > e9bcc64014befa712a396051d2696703 > > 0m06.67s real 0m00.77s user 0m06.16s system > > ~ $ > > > > new ufs_readdir() > > > > ~ $ time find /home/cvs | md5 > > e9bcc64014befa712a396051d2696703 > > 0m44.67s real 0m01.94s user 0m11.99s system > > ~ $ time find /home/cvs | md5 > > e9bcc64014befa712a396051d2696703 > > 0m02.29s real 0m00.55s user 0m01.81s system > > ~ $ time find /home/cvs | md5 > > e9bcc64014befa712a396051d2696703 > > 0m02.50s real 0m00.58s user 0m01.99s system > > ~ $ > > > > ffs_readdir(): > > > > ~ $ time find /home/cvs | md5 > > e9bcc64014befa712a396051d2696703 > > 0m45.20s real 0m02.13s user 0m12.84s system > > ~ $ time find /home/cvs | md5 > > e9bcc64014befa712a396051d2696703 > > 0m02.27s real 0m00.47s user 0m01.89s system > > ~ $ time find /home/cvs | md5 > > e9bcc64014befa712a396051d2696703 > > 0m02.27s real 0m00.53s user 0m01.85s system > > ~ $ > > > > I think it is clear that ffs_readdir() is a wrong way and the right > > direction is ufs_readdir(). > > > > I use that diff on few machines for last month. No regression observed. > > I think it worth to go in in that early stage of development that buys > plenty of time to backout if it brokes something. > > Thouhgs? Ok? > Anyone? Index: sys/ufs/ufs/ufs_vnops.c =================================================================== RCS file: /home/cvs/src/sys/ufs/ufs/ufs_vnops.c,v diff -u -p -r1.164 ufs_vnops.c --- sys/ufs/ufs/ufs_vnops.c 18 Oct 2024 05:52:33 -0000 1.164 +++ sys/ufs/ufs/ufs_vnops.c 8 Apr 2026 00:04:04 -0000 @@ -1323,18 +1323,22 @@ int ufs_readdir(void *v) { struct vop_readdir_args *ap = v; - struct uio auio, *uio = ap->a_uio; - struct iovec aiov; + struct uio *uio = ap->a_uio; + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); union { struct dirent dn; char __pad[roundup(sizeof(struct dirent), 8)]; } u; off_t off = uio->uio_offset; + off_t bytesinfile; struct direct *dp; + struct buf *bp; char *edp; - caddr_t diskbuf; + char *dirbuf; size_t count, entries; - int bufsize, readcnt, error; + int done, error, readcnt, xfersize; + long size; if (uio->uio_rw != UIO_READ) return (EINVAL); @@ -1346,75 +1350,102 @@ ufs_readdir(void *v) if (count <= entries) return (EINVAL); - /* - * Convert and copy back the on-disk struct direct format to - * the user-space struct dirent format, one entry at a time - */ - /* read from disk, stopping on a block boundary, max 64kB */ readcnt = min(count, 64*1024) - entries; - auio = *uio; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_resid = readcnt; - auio.uio_segflg = UIO_SYSSPACE; - aiov.iov_len = readcnt; - bufsize = readcnt; - diskbuf = malloc(bufsize, M_TEMP, M_WAITOK); - aiov.iov_base = diskbuf; - error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); - readcnt -= auio.uio_resid; - dp = (struct direct *)diskbuf; - edp = &diskbuf[readcnt]; + off = uio->uio_offset; + done = 0; + error = 0; + bp = NULL; memset(&u, 0, sizeof(u)); - /* - * While - * - we haven't failed to VOP_READ or uiomove() - * - there's space in the read buf for the head of an entry - * - that entry has a valid d_reclen, and - * - there's space for the *entire* entry - * then we're good to process this one. - */ - while (error == 0 && - (char *)dp + offsetof(struct direct, d_name) < edp && - dp->d_reclen > offsetof(struct direct, d_name) && - (char *)dp + dp->d_reclen <= edp) { - u.dn.d_reclen = roundup(dp->d_namlen+1 + - offsetof(struct dirent, d_name), 8); - if (u.dn.d_reclen > uio->uio_resid) + while (error == 0 && done == 0 && readcnt > 0) { + bp = NULL; + if ((bytesinfile = DIP(ip, size) - off) <= 0) break; - off += dp->d_reclen; - u.dn.d_off = off; - u.dn.d_fileno = dp->d_ino; - u.dn.d_type = dp->d_type; - u.dn.d_namlen = dp->d_namlen; - memcpy(u.dn.d_name, dp->d_name, u.dn.d_namlen); - memset(u.dn.d_name + u.dn.d_namlen, 0, u.dn.d_reclen - - u.dn.d_namlen - offsetof(struct dirent, d_name)); - - if (memchr(u.dn.d_name, '/', u.dn.d_namlen) != NULL) { - error = EINVAL; + if ((error = UFS_BUFATOFF(ip, off, &dirbuf, &bp)) != 0) + break; + size = bp->b_bcount - bp->b_resid - + (dirbuf - (char *)bp->b_data); + if (size <= 0) { + brelse(bp); + bp = NULL; break; } - error = uiomove(&u.dn, u.dn.d_reclen, uio); - dp = (struct direct *)((char *)dp + dp->d_reclen); - } + xfersize = readcnt; + if (size < xfersize) + xfersize = size; + if (bytesinfile < xfersize) + xfersize = bytesinfile; + + /* + * Convert and copy back the on-disk struct direct format to + * the user-space struct dirent format, one entry at a time + */ + dp = (struct direct *)dirbuf; + edp = dirbuf + xfersize; + + /* + * While + * - we haven't failed to read or uiomove() + * - there's space in the read buf for the head of an entry + * - that entry has a valid d_reclen, and + * - there's space for the *entire* entry + * then we're good to process this one. + */ + while (error == 0 && + (char *)dp + offsetof(struct direct, d_name) < edp && + dp->d_reclen > offsetof(struct direct, d_name) && + (char *)dp + dp->d_reclen <= edp) { + u.dn.d_reclen = roundup(dp->d_namlen + 1 + + offsetof(struct dirent, d_name), 8); + if (u.dn.d_reclen > uio->uio_resid) { + done = 1; + break; + } + off += dp->d_reclen; + u.dn.d_off = off; + u.dn.d_fileno = dp->d_ino; + u.dn.d_type = dp->d_type; + u.dn.d_namlen = dp->d_namlen; + memcpy(u.dn.d_name, dp->d_name, u.dn.d_namlen); + memset(u.dn.d_name + u.dn.d_namlen, 0, + u.dn.d_reclen - u.dn.d_namlen - + offsetof(struct dirent, d_name)); + + if (memchr(u.dn.d_name, '/', u.dn.d_namlen) != NULL) { + error = EINVAL; + break; + } - /* - * If there was room for an entry in what we read but its - * d_reclen is bogus, fail - */ - if ((char *)dp + offsetof(struct direct, d_name) < edp && - dp->d_reclen <= offsetof(struct direct, d_name)) - error = EIO; - free(diskbuf, M_TEMP, bufsize); + error = uiomove(&u.dn, u.dn.d_reclen, uio); + dp = (struct direct *)((char *)dp + dp->d_reclen); + } + + /* + * If there was room for an entry in what we read but its + * d_reclen is bogus, fail + */ + if (error == 0 && + (char *)dp + offsetof(struct direct, d_name) < edp && + dp->d_reclen <= offsetof(struct direct, d_name)) + error = EIO; + + brelse(bp); + bp = NULL; + readcnt -= xfersize; + } + if (bp != NULL) + brelse(bp); uio->uio_offset = off; - *ap->a_eofflag = DIP(VTOI(ap->a_vp), size) <= off; + *ap->a_eofflag = DIP(ip, size) <= off; + + if (!(vp->v_mount->mnt_flag & MNT_NOATIME) || + (ip->i_flag & (IN_CHANGE | IN_UPDATE))) + ip->i_flag |= IN_ACCESS; return (error); } -- wbr, Kirill