From: Claudio Jeker Subject: rsync: refactor blk_match() as a step to kill mmap To: tech@openbsd.org Date: Tue, 27 Feb 2024 16:39:28 +0100 Next step to kill mmap in rsync. Refactor blk_match() and collapse the empty file or no blocks case into the "emit remaining data and send terminator token" code. In the empty file case st->offs is 0 so last is 0, and if end is 0 then the for loop is skipped, and so the emit remaining data block is doing the same work. -- :wq Claudio Index: blocks.c =================================================================== RCS file: /cvs/src/usr.bin/rsync/blocks.c,v diff -u -p -r1.22 blocks.c --- blocks.c 27 Feb 2024 11:28:30 -0000 1.22 +++ blocks.c 27 Feb 2024 13:55:21 -0000 @@ -243,7 +243,7 @@ void blk_match(struct sess *sess, const struct blkset *blks, const char *path, struct blkstat *st) { - off_t last, end, sz; + off_t last, end = 0, sz; int32_t tok; size_t i; const struct blk *blk; @@ -265,66 +265,55 @@ blk_match(struct sess *sess, const struc */ end = st->mapsz + 1 - blks->blks[blks->blksz - 1].len; - last = st->offs; + } - for (i = 0; st->offs < end; st->offs++, i++) { - blk = blk_find(sess, st, blks, path, i == 0); - if (blk == NULL) - continue; - - sz = st->offs - last; - st->dirty += sz; - st->total += sz; - LOG4("%s: flushing %jd B before %zu B block %zu", - path, (intmax_t)sz, - blk->len, blk->idx); - tok = -(blk->idx + 1); - - hash_file_buf(&st->ctx, st->map + last, sz + blk->len); - - /* - * Write the data we have, then follow it with - * the tag of the block that matches. - */ - - st->curpos = last; - st->curlen = st->curpos + sz; - st->curtok = tok; - assert(st->curtok != 0); - st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK; - st->total += blk->len; - st->offs += blk->len; - st->hint = blk->idx + 1; - - return; - } - - /* Emit remaining data and send terminator token. 
*/ - - sz = st->mapsz - last; - LOG4("%s: flushing remaining %jd B", - path, (intmax_t)sz); + last = st->offs; + for (i = 0; st->offs < end; st->offs++, i++) { + blk = blk_find(sess, st, blks, path, i == 0); + if (blk == NULL) + continue; - st->total += sz; + sz = st->offs - last; st->dirty += sz; + st->total += sz; + LOG4("%s: flushing %jd B before %zu B block %zu", + path, (intmax_t)sz, + blk->len, blk->idx); + tok = -(blk->idx + 1); + + hash_file_buf(&st->ctx, st->map + last, sz + blk->len); + + /* + * Write the data we have, then follow it with + * the tag of the block that matches. + */ + st->curpos = last; st->curlen = st->curpos + sz; - st->curtok = 0; + st->curtok = tok; + assert(st->curtok != 0); st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK; + st->total += blk->len; + st->offs += blk->len; + st->hint = blk->idx + 1; - hash_file_buf(&st->ctx, st->map + st->curpos, sz); - } else { - st->curpos = 0; - st->curlen = st->mapsz; - st->curtok = 0; - st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK; - st->dirty = st->total = st->mapsz; + return; + } - hash_file_buf(&st->ctx, st->map, st->mapsz); + /* Emit remaining data and send terminator token. */ - LOG4("%s: flushing whole file %zu B", - path, st->mapsz); - } + sz = st->mapsz - last; + LOG4("%s: flushing %s %jd B", path, + last == 0 ? "whole" : "remaining", (intmax_t)sz); + + hash_file_buf(&st->ctx, st->map + last, sz); + + st->total += sz; + st->dirty += sz; + st->curpos = last; + st->curlen = st->curpos + sz; + st->curtok = 0; + st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK; } /*