From: Theo Buehler Subject: Re: rsync: small step towards killing mmap To: tech@openbsd.org Date: Tue, 27 Feb 2024 10:49:46 +0100 On Mon, Feb 26, 2024 at 11:45:19AM +0100, Claudio Jeker wrote: > This is the first small step to kill mmap in rsync. > Using mmap was a mistake since it breaks the atomicity of access. > It is pretty evident with hash_file() where the hash of the file is > generated after blocks were compared and shipped. So a later modification > of the file could result in a bad hash. > > This diff breaks up hash_file() into a start, buf / block and finial > function and puts the calls in the right spot to work. > > With this regress still passes, more testing and review welcome :) The diff reads ok. I'm not much of an rsync user though. > -- > :wq Claudio > > Index: blocks.c > =================================================================== > RCS file: /cvs/src/usr.bin/rsync/blocks.c,v > diff -u -p -r1.21 blocks.c > --- blocks.c 3 Nov 2021 14:42:12 -0000 1.21 > +++ blocks.c 25 Feb 2024 07:30:38 -0000 > @@ -280,6 +280,8 @@ blk_match(struct sess *sess, const struc > blk->len, blk->idx); > tok = -(blk->idx + 1); > > + hash_file_buf(&st->ctx, st->map + last, sz + blk->len); > + > /* > * Write the data we have, then follow it with > * the tag of the block that matches. > @@ -293,6 +295,7 @@ blk_match(struct sess *sess, const struc > st->total += blk->len; > st->offs += blk->len; > st->hint = blk->idx + 1; > + > return; > } > > @@ -308,12 +311,16 @@ blk_match(struct sess *sess, const struc > st->curlen = st->curpos + sz; > st->curtok = 0; > st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK; > + > + hash_file_buf(&st->ctx, st->map + st->curpos, sz); > } else { > st->curpos = 0; > st->curlen = st->mapsz; > st->curtok = 0; > st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK; > st->dirty = st->total = st->mapsz; > + > + hash_file_buf(&st->ctx, st->map, st->mapsz); > > LOG4("%s: flushing whole file %zu B", > path, st->mapsz); > Index: extern.h > =================================================================== > RCS file: /cvs/src/usr.bin/rsync/extern.h,v > diff -u -p -r1.47 extern.h > --- extern.h 27 Nov 2023 11:30:49 -0000 1.47 > +++ extern.h 24 Feb 2024 07:34:29 -0000 > @@ -17,6 +17,8 @@ > #ifndef EXTERN_H > #define EXTERN_H > > +#include > + > /* > * This is the rsync protocol version that we support. > */ > @@ -214,6 +216,7 @@ struct blkstat { > struct blktab *blktab; /* hashtable of blocks */ > uint32_t s1; /* partial sum for computing fast hash */ > uint32_t s2; /* partial sum for computing fast hash */ > + MD4_CTX ctx; /* context for hash_file */ > }; > > /* > @@ -388,8 +391,10 @@ int blk_send_ack(struct sess *, int, s > uint32_t hash_fast(const void *, size_t); > void hash_slow(const void *, size_t, unsigned char *, > const struct sess *); > -void hash_file(const void *, size_t, unsigned char *, > - const struct sess *); > + > +void hash_file_start(MD4_CTX *, const struct sess *); > +void hash_file_buf(MD4_CTX *, const void *, size_t); > +void hash_file_final(MD4_CTX *, unsigned char *); > > void copy_file(int, const char *, const struct flist *); > > Index: hash.c > =================================================================== > RCS file: /cvs/src/usr.bin/rsync/hash.c,v > diff -u -p -r1.4 hash.c > --- hash.c 30 Jun 2021 13:10:04 -0000 1.4 > +++ hash.c 23 Feb 2024 11:14:17 -0000 > @@ -82,14 +82,21 @@ hash_slow(const void *buf, size_t len, > * of the sequence, not the beginning. > */ > void > -hash_file(const void *buf, size_t len, > - unsigned char *md, const struct sess *sess) > +hash_file_start(MD4_CTX *ctx, const struct sess *sess) > { > - MD4_CTX ctx; > int32_t seed = htole32(sess->seed); > > - MD4_Init(&ctx); > - MD4_Update(&ctx, (unsigned char *)&seed, sizeof(int32_t)); > - MD4_Update(&ctx, buf, len); > - MD4_Final(md, &ctx); > + MD4_Init(ctx); > + MD4_Update(ctx, (unsigned char *)&seed, sizeof(int32_t)); > +} > + > +void > +hash_file_buf(MD4_CTX *ctx, const void *buf, size_t len) > +{ > + MD4_Update(ctx, buf, len); > +} > +void > +hash_file_final(MD4_CTX *ctx, unsigned char *md) > +{ > + MD4_Final(md, ctx); > } > Index: sender.c > =================================================================== > RCS file: /cvs/src/usr.bin/rsync/sender.c,v > diff -u -p -r1.31 sender.c > --- sender.c 19 Feb 2024 16:39:18 -0000 1.31 > +++ sender.c 24 Feb 2024 07:35:10 -0000 > @@ -159,7 +159,7 @@ send_up_fsm(struct sess *sess, size_t *p > * finished with the file. > */ > > - hash_file(up->stat.map, up->stat.mapsz, fmd, sess); > + hash_file_final(&up->stat.ctx, fmd); > if (!io_lowbuffer_alloc(sess, wb, wbsz, wbmax, dsz)) { > ERRX1("io_lowbuffer_alloc"); > return 0; > @@ -619,6 +619,7 @@ rsync_sender(struct sess *sess, int fdin > > /* Hash our blocks. */ > > + hash_file_start(&up.stat.ctx, sess); > blkhash_set(up.stat.blktab, up.cur->blks); > > /* >