Download raw body.
rsync: small step towards killing mmap
On Mon, Feb 26, 2024 at 11:45:19AM +0100, Claudio Jeker wrote:
> This is the first small step to kill mmap in rsync.
> Using mmap was a mistake since it breaks the atomicity of access.
> It is pretty evident with hash_file() where the hash of the file is
> generated after blocks were compared and shipped. So a later modification
> of the file could result in a bad hash.
>
> This diff breaks up hash_file() into a start, buf / block and finial
> function and puts the calls in the right spot to work.
>
> With this regress still passes, more testing and review welcome :)
The diff reads ok. I'm not much of an rsync user though.
> --
> :wq Claudio
>
> Index: blocks.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/rsync/blocks.c,v
> diff -u -p -r1.21 blocks.c
> --- blocks.c 3 Nov 2021 14:42:12 -0000 1.21
> +++ blocks.c 25 Feb 2024 07:30:38 -0000
> @@ -280,6 +280,8 @@ blk_match(struct sess *sess, const struc
> blk->len, blk->idx);
> tok = -(blk->idx + 1);
>
> + hash_file_buf(&st->ctx, st->map + last, sz + blk->len);
> +
> /*
> * Write the data we have, then follow it with
> * the tag of the block that matches.
> @@ -293,6 +295,7 @@ blk_match(struct sess *sess, const struc
> st->total += blk->len;
> st->offs += blk->len;
> st->hint = blk->idx + 1;
> +
> return;
> }
>
> @@ -308,12 +311,16 @@ blk_match(struct sess *sess, const struc
> st->curlen = st->curpos + sz;
> st->curtok = 0;
> st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
> +
> + hash_file_buf(&st->ctx, st->map + st->curpos, sz);
> } else {
> st->curpos = 0;
> st->curlen = st->mapsz;
> st->curtok = 0;
> st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK;
> st->dirty = st->total = st->mapsz;
> +
> + hash_file_buf(&st->ctx, st->map, st->mapsz);
>
> LOG4("%s: flushing whole file %zu B",
> path, st->mapsz);
> Index: extern.h
> ===================================================================
> RCS file: /cvs/src/usr.bin/rsync/extern.h,v
> diff -u -p -r1.47 extern.h
> --- extern.h 27 Nov 2023 11:30:49 -0000 1.47
> +++ extern.h 24 Feb 2024 07:34:29 -0000
> @@ -17,6 +17,8 @@
> #ifndef EXTERN_H
> #define EXTERN_H
>
> +#include <openssl/md4.h>
> +
> /*
> * This is the rsync protocol version that we support.
> */
> @@ -214,6 +216,7 @@ struct blkstat {
> struct blktab *blktab; /* hashtable of blocks */
> uint32_t s1; /* partial sum for computing fast hash */
> uint32_t s2; /* partial sum for computing fast hash */
> + MD4_CTX ctx; /* context for hash_file */
> };
>
> /*
> @@ -388,8 +391,10 @@ int blk_send_ack(struct sess *, int, s
> uint32_t hash_fast(const void *, size_t);
> void hash_slow(const void *, size_t, unsigned char *,
> const struct sess *);
> -void hash_file(const void *, size_t, unsigned char *,
> - const struct sess *);
> +
> +void hash_file_start(MD4_CTX *, const struct sess *);
> +void hash_file_buf(MD4_CTX *, const void *, size_t);
> +void hash_file_final(MD4_CTX *, unsigned char *);
>
> void copy_file(int, const char *, const struct flist *);
>
> Index: hash.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/rsync/hash.c,v
> diff -u -p -r1.4 hash.c
> --- hash.c 30 Jun 2021 13:10:04 -0000 1.4
> +++ hash.c 23 Feb 2024 11:14:17 -0000
> @@ -82,14 +82,21 @@ hash_slow(const void *buf, size_t len,
> * of the sequence, not the beginning.
> */
> void
> -hash_file(const void *buf, size_t len,
> - unsigned char *md, const struct sess *sess)
> +hash_file_start(MD4_CTX *ctx, const struct sess *sess)
> {
> - MD4_CTX ctx;
> int32_t seed = htole32(sess->seed);
>
> - MD4_Init(&ctx);
> - MD4_Update(&ctx, (unsigned char *)&seed, sizeof(int32_t));
> - MD4_Update(&ctx, buf, len);
> - MD4_Final(md, &ctx);
> + MD4_Init(ctx);
> + MD4_Update(ctx, (unsigned char *)&seed, sizeof(int32_t));
> +}
> +
> +void
> +hash_file_buf(MD4_CTX *ctx, const void *buf, size_t len)
> +{
> + MD4_Update(ctx, buf, len);
> +}
> +void
> +hash_file_final(MD4_CTX *ctx, unsigned char *md)
> +{
> + MD4_Final(md, ctx);
> }
> Index: sender.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/rsync/sender.c,v
> diff -u -p -r1.31 sender.c
> --- sender.c 19 Feb 2024 16:39:18 -0000 1.31
> +++ sender.c 24 Feb 2024 07:35:10 -0000
> @@ -159,7 +159,7 @@ send_up_fsm(struct sess *sess, size_t *p
> * finished with the file.
> */
>
> - hash_file(up->stat.map, up->stat.mapsz, fmd, sess);
> + hash_file_final(&up->stat.ctx, fmd);
> if (!io_lowbuffer_alloc(sess, wb, wbsz, wbmax, dsz)) {
> ERRX1("io_lowbuffer_alloc");
> return 0;
> @@ -619,6 +619,7 @@ rsync_sender(struct sess *sess, int fdin
>
> /* Hash our blocks. */
>
> + hash_file_start(&up.stat.ctx, sess);
> blkhash_set(up.stat.blktab, up.cur->blks);
>
> /*
>
rsync: small step towards killing mmap