Download raw body.
rsync: small step towards killing mmap
This is the first small step to kill mmap in rsync.
Using mmap was a mistake since it breaks the atomicity of access.
It is pretty evident with hash_file() where the hash of the file is
generated after blocks were compared and shipped. So a later modification
of the file could result in a bad hash.
This diff breaks up hash_file() into a start, buf / block and finial
function and puts the calls in the right spot to work.
With this regress still passes, more testing and review welcome :)
--
:wq Claudio
Index: blocks.c
===================================================================
RCS file: /cvs/src/usr.bin/rsync/blocks.c,v
diff -u -p -r1.21 blocks.c
--- blocks.c 3 Nov 2021 14:42:12 -0000 1.21
+++ blocks.c 25 Feb 2024 07:30:38 -0000
@@ -280,6 +280,8 @@ blk_match(struct sess *sess, const struc
blk->len, blk->idx);
tok = -(blk->idx + 1);
+ hash_file_buf(&st->ctx, st->map + last, sz + blk->len);
+
/*
* Write the data we have, then follow it with
* the tag of the block that matches.
@@ -293,6 +295,7 @@ blk_match(struct sess *sess, const struc
st->total += blk->len;
st->offs += blk->len;
st->hint = blk->idx + 1;
+
return;
}
@@ -308,12 +311,16 @@ blk_match(struct sess *sess, const struc
st->curlen = st->curpos + sz;
st->curtok = 0;
st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
+
+ hash_file_buf(&st->ctx, st->map + st->curpos, sz);
} else {
st->curpos = 0;
st->curlen = st->mapsz;
st->curtok = 0;
st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK;
st->dirty = st->total = st->mapsz;
+
+ hash_file_buf(&st->ctx, st->map, st->mapsz);
LOG4("%s: flushing whole file %zu B",
path, st->mapsz);
Index: extern.h
===================================================================
RCS file: /cvs/src/usr.bin/rsync/extern.h,v
diff -u -p -r1.47 extern.h
--- extern.h 27 Nov 2023 11:30:49 -0000 1.47
+++ extern.h 24 Feb 2024 07:34:29 -0000
@@ -17,6 +17,8 @@
#ifndef EXTERN_H
#define EXTERN_H
+#include <openssl/md4.h>
+
/*
* This is the rsync protocol version that we support.
*/
@@ -214,6 +216,7 @@ struct blkstat {
struct blktab *blktab; /* hashtable of blocks */
uint32_t s1; /* partial sum for computing fast hash */
uint32_t s2; /* partial sum for computing fast hash */
+ MD4_CTX ctx; /* context for hash_file */
};
/*
@@ -388,8 +391,10 @@ int blk_send_ack(struct sess *, int, s
uint32_t hash_fast(const void *, size_t);
void hash_slow(const void *, size_t, unsigned char *,
const struct sess *);
-void hash_file(const void *, size_t, unsigned char *,
- const struct sess *);
+
+void hash_file_start(MD4_CTX *, const struct sess *);
+void hash_file_buf(MD4_CTX *, const void *, size_t);
+void hash_file_final(MD4_CTX *, unsigned char *);
void copy_file(int, const char *, const struct flist *);
Index: hash.c
===================================================================
RCS file: /cvs/src/usr.bin/rsync/hash.c,v
diff -u -p -r1.4 hash.c
--- hash.c 30 Jun 2021 13:10:04 -0000 1.4
+++ hash.c 23 Feb 2024 11:14:17 -0000
@@ -82,14 +82,21 @@ hash_slow(const void *buf, size_t len,
* of the sequence, not the beginning.
*/
void
-hash_file(const void *buf, size_t len,
- unsigned char *md, const struct sess *sess)
+hash_file_start(MD4_CTX *ctx, const struct sess *sess)
{
- MD4_CTX ctx;
int32_t seed = htole32(sess->seed);
- MD4_Init(&ctx);
- MD4_Update(&ctx, (unsigned char *)&seed, sizeof(int32_t));
- MD4_Update(&ctx, buf, len);
- MD4_Final(md, &ctx);
+ MD4_Init(ctx);
+ MD4_Update(ctx, (unsigned char *)&seed, sizeof(int32_t));
+}
+
+void
+hash_file_buf(MD4_CTX *ctx, const void *buf, size_t len)
+{
+ MD4_Update(ctx, buf, len);
+}
+void
+hash_file_final(MD4_CTX *ctx, unsigned char *md)
+{
+ MD4_Final(md, ctx);
}
Index: sender.c
===================================================================
RCS file: /cvs/src/usr.bin/rsync/sender.c,v
diff -u -p -r1.31 sender.c
--- sender.c 19 Feb 2024 16:39:18 -0000 1.31
+++ sender.c 24 Feb 2024 07:35:10 -0000
@@ -159,7 +159,7 @@ send_up_fsm(struct sess *sess, size_t *p
* finished with the file.
*/
- hash_file(up->stat.map, up->stat.mapsz, fmd, sess);
+ hash_file_final(&up->stat.ctx, fmd);
if (!io_lowbuffer_alloc(sess, wb, wbsz, wbmax, dsz)) {
ERRX1("io_lowbuffer_alloc");
return 0;
@@ -619,6 +619,7 @@ rsync_sender(struct sess *sess, int fdin
/* Hash our blocks. */
+ hash_file_start(&up.stat.ctx, sess);
blkhash_set(up.stat.blktab, up.cur->blks);
/*
rsync: small step towards killing mmap