Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
rsync: small step towards killing mmap
To:
tech@openbsd.org
Date:
Mon, 26 Feb 2024 11:45:19 +0100

Download raw body.

Thread
This is the first small step to kill mmap in rsync.
Using mmap was a mistake since it breaks the atomicity of access.
It is pretty evident with hash_file() where the hash of the file is
generated after blocks were compared and shipped. So a later modification
of the file could result in a bad hash.

This diff breaks up hash_file() into a start, buf / block and finial
function and puts the calls in the right spot to work.

With this regress still passes, more testing and review welcome :)
-- 
:wq Claudio

Index: blocks.c
===================================================================
RCS file: /cvs/src/usr.bin/rsync/blocks.c,v
diff -u -p -r1.21 blocks.c
--- blocks.c	3 Nov 2021 14:42:12 -0000	1.21
+++ blocks.c	25 Feb 2024 07:30:38 -0000
@@ -280,6 +280,8 @@ blk_match(struct sess *sess, const struc
 			    blk->len, blk->idx);
 			tok = -(blk->idx + 1);
 
+			hash_file_buf(&st->ctx, st->map + last, sz + blk->len);
+
 			/*
 			 * Write the data we have, then follow it with
 			 * the tag of the block that matches.
@@ -293,6 +295,7 @@ blk_match(struct sess *sess, const struc
 			st->total += blk->len;
 			st->offs += blk->len;
 			st->hint = blk->idx + 1;
+
 			return;
 		}
 
@@ -308,12 +311,16 @@ blk_match(struct sess *sess, const struc
 		st->curlen = st->curpos + sz;
 		st->curtok = 0;
 		st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
+
+		hash_file_buf(&st->ctx, st->map + st->curpos, sz);
 	} else {
 		st->curpos = 0;
 		st->curlen = st->mapsz;
 		st->curtok = 0;
 		st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK;
 		st->dirty = st->total = st->mapsz;
+
+		hash_file_buf(&st->ctx, st->map, st->mapsz);
 
 		LOG4("%s: flushing whole file %zu B",
 		    path, st->mapsz);
Index: extern.h
===================================================================
RCS file: /cvs/src/usr.bin/rsync/extern.h,v
diff -u -p -r1.47 extern.h
--- extern.h	27 Nov 2023 11:30:49 -0000	1.47
+++ extern.h	24 Feb 2024 07:34:29 -0000
@@ -17,6 +17,8 @@
 #ifndef EXTERN_H
 #define EXTERN_H
 
+#include <openssl/md4.h>
+
 /*
  * This is the rsync protocol version that we support.
  */
@@ -214,6 +216,7 @@ struct	blkstat {
 	struct blktab	*blktab; /* hashtable of blocks */
 	uint32_t	 s1; /* partial sum for computing fast hash */
 	uint32_t	 s2; /* partial sum for computing fast hash */
+	MD4_CTX		 ctx; /* context for hash_file */
 };
 
 /*
@@ -388,8 +391,10 @@ int		 blk_send_ack(struct sess *, int, s
 uint32_t	 hash_fast(const void *, size_t);
 void		 hash_slow(const void *, size_t, unsigned char *,
 		    const struct sess *);
-void		 hash_file(const void *, size_t, unsigned char *,
-		    const struct sess *);
+
+void		 hash_file_start(MD4_CTX *, const struct sess *);
+void		 hash_file_buf(MD4_CTX *, const void *, size_t);
+void		 hash_file_final(MD4_CTX *, unsigned char *);
 
 void		 copy_file(int, const char *, const struct flist *);
 
Index: hash.c
===================================================================
RCS file: /cvs/src/usr.bin/rsync/hash.c,v
diff -u -p -r1.4 hash.c
--- hash.c	30 Jun 2021 13:10:04 -0000	1.4
+++ hash.c	23 Feb 2024 11:14:17 -0000
@@ -82,14 +82,21 @@ hash_slow(const void *buf, size_t len,
  * of the sequence, not the beginning.
  */
 void
-hash_file(const void *buf, size_t len,
-	unsigned char *md, const struct sess *sess)
+hash_file_start(MD4_CTX *ctx, const struct sess *sess)
 {
-	MD4_CTX		 ctx;
 	int32_t		 seed = htole32(sess->seed);
 
-	MD4_Init(&ctx);
-	MD4_Update(&ctx, (unsigned char *)&seed, sizeof(int32_t));
-	MD4_Update(&ctx, buf, len);
-	MD4_Final(md, &ctx);
+	MD4_Init(ctx);
+	MD4_Update(ctx, (unsigned char *)&seed, sizeof(int32_t));
+}
+
+void
+hash_file_buf(MD4_CTX *ctx, const void *buf, size_t len)
+{
+	MD4_Update(ctx, buf, len);
+}
+void
+hash_file_final(MD4_CTX *ctx, unsigned char *md)
+{
+	MD4_Final(md, ctx);
 }
Index: sender.c
===================================================================
RCS file: /cvs/src/usr.bin/rsync/sender.c,v
diff -u -p -r1.31 sender.c
--- sender.c	19 Feb 2024 16:39:18 -0000	1.31
+++ sender.c	24 Feb 2024 07:35:10 -0000
@@ -159,7 +159,7 @@ send_up_fsm(struct sess *sess, size_t *p
 		 * finished with the file.
 		 */
 
-		hash_file(up->stat.map, up->stat.mapsz, fmd, sess);
+		hash_file_final(&up->stat.ctx, fmd);
 		if (!io_lowbuffer_alloc(sess, wb, wbsz, wbmax, dsz)) {
 			ERRX1("io_lowbuffer_alloc");
 			return 0;
@@ -619,6 +619,7 @@ rsync_sender(struct sess *sess, int fdin
 
 			/* Hash our blocks. */
 
+			hash_file_start(&up.stat.ctx, sess);
 			blkhash_set(up.stat.blktab, up.cur->blks);
 
 			/*