Index | Thread | Search

From:
Theo Buehler <tb@theobuehler.org>
Subject:
Re: rsync: small step towards killing mmap
To:
tech@openbsd.org
Date:
Tue, 27 Feb 2024 10:49:46 +0100

Download raw body.

Thread
On Mon, Feb 26, 2024 at 11:45:19AM +0100, Claudio Jeker wrote:
> This is the first small step to kill mmap in rsync.
> Using mmap was a mistake since it breaks the atomicity of access.
> It is pretty evident with hash_file() where the hash of the file is
> generated after blocks were compared and shipped. So a later modification
> of the file could result in a bad hash.
> 
> This diff breaks up hash_file() into a start, buf / block and finial
> function and puts the calls in the right spot to work.
> 
> With this regress still passes, more testing and review welcome :)

The diff reads ok. I'm not much of an rsync user though.

> -- 
> :wq Claudio
> 
> Index: blocks.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/rsync/blocks.c,v
> diff -u -p -r1.21 blocks.c
> --- blocks.c	3 Nov 2021 14:42:12 -0000	1.21
> +++ blocks.c	25 Feb 2024 07:30:38 -0000
> @@ -280,6 +280,8 @@ blk_match(struct sess *sess, const struc
>  			    blk->len, blk->idx);
>  			tok = -(blk->idx + 1);
>  
> +			hash_file_buf(&st->ctx, st->map + last, sz + blk->len);
> +
>  			/*
>  			 * Write the data we have, then follow it with
>  			 * the tag of the block that matches.
> @@ -293,6 +295,7 @@ blk_match(struct sess *sess, const struc
>  			st->total += blk->len;
>  			st->offs += blk->len;
>  			st->hint = blk->idx + 1;
> +
>  			return;
>  		}
>  
> @@ -308,12 +311,16 @@ blk_match(struct sess *sess, const struc
>  		st->curlen = st->curpos + sz;
>  		st->curtok = 0;
>  		st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK;
> +
> +		hash_file_buf(&st->ctx, st->map + st->curpos, sz);
>  	} else {
>  		st->curpos = 0;
>  		st->curlen = st->mapsz;
>  		st->curtok = 0;
>  		st->curst = st->mapsz ? BLKSTAT_DATA : BLKSTAT_TOK;
>  		st->dirty = st->total = st->mapsz;
> +
> +		hash_file_buf(&st->ctx, st->map, st->mapsz);
>  
>  		LOG4("%s: flushing whole file %zu B",
>  		    path, st->mapsz);
> Index: extern.h
> ===================================================================
> RCS file: /cvs/src/usr.bin/rsync/extern.h,v
> diff -u -p -r1.47 extern.h
> --- extern.h	27 Nov 2023 11:30:49 -0000	1.47
> +++ extern.h	24 Feb 2024 07:34:29 -0000
> @@ -17,6 +17,8 @@
>  #ifndef EXTERN_H
>  #define EXTERN_H
>  
> +#include <openssl/md4.h>
> +
>  /*
>   * This is the rsync protocol version that we support.
>   */
> @@ -214,6 +216,7 @@ struct	blkstat {
>  	struct blktab	*blktab; /* hashtable of blocks */
>  	uint32_t	 s1; /* partial sum for computing fast hash */
>  	uint32_t	 s2; /* partial sum for computing fast hash */
> +	MD4_CTX		 ctx; /* context for hash_file */
>  };
>  
>  /*
> @@ -388,8 +391,10 @@ int		 blk_send_ack(struct sess *, int, s
>  uint32_t	 hash_fast(const void *, size_t);
>  void		 hash_slow(const void *, size_t, unsigned char *,
>  		    const struct sess *);
> -void		 hash_file(const void *, size_t, unsigned char *,
> -		    const struct sess *);
> +
> +void		 hash_file_start(MD4_CTX *, const struct sess *);
> +void		 hash_file_buf(MD4_CTX *, const void *, size_t);
> +void		 hash_file_final(MD4_CTX *, unsigned char *);
>  
>  void		 copy_file(int, const char *, const struct flist *);
>  
> Index: hash.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/rsync/hash.c,v
> diff -u -p -r1.4 hash.c
> --- hash.c	30 Jun 2021 13:10:04 -0000	1.4
> +++ hash.c	23 Feb 2024 11:14:17 -0000
> @@ -82,14 +82,21 @@ hash_slow(const void *buf, size_t len,
>   * of the sequence, not the beginning.
>   */
>  void
> -hash_file(const void *buf, size_t len,
> -	unsigned char *md, const struct sess *sess)
> +hash_file_start(MD4_CTX *ctx, const struct sess *sess)
>  {
> -	MD4_CTX		 ctx;
>  	int32_t		 seed = htole32(sess->seed);
>  
> -	MD4_Init(&ctx);
> -	MD4_Update(&ctx, (unsigned char *)&seed, sizeof(int32_t));
> -	MD4_Update(&ctx, buf, len);
> -	MD4_Final(md, &ctx);
> +	MD4_Init(ctx);
> +	MD4_Update(ctx, (unsigned char *)&seed, sizeof(int32_t));
> +}
> +
> +void
> +hash_file_buf(MD4_CTX *ctx, const void *buf, size_t len)
> +{
> +	MD4_Update(ctx, buf, len);
> +}
> +void
> +hash_file_final(MD4_CTX *ctx, unsigned char *md)
> +{
> +	MD4_Final(md, ctx);
>  }
> Index: sender.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/rsync/sender.c,v
> diff -u -p -r1.31 sender.c
> --- sender.c	19 Feb 2024 16:39:18 -0000	1.31
> +++ sender.c	24 Feb 2024 07:35:10 -0000
> @@ -159,7 +159,7 @@ send_up_fsm(struct sess *sess, size_t *p
>  		 * finished with the file.
>  		 */
>  
> -		hash_file(up->stat.map, up->stat.mapsz, fmd, sess);
> +		hash_file_final(&up->stat.ctx, fmd);
>  		if (!io_lowbuffer_alloc(sess, wb, wbsz, wbmax, dsz)) {
>  			ERRX1("io_lowbuffer_alloc");
>  			return 0;
> @@ -619,6 +619,7 @@ rsync_sender(struct sess *sess, int fdin
>  
>  			/* Hash our blocks. */
>  
> +			hash_file_start(&up.stat.ctx, sess);
>  			blkhash_set(up.stat.blktab, up.cur->blks);
>  
>  			/*
>