Index | Thread | Search

From:
Theo Buehler <tb@theobuehler.org>
Subject:
Re: bgpd: rework extendible hash part of CH hash
To:
tech@openbsd.org
Date:
Wed, 13 May 2026 16:27:40 +0200

Download raw body.

Thread
On Wed, May 13, 2026 at 03:41:38PM +0200, Claudio Jeker wrote:
> I really dislike the double arrays for tables and meta data.
> 
> This adds ch_ext which wrapps the two pointers into a single array.
> This simplifies a fair bunch of functions (esp. ch_table_resize).

That's a big improvement.

> I also changed the initial allocation from 1 entry to 2 and with that the
> magic ch_level == 0 case is gone.
> I resuffled the code as well so that we can return early for the inital
> call. This may not be needed but the for loop scares me a bit.
> The need for the signed index just makes me uncomfy.

Yeah, for good reason... Also much better.

ok tb (two tiny nits below)

> -- 
> :wq Claudio
> 
> Index: chash.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/chash.c,v
> diff -u -p -r1.11 chash.c
> --- chash.c	13 May 2026 08:57:39 -0000	1.11
> +++ chash.c	13 May 2026 13:18:07 -0000
> @@ -80,6 +80,11 @@ struct ch_meta {
>  	uint32_t	cs_local_level;
>  };
>  
> +struct ch_ext {
> +	struct ch_group		*ce_table;
> +	struct ch_meta		*ce_meta;
> +};
> +
>  /*
>   * API to work with the cg_meta field of a ch_group:
>   *   cg_meta_set_flags: Set bits in flags section, return true if not yet set.
> @@ -527,8 +532,7 @@ ch_sub_free(const struct ch_type *type, 
>  static int
>  ch_table_resize(const struct ch_type *type, struct ch_table *t)
>  {
> -	struct ch_group **tables;
> -	struct ch_meta **metas;
> +	struct ch_ext *new;
>  	uint64_t oldsize = 1ULL << t->ch_level;
>  	uint64_t newsize = oldsize * 2;
>  	int64_t idx;
> @@ -538,40 +542,28 @@ ch_table_resize(const struct ch_type *ty
>  		return -1;
>  	}
>  
> -	if (t->ch_tables == NULL) {
> +	if (t->ch_exts == NULL) {
>  		oldsize = 0;
> -		newsize = 1;
> +		newsize = 2;
>  	}
>  
> -	tables = reallocarray(t->ch_tables, newsize, sizeof(*tables));
> -	if (tables == NULL)
> -		return -1;
> -	metas = reallocarray(t->ch_metas, newsize, sizeof(*metas));
> -	if (metas == NULL) {
> -		/*
> -		 * tables was correctly reallocated, so update that
> -		 * pointer before failing hard. If the caller recovers
> -		 * somehow the next reallocarray of ch_tables will simply
> -		 * do nothing.
> -		 */
> -		t->ch_tables = tables;
> +	new = reallocarray(t->ch_exts, newsize, sizeof(*t->ch_exts));
> +	if (new == NULL)
>  		return -1;
> -	}
>  
> -	for (idx = oldsize - 1; idx >= 0; idx--) {
> -		tables[idx * 2] = tables[idx];
> -		tables[idx * 2 + 1] = tables[idx];
> -		metas[idx * 2] = metas[idx];
> -		metas[idx * 2 + 1] = metas[idx];
> -	}
> +	t->ch_level++;
> +	t->ch_exts = new;
>  
> -	if (t->ch_tables != NULL)
> -		t->ch_level++;
> -	t->ch_tables = tables;
> -	t->ch_metas = metas;
> +	t->ch_counts.cc_num_extendible += newsize - oldsize;
> +	type->t_counts->cc_num_extendible += newsize - oldsize;
>  
> -	t->ch_counts.cc_num_extendible += oldsize;
> -	type->t_counts->cc_num_extendible += oldsize;
> +	if (oldsize == 0)
> +		return 0;
> +
> +	for (idx = oldsize - 1; idx >= 0; idx--) {
> +		t->ch_exts[idx * 2] = t->ch_exts[idx];
> +		t->ch_exts[idx * 2 + 1] = t->ch_exts[idx];
> +	}
>  
>  	return 0;
>  }
> @@ -591,8 +583,8 @@ ch_table_fill(struct ch_table *t, uint64
>  	cnt = 1ULL << (t->ch_level - meta->cs_local_level);
>  
>  	for (i = 0; i < cnt; i++) {
> -		t->ch_tables[idx + i] = table;
> -		t->ch_metas[idx + i] = meta;
> +		t->ch_exts[idx + i].ce_table = table;
> +		t->ch_exts[idx + i].ce_meta = meta;
>  	}
>  }
>  
> @@ -600,9 +592,8 @@ ch_table_fill(struct ch_table *t, uint64
>   * Return the buddy sub group for the table with idx and local_level.
>   * The buddy page must have the same local level to be a buddy.
>   */
> -static struct ch_group *
> -ch_table_buddy(struct ch_table *t, uint64_t idx, uint32_t local_level,
> -    struct ch_meta **meta)
> +static struct ch_ext *
> +ch_table_buddy(struct ch_table *t, uint64_t idx, uint32_t local_level)
>  {
>  	struct ch_meta *m;
>  
> @@ -612,11 +603,10 @@ ch_table_buddy(struct ch_table *t, uint6
>  
>  	idx ^= 1ULL << (t->ch_level - local_level);
>  
> -	m = t->ch_metas[idx];
> +	m = t->ch_exts[idx].ce_meta;
>  	/* can only merge buddies at same level */
>  	if (m->cs_local_level == local_level) {
> -		*meta = m;
> -		return t->ch_tables[idx];
> +		return &t->ch_exts[idx];
>  	}

Could drop the braces

>  	return NULL;
>  }
> @@ -680,14 +670,15 @@ static int
>  ch_table_compact(const struct ch_type *type, struct ch_table *t, uint64_t h,
>      struct ch_group *table, struct ch_meta *meta)
>  {
> -	struct ch_group *buddy, *to = NULL;
> -	struct ch_meta *buddymeta, *tometa = NULL;
> +	struct ch_ext *buddy;
> +	struct ch_group *to = NULL;
> +	struct ch_meta *tometa = NULL;
>  	uint64_t idx;
>  
>  	idx = CH_H1(h, t->ch_level);
> -	buddy = ch_table_buddy(t, idx, meta->cs_local_level, &buddymeta);
> +	buddy = ch_table_buddy(t, idx, meta->cs_local_level);
>  	if (buddy == NULL || ch_sub_fillfactor(meta) +
> -	    ch_sub_fillfactor(buddymeta) > CH_MAX_LOAD * 2 / 3)
> +	    ch_sub_fillfactor(buddy->ce_meta) > CH_MAX_LOAD * 2 / 3)
>  		return -1;
>  
>  	/* allocate new sub table */
> @@ -695,18 +686,19 @@ ch_table_compact(const struct ch_type *t
>  		goto fail;
>  
>  	/* merge the table and buddy into to. */
> -	if (ch_sub_merge(type, to, table, buddy, tometa, meta, buddymeta) ==
> -	    -1)
> +	if (ch_sub_merge(type, to, table, buddy->ce_table,
> +	    tometa, meta, buddy->ce_meta) == -1)
>  		goto fail;
>  
>  	/*
>  	 * Update table in the extendible hash table, which overwrites
> -	 * all entries of the buddy.
> +	 * all entries of the table and buddy with new values.
> +	 * Therefor free them first.

Therefore

>  	 */
>  	idx = CH_H1(h, tometa->cs_local_level);
> -	ch_table_fill(t, idx, to, tometa);
> -	ch_sub_free(type, t, buddy, buddymeta);
> +	ch_sub_free(type, t, buddy->ce_table, buddy->ce_meta);
>  	ch_sub_free(type, t, table, meta);
> +	ch_table_fill(t, idx, to, tometa);
>  
>  	return 0;
>  
> @@ -748,14 +740,14 @@ _ch_destroy(const struct ch_type *type, 
>  	struct ch_group *table = NULL;
>  
>  	for (idx = 0; idx < max; idx++) {
> -		if (table == t->ch_tables[idx])
> +		if (table == t->ch_exts[idx].ce_table)
>  			continue;
>  
> -		table = t->ch_tables[idx];
> -		ch_sub_free(type, t, t->ch_tables[idx], t->ch_metas[idx]);
> +		table = t->ch_exts[idx].ce_table;
> +		ch_sub_free(type, t, t->ch_exts[idx].ce_table,
> +		    t->ch_exts[idx].ce_meta);
>  	}
> -	free(t->ch_tables);
> -	free(t->ch_metas);
> +	free(t->ch_exts);
>  	type->t_counts->cc_num_extendible -= max;
>  	memset(t, 0, sizeof(*t));
>  }
> @@ -769,21 +761,21 @@ _ch_insert(const struct ch_type *type, s
>  	void *v;
>  	uint64_t idx;
>  
> -	if (t->ch_tables == NULL)
> +	if (t->ch_exts == NULL)
>  		if (_ch_init(type, t) == -1)
>  			return CH_INS_FAILED;
>  
>  	idx = CH_H1(h, t->ch_level);
> -	table = t->ch_tables[idx];
> -	meta = t->ch_metas[idx];
> +	table = t->ch_exts[idx].ce_table;
> +	meta = t->ch_exts[idx].ce_meta;
>  
>  	if (ch_sub_loadfactor(meta) >= CH_MAX_LOAD) {
>  		if (ch_table_grow(type, t, h, table, meta) == -1)
>  			return CH_INS_FAILED;
>  		/* refetch data after resize */
>  		idx = CH_H1(h, t->ch_level);
> -		table = t->ch_tables[idx];
> -		meta = t->ch_metas[idx];
> +		table = t->ch_exts[idx].ce_table;
> +		meta = t->ch_exts[idx].ce_meta;
>  	}
>  
>  	v = ch_sub_insert(type, table, meta, h, elm);
> @@ -803,12 +795,12 @@ _ch_remove(const struct ch_type *type, s
>  	void *v;
>  	uint64_t idx;
>  
> -	if (t->ch_tables == NULL)
> +	if (t->ch_exts == NULL)
>  		return NULL;
>  
>  	idx = CH_H1(h, t->ch_level);
> -	table = t->ch_tables[idx];
> -	meta = t->ch_metas[idx];
> +	table = t->ch_exts[idx].ce_table;
> +	meta = t->ch_exts[idx].ce_meta;
>  
>  	v = ch_sub_remove(type, table, meta, h, needle);
>  	if (v != NULL) {
> @@ -820,8 +812,8 @@ _ch_remove(const struct ch_type *type, s
>  				break;
>  
>  			/* refetch data after compaction */
> -			table = t->ch_tables[idx];
> -			meta = t->ch_metas[idx];
> +			table = t->ch_exts[idx].ce_table;
> +			meta = t->ch_exts[idx].ce_meta;
>  		}
>  	}
>  	return v;
> @@ -834,11 +826,11 @@ _ch_find(const struct ch_type *type, str
>  	struct ch_group *table;
>  	uint64_t idx;
>  
> -	if (t->ch_tables == NULL)
> +	if (t->ch_exts == NULL)
>  		return NULL;
>  
>  	idx = CH_H1(h, t->ch_level);
> -	table = t->ch_tables[idx];
> +	table = t->ch_exts[idx].ce_table;
>  
>  	return ch_sub_find(type, table, h, needle);
>  }
> @@ -850,11 +842,11 @@ _ch_locate(const struct ch_type *type, s
>  	struct ch_group *table;
>  	uint64_t idx;
>  
> -	if (t->ch_tables == NULL)
> +	if (t->ch_exts == NULL)
>  		return NULL;
>  
>  	idx = CH_H1(h, t->ch_level);
> -	table = t->ch_tables[idx];
> +	table = t->ch_exts[idx].ce_table;
>  
>  	return ch_sub_locate(type, table, h, cmp, arg);
>  }
> @@ -865,11 +857,11 @@ _ch_first(const struct ch_type *type, st
>  	struct ch_group *table;
>  	uint64_t idx;
>  
> -	if (t->ch_tables == NULL)
> +	if (t->ch_exts == NULL)
>  		return NULL;
>  
>  	idx = it->ci_ext_idx = 0;
> -	table = t->ch_tables[idx];
> +	table = t->ch_exts[idx].ce_table;
>  
>  	return ch_sub_first(type, table, it);
>  }
> @@ -881,7 +873,7 @@ _ch_next(const struct ch_type *type, str
>  	uint64_t idx, max;
>  	void *v;
>  
> -	if (t->ch_tables == NULL)
> +	if (t->ch_exts == NULL)
>  		return NULL;
>  
>  	max = 1ULL << t->ch_level;
> @@ -889,21 +881,21 @@ _ch_next(const struct ch_type *type, str
>  	if (idx >= max)
>  		return NULL;
>  
> -	table = t->ch_tables[idx];
> +	table = t->ch_exts[idx].ce_table;
>  	v = ch_sub_next(type, table, it);
>  	if (v != NULL)
>  		return v;
>  
>  	/* find next sub table */
>  	for (idx++; idx < max; idx++) {
> -		if (table != t->ch_tables[idx])
> +		if (table != t->ch_exts[idx].ce_table)
>  			break;
>  	}
>  	if (idx >= max)
>  		return NULL;
>  	/* start next sub table */
>  	it->ci_ext_idx = idx;
> -	table = t->ch_tables[idx];
> +	table = t->ch_exts[idx].ce_table;
>  	return ch_sub_first(type, table, it);
>  }
>  
> @@ -917,7 +909,7 @@ _ch_get_stats(struct ch_stats *stats, co
>  	stats->cs_size_tables = counts->cc_num_tables *
>  	    (CH_H2_SIZE * sizeof(struct ch_group) + sizeof(struct ch_meta));
>  	stats->cs_size_extendible = stats->cs_num_extendible *
> -	    (sizeof(struct ch_group *) + sizeof(struct ch_meta *));
> +	    sizeof(struct ch_ext);
>  }
>  
>  /*
> Index: chash.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/bgpd/chash.h,v
> diff -u -p -r1.6 chash.h
> --- chash.h	17 Mar 2026 09:29:29 -0000	1.6
> +++ chash.h	13 May 2026 09:10:16 -0000
> @@ -48,14 +48,12 @@ struct ch_type {
>  	struct ch_counts *t_counts;
>  };
>  
> -struct ch_group;
> -struct ch_meta;
> +struct ch_ext;
>  
>  struct ch_table {
> -	struct ch_group		**ch_tables;
> -	struct ch_meta		**ch_metas;
> -	uint32_t		  ch_level;
> -	struct ch_counts	  ch_counts;
> +	struct ch_ext		*ch_exts;
> +	uint32_t		 ch_level;
> +	struct ch_counts	 ch_counts;
>  };
>  
>  
>