Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
bgpd: rework extendible hash part of CH hash
To:
tech@openbsd.org
Date:
Wed, 13 May 2026 15:41:38 +0200

Download raw body.

Thread
I really dislike the double arrays for tables and meta data.

This adds ch_ext which wrapps the two pointers into a single array.
This simplifies a fair bunch of functions (esp. ch_table_resize).

I also changed the initial allocation from 1 entry to 2 and with that the
magic ch_level == 0 case is gone.
I resuffled the code as well so that we can return early for the inital
call. This may not be needed but the for loop scares me a bit.
The need for the signed index just makes me uncomfy.

-- 
:wq Claudio

Index: chash.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/chash.c,v
diff -u -p -r1.11 chash.c
--- chash.c	13 May 2026 08:57:39 -0000	1.11
+++ chash.c	13 May 2026 13:18:07 -0000
@@ -80,6 +80,11 @@ struct ch_meta {
 	uint32_t	cs_local_level;
 };
 
+struct ch_ext {
+	struct ch_group		*ce_table;
+	struct ch_meta		*ce_meta;
+};
+
 /*
  * API to work with the cg_meta field of a ch_group:
  *   cg_meta_set_flags: Set bits in flags section, return true if not yet set.
@@ -527,8 +532,7 @@ ch_sub_free(const struct ch_type *type, 
 static int
 ch_table_resize(const struct ch_type *type, struct ch_table *t)
 {
-	struct ch_group **tables;
-	struct ch_meta **metas;
+	struct ch_ext *new;
 	uint64_t oldsize = 1ULL << t->ch_level;
 	uint64_t newsize = oldsize * 2;
 	int64_t idx;
@@ -538,40 +542,28 @@ ch_table_resize(const struct ch_type *ty
 		return -1;
 	}
 
-	if (t->ch_tables == NULL) {
+	if (t->ch_exts == NULL) {
 		oldsize = 0;
-		newsize = 1;
+		newsize = 2;
 	}
 
-	tables = reallocarray(t->ch_tables, newsize, sizeof(*tables));
-	if (tables == NULL)
-		return -1;
-	metas = reallocarray(t->ch_metas, newsize, sizeof(*metas));
-	if (metas == NULL) {
-		/*
-		 * tables was correctly reallocated, so update that
-		 * pointer before failing hard. If the caller recovers
-		 * somehow the next reallocarray of ch_tables will simply
-		 * do nothing.
-		 */
-		t->ch_tables = tables;
+	new = reallocarray(t->ch_exts, newsize, sizeof(*t->ch_exts));
+	if (new == NULL)
 		return -1;
-	}
 
-	for (idx = oldsize - 1; idx >= 0; idx--) {
-		tables[idx * 2] = tables[idx];
-		tables[idx * 2 + 1] = tables[idx];
-		metas[idx * 2] = metas[idx];
-		metas[idx * 2 + 1] = metas[idx];
-	}
+	t->ch_level++;
+	t->ch_exts = new;
 
-	if (t->ch_tables != NULL)
-		t->ch_level++;
-	t->ch_tables = tables;
-	t->ch_metas = metas;
+	t->ch_counts.cc_num_extendible += newsize - oldsize;
+	type->t_counts->cc_num_extendible += newsize - oldsize;
 
-	t->ch_counts.cc_num_extendible += oldsize;
-	type->t_counts->cc_num_extendible += oldsize;
+	if (oldsize == 0)
+		return 0;
+
+	for (idx = oldsize - 1; idx >= 0; idx--) {
+		t->ch_exts[idx * 2] = t->ch_exts[idx];
+		t->ch_exts[idx * 2 + 1] = t->ch_exts[idx];
+	}
 
 	return 0;
 }
@@ -591,8 +583,8 @@ ch_table_fill(struct ch_table *t, uint64
 	cnt = 1ULL << (t->ch_level - meta->cs_local_level);
 
 	for (i = 0; i < cnt; i++) {
-		t->ch_tables[idx + i] = table;
-		t->ch_metas[idx + i] = meta;
+		t->ch_exts[idx + i].ce_table = table;
+		t->ch_exts[idx + i].ce_meta = meta;
 	}
 }
 
@@ -600,9 +592,8 @@ ch_table_fill(struct ch_table *t, uint64
  * Return the buddy sub group for the table with idx and local_level.
  * The buddy page must have the same local level to be a buddy.
  */
-static struct ch_group *
-ch_table_buddy(struct ch_table *t, uint64_t idx, uint32_t local_level,
-    struct ch_meta **meta)
+static struct ch_ext *
+ch_table_buddy(struct ch_table *t, uint64_t idx, uint32_t local_level)
 {
 	struct ch_meta *m;
 
@@ -612,11 +603,10 @@ ch_table_buddy(struct ch_table *t, uint6
 
 	idx ^= 1ULL << (t->ch_level - local_level);
 
-	m = t->ch_metas[idx];
+	m = t->ch_exts[idx].ce_meta;
 	/* can only merge buddies at same level */
 	if (m->cs_local_level == local_level) {
-		*meta = m;
-		return t->ch_tables[idx];
+		return &t->ch_exts[idx];
 	}
 	return NULL;
 }
@@ -680,14 +670,15 @@ static int
 ch_table_compact(const struct ch_type *type, struct ch_table *t, uint64_t h,
     struct ch_group *table, struct ch_meta *meta)
 {
-	struct ch_group *buddy, *to = NULL;
-	struct ch_meta *buddymeta, *tometa = NULL;
+	struct ch_ext *buddy;
+	struct ch_group *to = NULL;
+	struct ch_meta *tometa = NULL;
 	uint64_t idx;
 
 	idx = CH_H1(h, t->ch_level);
-	buddy = ch_table_buddy(t, idx, meta->cs_local_level, &buddymeta);
+	buddy = ch_table_buddy(t, idx, meta->cs_local_level);
 	if (buddy == NULL || ch_sub_fillfactor(meta) +
-	    ch_sub_fillfactor(buddymeta) > CH_MAX_LOAD * 2 / 3)
+	    ch_sub_fillfactor(buddy->ce_meta) > CH_MAX_LOAD * 2 / 3)
 		return -1;
 
 	/* allocate new sub table */
@@ -695,18 +686,19 @@ ch_table_compact(const struct ch_type *t
 		goto fail;
 
 	/* merge the table and buddy into to. */
-	if (ch_sub_merge(type, to, table, buddy, tometa, meta, buddymeta) ==
-	    -1)
+	if (ch_sub_merge(type, to, table, buddy->ce_table,
+	    tometa, meta, buddy->ce_meta) == -1)
 		goto fail;
 
 	/*
 	 * Update table in the extendible hash table, which overwrites
-	 * all entries of the buddy.
+	 * all entries of the table and buddy with new values.
+	 * Therefor free them first.
 	 */
 	idx = CH_H1(h, tometa->cs_local_level);
-	ch_table_fill(t, idx, to, tometa);
-	ch_sub_free(type, t, buddy, buddymeta);
+	ch_sub_free(type, t, buddy->ce_table, buddy->ce_meta);
 	ch_sub_free(type, t, table, meta);
+	ch_table_fill(t, idx, to, tometa);
 
 	return 0;
 
@@ -748,14 +740,14 @@ _ch_destroy(const struct ch_type *type, 
 	struct ch_group *table = NULL;
 
 	for (idx = 0; idx < max; idx++) {
-		if (table == t->ch_tables[idx])
+		if (table == t->ch_exts[idx].ce_table)
 			continue;
 
-		table = t->ch_tables[idx];
-		ch_sub_free(type, t, t->ch_tables[idx], t->ch_metas[idx]);
+		table = t->ch_exts[idx].ce_table;
+		ch_sub_free(type, t, t->ch_exts[idx].ce_table,
+		    t->ch_exts[idx].ce_meta);
 	}
-	free(t->ch_tables);
-	free(t->ch_metas);
+	free(t->ch_exts);
 	type->t_counts->cc_num_extendible -= max;
 	memset(t, 0, sizeof(*t));
 }
@@ -769,21 +761,21 @@ _ch_insert(const struct ch_type *type, s
 	void *v;
 	uint64_t idx;
 
-	if (t->ch_tables == NULL)
+	if (t->ch_exts == NULL)
 		if (_ch_init(type, t) == -1)
 			return CH_INS_FAILED;
 
 	idx = CH_H1(h, t->ch_level);
-	table = t->ch_tables[idx];
-	meta = t->ch_metas[idx];
+	table = t->ch_exts[idx].ce_table;
+	meta = t->ch_exts[idx].ce_meta;
 
 	if (ch_sub_loadfactor(meta) >= CH_MAX_LOAD) {
 		if (ch_table_grow(type, t, h, table, meta) == -1)
 			return CH_INS_FAILED;
 		/* refetch data after resize */
 		idx = CH_H1(h, t->ch_level);
-		table = t->ch_tables[idx];
-		meta = t->ch_metas[idx];
+		table = t->ch_exts[idx].ce_table;
+		meta = t->ch_exts[idx].ce_meta;
 	}
 
 	v = ch_sub_insert(type, table, meta, h, elm);
@@ -803,12 +795,12 @@ _ch_remove(const struct ch_type *type, s
 	void *v;
 	uint64_t idx;
 
-	if (t->ch_tables == NULL)
+	if (t->ch_exts == NULL)
 		return NULL;
 
 	idx = CH_H1(h, t->ch_level);
-	table = t->ch_tables[idx];
-	meta = t->ch_metas[idx];
+	table = t->ch_exts[idx].ce_table;
+	meta = t->ch_exts[idx].ce_meta;
 
 	v = ch_sub_remove(type, table, meta, h, needle);
 	if (v != NULL) {
@@ -820,8 +812,8 @@ _ch_remove(const struct ch_type *type, s
 				break;
 
 			/* refetch data after compaction */
-			table = t->ch_tables[idx];
-			meta = t->ch_metas[idx];
+			table = t->ch_exts[idx].ce_table;
+			meta = t->ch_exts[idx].ce_meta;
 		}
 	}
 	return v;
@@ -834,11 +826,11 @@ _ch_find(const struct ch_type *type, str
 	struct ch_group *table;
 	uint64_t idx;
 
-	if (t->ch_tables == NULL)
+	if (t->ch_exts == NULL)
 		return NULL;
 
 	idx = CH_H1(h, t->ch_level);
-	table = t->ch_tables[idx];
+	table = t->ch_exts[idx].ce_table;
 
 	return ch_sub_find(type, table, h, needle);
 }
@@ -850,11 +842,11 @@ _ch_locate(const struct ch_type *type, s
 	struct ch_group *table;
 	uint64_t idx;
 
-	if (t->ch_tables == NULL)
+	if (t->ch_exts == NULL)
 		return NULL;
 
 	idx = CH_H1(h, t->ch_level);
-	table = t->ch_tables[idx];
+	table = t->ch_exts[idx].ce_table;
 
 	return ch_sub_locate(type, table, h, cmp, arg);
 }
@@ -865,11 +857,11 @@ _ch_first(const struct ch_type *type, st
 	struct ch_group *table;
 	uint64_t idx;
 
-	if (t->ch_tables == NULL)
+	if (t->ch_exts == NULL)
 		return NULL;
 
 	idx = it->ci_ext_idx = 0;
-	table = t->ch_tables[idx];
+	table = t->ch_exts[idx].ce_table;
 
 	return ch_sub_first(type, table, it);
 }
@@ -881,7 +873,7 @@ _ch_next(const struct ch_type *type, str
 	uint64_t idx, max;
 	void *v;
 
-	if (t->ch_tables == NULL)
+	if (t->ch_exts == NULL)
 		return NULL;
 
 	max = 1ULL << t->ch_level;
@@ -889,21 +881,21 @@ _ch_next(const struct ch_type *type, str
 	if (idx >= max)
 		return NULL;
 
-	table = t->ch_tables[idx];
+	table = t->ch_exts[idx].ce_table;
 	v = ch_sub_next(type, table, it);
 	if (v != NULL)
 		return v;
 
 	/* find next sub table */
 	for (idx++; idx < max; idx++) {
-		if (table != t->ch_tables[idx])
+		if (table != t->ch_exts[idx].ce_table)
 			break;
 	}
 	if (idx >= max)
 		return NULL;
 	/* start next sub table */
 	it->ci_ext_idx = idx;
-	table = t->ch_tables[idx];
+	table = t->ch_exts[idx].ce_table;
 	return ch_sub_first(type, table, it);
 }
 
@@ -917,7 +909,7 @@ _ch_get_stats(struct ch_stats *stats, co
 	stats->cs_size_tables = counts->cc_num_tables *
 	    (CH_H2_SIZE * sizeof(struct ch_group) + sizeof(struct ch_meta));
 	stats->cs_size_extendible = stats->cs_num_extendible *
-	    (sizeof(struct ch_group *) + sizeof(struct ch_meta *));
+	    sizeof(struct ch_ext);
 }
 
 /*
Index: chash.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/chash.h,v
diff -u -p -r1.6 chash.h
--- chash.h	17 Mar 2026 09:29:29 -0000	1.6
+++ chash.h	13 May 2026 09:10:16 -0000
@@ -48,14 +48,12 @@ struct ch_type {
 	struct ch_counts *t_counts;
 };
 
-struct ch_group;
-struct ch_meta;
+struct ch_ext;
 
 struct ch_table {
-	struct ch_group		**ch_tables;
-	struct ch_meta		**ch_metas;
-	uint32_t		  ch_level;
-	struct ch_counts	  ch_counts;
+	struct ch_ext		*ch_exts;
+	uint32_t		 ch_level;
+	struct ch_counts	 ch_counts;
 };