bgpd: introduce chash and start using it
bgpd uses a lot of RB trees and while they are great they come with a big
cost, especially for objects that do not require an order.
This diff introduces CHASH, which is a variation of swiss tables (that's
why it is called CH). The hash table is built like the RBT macros and
is close to a drop-in replacement. It scales up and down nicely, but I
have not yet spent an excessive amount of time tuning the last bit of
performance out of it.
This diff adds the chash functions and then uses them for a few objects
in the RDE. The goal is to move more over to this, especially parts of
the Adj-RIB-Out handling where the RB lookups burn a lot of CPU time.
I tested this a fair bit and it seems to behave, but this is a big change
and more testing is needed.
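
For anyone who wants to play with the API, below is a minimal usage
sketch against the chash.h macros. It is not part of the diff; struct
foo, foo_eq and foo_hash are made-up example names and the integer
mixer in foo_hash is only a placeholder (the RDE code uses SipHash
keyed via arc4random_buf() instead).

#include <stdint.h>
#include <stdio.h>

#include "chash.h"

struct foo {
	uint32_t	 id;
	const char	*name;
};

static inline int
foo_eq(const struct foo *a, const struct foo *b)
{
	return a->id == b->id;
}

static inline uint64_t
foo_hash(const struct foo *f)
{
	/* toy mixer for the example, use a real keyed hash in real code */
	uint64_t h = f->id;

	h ^= h >> 33;
	h *= 0xff51afd7ed558ccdULL;
	h ^= h >> 33;
	return h;
}

CH_HEAD(foo_table, foo) footable = CH_INITIALIZER(&footable);
CH_PROTOTYPE(foo_table, foo, foo_hash);
CH_GENERATE(foo_table, foo, foo_eq, foo_hash);

int
main(void)
{
	struct foo one = { .id = 1, .name = "one" };
	struct foo key = { .id = 1 };
	struct foo *f;
	struct ch_iter iter;

	/* 1 == inserted, 0 == equal element present, -1 == failure */
	if (CH_INSERT(foo_table, &footable, &one, NULL) != 1)
		return 1;

	f = CH_FIND(foo_table, &footable, &key);
	printf("found %s\n", f != NULL ? f->name : "nothing");

	CH_FOREACH(f, foo_table, &footable, &iter)
		printf("id %u\n", f->id);

	CH_REMOVE(foo_table, &footable, &one);
	return 0;
}

CH_INSERT returns 1 when the element was inserted, 0 when an equal
element is already in the table (handed back via the prev argument if
it is not NULL) and -1 if the insert failed; CH_FIND and CH_REMOVE
return the element or NULL.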
--
:wq Claudio
Index: Makefile
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/Makefile,v
diff -u -p -r1.42 Makefile
--- Makefile 26 Feb 2025 15:49:56 -0000 1.42
+++ Makefile 31 Mar 2025 14:14:20 -0000
@@ -3,6 +3,7 @@
PROG= bgpd
SRCS= bgpd.c
SRCS+= carp.c
+SRCS+= chash.c
SRCS+= config.c
SRCS+= control.c
SRCS+= flowspec.c
Index: chash.c
===================================================================
RCS file: chash.c
diff -N chash.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ chash.c 8 Sep 2025 12:29:40 -0000
@@ -0,0 +1,876 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2025 Claudio Jeker <claudio@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "chash.h"
+
+/*
+ * CH hash table: a cache optimized hash table
+ *
+ * The CH hash table is split into an extendible hashing part and sub
+ * tables. Each sub table is split into CH_H2_SIZE groups of 7 elements.
+ * The sub tables are open addressed hash tables similar to swiss tables
+ * but the groups are only using 7 fields and not 16. Also the meta data
+ * is part of each group. On 64bit archs a group uses 64 bytes.
+ *
+ * The hash is split in three parts:
+ * o H1: variable size hash used by the extendible hashing table
+ * o H2: fixed size hash to select a group in a sub table.
+ * o H3: 1 byte hash to select element in a group.
+ *
+ * Unlike other unordered sets this hash table does not allow a key to
+ * be inserted more than once.
+ */
+
+#define CH_MAX_LOAD 875 /* in per-mille */
+
+#define CH_H3_BITS 8
+#define CH_H3_SIZE (1 << CH_H3_BITS)
+#define CH_H3_MASK (CH_H3_SIZE - 1)
+
+#define CH_H2_SHIFT CH_H3_BITS
+#define CH_H2_BITS 9
+#define CH_H2_SIZE (1 << CH_H2_BITS)
+#define CH_H2_MASK (CH_H2_SIZE - 1)
+
+#define CH_H1_BITS (64 - CH_H2_BITS - CH_H3_BITS)
+
+#define CH_H1(h, l) ((l) == 0 ? 0 : h >> (64 - (l)))
+#define CH_H2(h) ((h >> CH_H2_SHIFT) & CH_H2_MASK)
+#define CH_H3(h) (h & CH_H3_MASK)
+
+struct ch_group {
+ uint64_t cg_meta;
+ /*
+ * cg_meta is actually split like this:
+ * uint8_t cg_flags;
+ * uint8_t cg_hashes[7];
+ */
+ void *cg_data[7];
+};
+#define CH_EVER_FULL 0x80
+#define CH_SLOT_MASK 0x7f
+#define CH_FLAGS_SHIFT 56
+
+struct ch_meta {
+ uint32_t cs_num_elm;
+ uint32_t cs_num_tomb;
+ uint32_t cs_num_ever_full;
+ uint32_t cs_local_level;
+};
+
+/*
+ * API to work with the cg_meta field of a ch_group:
+ * cg_meta_set_flags: Set bits in flags section, return true if already set.
+ * cg_meta_clear_flags: Clear bits in flags section, return true if cleared.
+ * cg_meta_get_flags: Return flags section as uint8_t.
+ * cg_meta_check_flags: Return true if flag is set in flags section.
+ * cg_meta_set_hash: Set one of the 7 hash bytes according to slot and hash.
+ * ch_meta_locate: Find possible candidates.
+ */
+static inline int
+cg_meta_set_flags(struct ch_group *g, uint8_t flag)
+{
+ uint64_t f = flag, oldf;
+ oldf = g->cg_meta & f << CH_FLAGS_SHIFT;
+ g->cg_meta |= f << CH_FLAGS_SHIFT;
+ return oldf != 0;
+}
+
+static inline int
+cg_meta_clear_flags(struct ch_group *g, uint8_t flag)
+{
+ uint64_t f = flag, oldf;
+ oldf = g->cg_meta & f << CH_FLAGS_SHIFT;
+ g->cg_meta &= ~(f << CH_FLAGS_SHIFT);
+ return oldf != 0;
+}
+
+static inline uint8_t
+cg_meta_get_flags(const struct ch_group *g)
+{
+ return g->cg_meta >> CH_FLAGS_SHIFT;
+}
+
+static inline int
+cg_meta_check_flags(const struct ch_group *g, uint8_t flag)
+{
+ uint64_t f = flag;
+ return (g->cg_meta & f << CH_FLAGS_SHIFT) != 0;
+}
+
+static inline void
+cg_meta_set_hash(struct ch_group *g, int slot, uint8_t hash)
+{
+ ((uint8_t *)&g->cg_meta)[slot] = hash;
+}
+
+/*
+ * Find possible candidates in the group where the mask matches the
+ * hash of the slot and the flag bit for the slot is set.
+ * Additionally set CH_EVER_FULL in the return value if it is set in
+ * the group metadata.
+ */
+static uint8_t
+ch_meta_locate(struct ch_group *g, uint64_t mask)
+{
+ uint64_t lookup;
+ uint8_t flags, i, hits;
+
+ lookup = g->cg_meta ^ mask;
+ flags = cg_meta_get_flags(g);
+ hits = flags & CH_EVER_FULL;
+
+ for (i = 0; i < 7; i++) {
+ if (((lookup >> i * 8) & CH_H3_MASK) == 0)
+ hits |= flags & (1 << i);
+ }
+ return hits;
+}
+
+/*
+ * API to work with fixed size sub tables.
+ * Each sub table is an array of struct ch_group with CH_H2_SIZE elements.
+ *
+ * To find an element the group metadata is used to see if there is a
+ * hit. If there was no hit but the CH_EVER_FULL flag is set the lookup
+ * will continue and look at the next element until the needle is found
+ * or there is a group where CH_EVER_FULL is unset.
+ *
+ * On insert if a group overflows CH_EVER_FULL is set. The flag will not be
+ * cleared again.
+ * On remove a data slot in the group is freed but depending on the
+ * CH_EVER_FULL flag the slot is considered a tombstone (if set) or considered
+ * free (not set).
+ *
+ * Sub tables are automatically split in two when the maximum load factor is
+ * reached. If the fill factor drops below a threshold then buddy tables
+ * may be joined back together.
+ */
+
+/* Return load factor (including tombstones) of a sub table in per mille. */
+static int
+ch_sub_loadfactor(struct ch_meta *m)
+{
+ uint32_t max = CH_H2_SIZE * 7;
+ uint32_t used = m->cs_num_elm + m->cs_num_tomb;
+
+ return used * 1000 / max;
+}
+
+/* Return fill factor (only active elements) of a sub table in per mille. */
+static int
+ch_sub_fillfactor(struct ch_meta *m)
+{
+ uint32_t max = CH_H2_SIZE * 7;
+ uint32_t used = m->cs_num_elm;
+
+ return used * 1000 / max;
+}
+
+/*
+ * Insert element into the hash table. If an equal element is already present
+ * in the table return the pointer to that element else return NULL.
+ */
+static void *
+ch_sub_insert(const struct ch_type *type, struct ch_group *table,
+ struct ch_meta *meta, uint64_t h, void *elm)
+{
+ uint64_t mask;
+ uint32_t bucket = CH_H2(h);
+ int i;
+ uint8_t empties, hits, ins_i;
+ struct ch_group *g = &table[bucket], *ins_g = NULL;
+
+ memset(&mask, CH_H3(h), sizeof(mask));
+ while (1) {
+ /* first check if object already present */
+ hits = ch_meta_locate(g, mask);
+ for (i = 0; i < 7; i++) {
+ if (hits & (1 << i)) {
+ if (type->t_equal(g->cg_data[i], elm))
+ return g->cg_data[i];
+ }
+ }
+ /* at the same time remember the first empty spot */
+ empties = ~cg_meta_get_flags(g) & CH_SLOT_MASK;
+ if (ins_g == NULL && (i = ffs(empties)) != 0) {
+ ins_g = g;
+ ins_i = i - 1;
+ } else if (empties == 0) {
+ /* overflow, mark group as full */
+ if (cg_meta_set_flags(g, CH_EVER_FULL) == 0)
+ meta->cs_num_ever_full++;
+ }
+ /* check if lookup is finished and obj is not present */
+ if (cg_meta_check_flags(g, CH_EVER_FULL) == 0)
+ break;
+ g = &table[++bucket & CH_H2_MASK];
+ }
+
+ /*
+ * insert new object and adjust accounting.
+ * If CH_EVER_FULL is set then it was a tombstone and not an empty slot.
+ */
+ ins_g->cg_data[ins_i] = elm;
+ cg_meta_set_hash(ins_g, ins_i, h & CH_H3_MASK);
+ cg_meta_set_flags(ins_g, 1 << ins_i);
+ if (cg_meta_check_flags(ins_g, CH_EVER_FULL))
+ meta->cs_num_tomb--;
+ meta->cs_num_elm++;
+
+ return NULL;
+}
+
+/*
+ * Lookup needle in the sub table and if found, remove the entry and return its
+ * pointer else return NULL.
+ */
+static void *
+ch_sub_remove(const struct ch_type *type, struct ch_group *table,
+ struct ch_meta *meta, uint64_t h, const void *needle)
+{
+ uint64_t mask;
+ uint32_t bucket = CH_H2(h);
+ int i;
+ uint8_t hits;
+ struct ch_group *g = &table[bucket];
+
+ memset(&mask, CH_H3(h), sizeof(mask));
+ while (1) {
+ hits = ch_meta_locate(g, mask);
+ for (i = 0; i < 7; i++) {
+ if (hits & (1 << i)) {
+ /* most probably a hit */
+ if (type->t_equal(g->cg_data[i], needle)) {
+ void *elm = g->cg_data[i];
+ g->cg_data[i] = NULL;
+ cg_meta_set_hash(g, i, 0);
+ cg_meta_clear_flags(g, 1 << i);
+ if (hits & CH_EVER_FULL)
+ meta->cs_num_tomb++;
+ meta->cs_num_elm--;
+ return elm;
+ }
+ }
+ }
+ if ((hits & CH_EVER_FULL) == 0)
+ return NULL;
+ g = &table[++bucket & CH_H2_MASK];
+ }
+}
+
+/*
+ * Lookup needle in the sub table and if found return its pointer
+ * else return NULL.
+ */
+static void *
+ch_sub_find(const struct ch_type *type, struct ch_group *table, uint64_t h,
+ const void *needle)
+{
+ uint64_t mask;
+ uint32_t bucket = CH_H2(h);
+ int i;
+ uint8_t hits;
+ struct ch_group *g = &table[bucket];
+
+ memset(&mask, CH_H3(h), sizeof(mask));
+ while (1) {
+ hits = ch_meta_locate(g, mask);
+ for (i = 0; i < 7; i++) {
+ if (hits & (1 << i)) {
+ /* most probably a hit */
+ if (type->t_equal(g->cg_data[i], needle))
+ return g->cg_data[i];
+ }
+ }
+ if ((hits & CH_EVER_FULL) == 0)
+ return NULL;
+ g = &table[++bucket & CH_H2_MASK];
+ }
+}
+
+/*
+ * Lookup hash in the sub table and if a possible match is found pass it
+ * to the cmp callback to verify.
+ */
+static void *
+ch_sub_locate(const struct ch_type *type, struct ch_group *table, uint64_t h,
+ int (*cmp)(const void *, void *), void *arg)
+{
+ uint64_t mask;
+ uint32_t bucket = CH_H2(h);
+ int i;
+ uint8_t hits;
+ struct ch_group *g = &table[bucket];
+
+ memset(&mask, CH_H3(h), sizeof(mask));
+ while (1) {
+ hits = ch_meta_locate(g, mask);
+ for (i = 0; i < 7; i++) {
+ if (hits & (1 << i)) {
+ /* most probably a hit */
+ if (cmp(g->cg_data[i], arg))
+ return g->cg_data[i];
+ }
+ }
+ if ((hits & CH_EVER_FULL) == 0)
+ return NULL;
+ g = &table[++bucket & CH_H2_MASK];
+ }
+}
+
+/*
+ * Start of sub table iterator, reset the set and grp indices and locate
+ * first element in sub table. Return element or NULL if table is empty.
+ */
+static void *
+ch_sub_first(const struct ch_type *type, struct ch_group *table,
+ struct ch_iter *iter)
+{
+ struct ch_group *g;
+ uint32_t n;
+ uint8_t elms;
+
+ for (n = 0; n < CH_H2_SIZE; n++, g++) {
+ g = &table[n];
+ elms = cg_meta_get_flags(g) & CH_SLOT_MASK;
+ if (elms == 0)
+ continue;
+ iter->ci_set_idx = n;
+ iter->ci_grp_idx = ffs(elms);
+ return g->cg_data[iter->ci_grp_idx - 1];
+ }
+ iter->ci_set_idx = n;
+ return NULL;
+}
+
+/*
+ * Get next element of a sub table based on the iterator indices.
+ * Return element or NULL if the rest of the table is empty.
+ */
+static void *
+ch_sub_next(const struct ch_type *type, struct ch_group *table,
+ struct ch_iter *iter)
+{
+ struct ch_group *g;
+ uint32_t n;
+ uint8_t elms;
+
+ for (n = iter->ci_set_idx; n < CH_H2_SIZE; n++, g++) {
+ g = &table[n];
+ elms = cg_meta_get_flags(g) & CH_SLOT_MASK;
+ elms &= CH_SLOT_MASK << iter->ci_grp_idx;
+ if (elms == 0) {
+ iter->ci_grp_idx = 0;
+ continue;
+ }
+ iter->ci_set_idx = n;
+ iter->ci_grp_idx = ffs(elms);
+ return g->cg_data[iter->ci_grp_idx - 1];
+ }
+ iter->ci_set_idx = n;
+ return NULL;
+}
+
+/*
+ * Split table from into two new tables low and high. Update the meta data
+ * accordingly. The cs_local_level of low and high will be 1 higher than that
+ * of the from table.
+ */
+static int
+ch_sub_split(const struct ch_type *type, struct ch_group *from,
+ struct ch_group *low, struct ch_group *high, struct ch_meta *frommeta,
+ struct ch_meta *lowmeta, struct ch_meta *highmeta)
+{
+ struct ch_group *g = &from[0];
+ uint32_t n;
+ uint8_t elms, i;
+
+ lowmeta->cs_local_level = frommeta->cs_local_level + 1;
+ highmeta->cs_local_level = frommeta->cs_local_level + 1;
+
+ for (n = 0; n < CH_H2_SIZE; n++, g++) {
+ elms = cg_meta_get_flags(g) & CH_SLOT_MASK;
+ if (elms == 0)
+ continue;
+ for (i = 0; i < 7; i++) {
+ if (elms & (1 << i)) {
+ void *v;
+ uint64_t h = type->t_hash(g->cg_data[i]);
+ if (CH_H1(h, lowmeta->cs_local_level) & 1)
+ v = ch_sub_insert(type, high, highmeta,
+ h, g->cg_data[i]);
+ else
+ v = ch_sub_insert(type, low, lowmeta,
+ h, g->cg_data[i]);
+ if (v != NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Merge all active elements of one sub group into the destination table.
+ * Return 0 on success, -1 on failure.
+ */
+static int
+ch_sub_merge_one(const struct ch_type *type, struct ch_group *table,
+ struct ch_meta *meta, const struct ch_group *from)
+{
+ uint8_t elms, i;
+
+ elms = cg_meta_get_flags(from) & CH_SLOT_MASK;
+ if (elms == 0)
+ return 0;
+ for (i = 0; i < 7; i++) {
+ if (elms & (1 << i)) {
+ void *v;
+ uint64_t h = type->t_hash(from->cg_data[i]);
+ v = ch_sub_insert(type, table, meta, h,
+ from->cg_data[i]);
+ if (v != NULL) /* how could there be a dup? */
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Merge sub tables from and buddy into the new table to.
+ * Returns 0 on success and -1 on failure keeping from and buddy as is.
+ */
+static int
+ch_sub_merge(const struct ch_type *type, struct ch_group *to,
+ struct ch_group *from, struct ch_group *buddy, struct ch_meta *tometa,
+ struct ch_meta *frommeta, struct ch_meta *buddymeta)
+{
+ struct ch_group *g = &from[0];
+ struct ch_group *b = &buddy[0];
+ uint32_t n;
+
+ tometa->cs_local_level = frommeta->cs_local_level - 1;
+
+ for (n = 0; n < CH_H2_SIZE; n++, g++, b++) {
+ if (ch_sub_merge_one(type, to, tometa, g) == -1)
+ return -1;
+ if (ch_sub_merge_one(type, to, tometa, b) == -1)
+ return -1;
+ }
+ return 0;
+}
+
+static int
+ch_sub_alloc(struct ch_group **table, struct ch_meta **meta)
+{
+ if ((*table = calloc(CH_H2_SIZE, sizeof(**table))) == NULL)
+ return -1;
+ if ((*meta = calloc(1, sizeof(**meta))) == NULL) {
+ free(*table);
+ return -1;
+ }
+ return 0;
+}
+
+static void
+ch_sub_free(struct ch_group *table, struct ch_meta *meta)
+{
+ free(table);
+ free(meta);
+}
+
+/*
+ * Resize the extendible hash table by 2, updating all pointers accordingly.
+ * Return 0 on success, -1 on failure and set errno.
+ */
+static int
+ch_table_resize(struct ch_table *t)
+{
+ struct ch_group **tables;
+ struct ch_meta **metas;
+ uint64_t oldsize = 1ULL << t->ch_level;
+ uint64_t newsize = 1ULL << (t->ch_level + 1);
+ int64_t idx;
+
+ if (t->ch_level + 1 >= CH_H1_BITS) {
+ errno = E2BIG;
+ return -1;
+ }
+
+ if (t->ch_tables == NULL) {
+ oldsize = 0;
+ newsize = 1;
+ }
+
+ tables = reallocarray(t->ch_tables, newsize, sizeof(*tables));
+ if (tables == NULL)
+ return -1;
+ metas = reallocarray(t->ch_metas, newsize, sizeof(*metas));
+ if (metas == NULL) {
+ free(tables);
+ return -1;
+ }
+
+ for (idx = oldsize - 1; idx >= 0; idx--) {
+ tables[idx * 2] = tables[idx];
+ tables[idx * 2 + 1] = tables[idx];
+ metas[idx * 2] = metas[idx];
+ metas[idx * 2 + 1] = metas[idx];
+ }
+
+ if (t->ch_tables != NULL)
+ t->ch_level++;
+ t->ch_tables = tables;
+ t->ch_metas = metas;
+ return 0;
+}
+
+/*
+ * Set table pointers from idx to the required number of elements
+ * depending on the table ch_level and the sub table cs_local_level.
+ * Idx only includes the bits covered by the sub table cs_local_level.
+ */
+static void
+ch_table_fill(struct ch_table *t, uint64_t idx, struct ch_group *table,
+ struct ch_meta *meta)
+{
+ uint64_t cnt, i;
+
+ idx <<= (t->ch_level - meta->cs_local_level);
+ cnt = 1ULL << (t->ch_level - meta->cs_local_level);
+
+ for (i = 0; i < cnt; i++) {
+ t->ch_tables[idx + i] = table;
+ t->ch_metas[idx + i] = meta;
+ }
+}
+
+/*
+ * Return the buddy sub group for the table with idx and local_level.
+ * The buddy page must have the same local level to be a buddy.
+ */
+static struct ch_group *
+ch_table_buddy(struct ch_table *t, uint64_t idx, uint32_t local_level,
+ struct ch_meta **meta)
+{
+ struct ch_meta *m;
+
+ /* the single root table can't be merged */
+ if (local_level == 0)
+ return NULL;
+
+ idx ^= 1ULL << (t->ch_level - local_level);
+
+ m = t->ch_metas[idx];
+ /* can only merge buddies at same level */
+ if (m->cs_local_level == local_level) {
+ *meta = m;
+ return t->ch_tables[idx];
+ }
+ return NULL;
+}
+
+/*
+ * Grow the hash table by splitting a sub table and if needed by doubling
+ * the extendible hash of the primary table.
+ * Return 0 on success, -1 on failure and set errno.
+ */
+static int
+ch_table_grow(const struct ch_type *type, struct ch_table *t, uint64_t h,
+ struct ch_group *table, struct ch_meta *meta)
+{
+ struct ch_group *left, *right;
+ struct ch_meta *leftmeta, *rightmeta;
+ uint64_t idx;
+
+ /* check if the extendible hashing table needs to grow */
+ if (meta->cs_local_level == t->ch_level) {
+ if (ch_table_resize(t) == -1)
+ return -1;
+ }
+
+ /* allocate new sub tables */
+ if (ch_sub_alloc(&left, &leftmeta) == -1)
+ return -1;
+ if (ch_sub_alloc(&right, &rightmeta) == -1) {
+ ch_sub_free(left, leftmeta);
+ return -1;
+ }
+
+ /* split up the old table into the two new ones */
+ if (ch_sub_split(type, table, left, right,
+ meta, leftmeta, rightmeta) == -1) {
+ ch_sub_free(right, rightmeta);
+ ch_sub_free(left, leftmeta);
+ return -1;
+ }
+
+ /*
+ * Insert new tables into the extendible hash table.
+ * Calculate index based on the H1 hash of the old sub table
+ * but shift it by one bit since the new tables use one more bit.
+ */
+ idx = CH_H1(h, meta->cs_local_level) << 1;
+ ch_table_fill(t, idx, left, leftmeta);
+ ch_table_fill(t, idx | 1, right, rightmeta);
+ ch_sub_free(table, meta);
+
+ return 0;
+}
+
+/*
+ * Try to compact two sub tables back together and adjust the extendible
+ * hash. Compaction only happens if there is a buddy page (page with same
+ * local level on the alternate slot) and the result of the merge is below
+ * 2 / 3 of the maximum load factor.
+ * Return 0 on success, -1 if no compaction is possible.
+ */
+static int
+ch_table_compact(const struct ch_type *type, struct ch_table *t, uint64_t h,
+ struct ch_group *table, struct ch_meta *meta)
+{
+ struct ch_group *buddy, *to;
+ struct ch_meta *buddymeta, *tometa;
+ uint64_t idx;
+
+ idx = CH_H1(h, t->ch_level);
+ buddy = ch_table_buddy(t, idx, meta->cs_local_level, &buddymeta);
+ if (buddy == NULL ||
+ ch_sub_fillfactor(meta) +
+ ch_sub_fillfactor(buddymeta) >
+ CH_MAX_LOAD * 2 / 3)
+ return -1;
+
+ /* allocate new sub tables */
+ if (ch_sub_alloc(&to, &tometa) == -1)
+ return -1;
+
+ /* merge the table and buddy into to. */
+ if (ch_sub_merge(type, to, table, buddy, tometa, meta, buddymeta) ==
+ -1) {
+ ch_sub_free(to, tometa);
+ return -1;
+ }
+
+ /*
+ * Update table in the extendible hash table, which overwrites
+ * all entries of the buddy.
+ */
+ idx = CH_H1(h, tometa->cs_local_level);
+ ch_table_fill(t, idx, to, tometa);
+ ch_sub_free(buddy, buddymeta);
+ ch_sub_free(table, meta);
+
+ return 0;
+}
+
+/*
+ * Public API used by the macros defined in chash.h.
+ */
+int
+_ch_init(const struct ch_type *type, struct ch_table *t)
+{
+ struct ch_group *table;
+ struct ch_meta *meta;
+
+ t->ch_level = 0;
+ t->ch_num_elm = 0;
+ if (ch_sub_alloc(&table, &meta) == -1)
+ return -1;
+
+ if (ch_table_resize(t) == -1) {
+ ch_sub_free(table, meta);
+ return -1;
+ }
+ ch_table_fill(t, 0, table, meta);
+
+ return 0;
+}
+
+void
+_ch_destroy(const struct ch_type *type, struct ch_table *t)
+{
+ uint64_t idx, max = 1ULL << t->ch_level;
+ struct ch_group *table = NULL;
+
+ for (idx = 0; idx < max; idx++) {
+ if (table == t->ch_tables[idx])
+ continue;
+
+ table = t->ch_tables[idx];
+ ch_sub_free(t->ch_tables[idx], t->ch_metas[idx]);
+
+ }
+ free(t->ch_tables);
+ free(t->ch_metas);
+ memset(t, 0, sizeof(*t));
+}
+
+void *
+_ch_insert(const struct ch_type *type, struct ch_table *t, uint64_t h,
+ void *elm)
+{
+ struct ch_group *table;
+ struct ch_meta *meta;
+ void *v;
+ uint64_t idx;
+
+ if (t->ch_tables == NULL)
+ if (_ch_init(type, t) == -1)
+ return CH_INS_FAILED;
+
+ idx = CH_H1(h, t->ch_level);
+ table = t->ch_tables[idx];
+ meta = t->ch_metas[idx];
+
+ if (ch_sub_loadfactor(meta) >= CH_MAX_LOAD) {
+ if (ch_table_grow(type, t, h, table, meta) == -1)
+ return CH_INS_FAILED;
+ /* refetch data after resize */
+ idx = CH_H1(h, t->ch_level);
+ table = t->ch_tables[idx];
+ meta = t->ch_metas[idx];
+ }
+
+ v = ch_sub_insert(type, table, meta, h, elm);
+ if (v == NULL)
+ t->ch_num_elm++;
+ return v;
+}
+
+void *
+_ch_remove(const struct ch_type *type, struct ch_table *t, uint64_t h,
+ const void *needle)
+{
+ struct ch_group *table;
+ struct ch_meta *meta;
+ void *v;
+ uint64_t idx;
+
+ if (t->ch_tables == NULL)
+ return NULL;
+
+ idx = CH_H1(h, t->ch_level);
+ table = t->ch_tables[idx];
+ meta = t->ch_metas[idx];
+
+ v = ch_sub_remove(type, table, meta, h, needle);
+ if (v != NULL) {
+ t->ch_num_elm--;
+
+ while (ch_sub_fillfactor(meta) <= CH_MAX_LOAD / 4) {
+ if (ch_table_compact(type, t, h, table, meta) == -1)
+ break;
+
+ /* refetch data after compaction */
+ table = t->ch_tables[idx];
+ meta = t->ch_metas[idx];
+ }
+ }
+ return v;
+}
+
+void *
+_ch_find(const struct ch_type *type, struct ch_table *t, uint64_t h,
+ const void *needle)
+{
+ struct ch_group *table;
+ uint64_t idx;
+
+ if (t->ch_tables == NULL)
+ return NULL;
+
+ idx = CH_H1(h, t->ch_level);
+ table = t->ch_tables[idx];
+
+ return ch_sub_find(type, table, h, needle);
+}
+
+void *
+_ch_locate(const struct ch_type *type, struct ch_table *t, uint64_t h,
+ int (*cmp)(const void *, void *), void *arg)
+{
+ struct ch_group *table;
+ uint64_t idx;
+
+ if (t->ch_tables == NULL)
+ return NULL;
+
+ idx = CH_H1(h, t->ch_level);
+ table = t->ch_tables[idx];
+
+ return ch_sub_locate(type, table, h, cmp, arg);
+}
+
+void *
+_ch_first(const struct ch_type *type, struct ch_table *t, struct ch_iter *it)
+{
+ struct ch_group *table;
+ uint64_t idx;
+
+ if (t->ch_tables == NULL)
+ return NULL;
+
+ idx = it->ci_ext_idx = 0;
+ table = t->ch_tables[idx];
+
+ return ch_sub_first(type, table, it);
+}
+
+void *
+_ch_next(const struct ch_type *type, struct ch_table *t, struct ch_iter *it)
+{
+ struct ch_group *table;
+ uint64_t idx, max;
+ void *v;
+
+ if (t->ch_tables == NULL)
+ return NULL;
+
+ max = 1ULL << t->ch_level;
+ idx = it->ci_ext_idx;
+ if (idx >= max)
+ return NULL;
+
+ table = t->ch_tables[idx];
+ v = ch_sub_next(type, table, it);
+ if (v != NULL)
+ return v;
+
+ /* find next sub table */
+ for (idx++; idx < max; idx++) {
+ if (table != t->ch_tables[idx])
+ break;
+ }
+ if (idx >= max)
+ return NULL;
+ /* start next sub table */
+ it->ci_ext_idx = idx;
+ table = t->ch_tables[idx];
+ return ch_sub_first(type, table, it);
+}
Index: chash.h
===================================================================
RCS file: chash.h
diff -N chash.h
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ chash.h 28 Aug 2025 14:46:50 -0000
@@ -0,0 +1,167 @@
+/* $OpenBSD$ */
+/*
+ * Copyright (c) 2025 Claudio Jeker <claudio@openbsd.org>
+ * Copyright (c) 2016 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __CHASH_H__
+#define __CHASH_H__
+
+struct ch_type {
+ int (*t_equal)(const void *, const void *);
+ uint64_t (*t_hash)(const void *);
+};
+
+struct ch_group;
+struct ch_meta;
+
+struct ch_table {
+ struct ch_group **ch_tables;
+ struct ch_meta **ch_metas;
+ uint32_t ch_level;
+ uint32_t ch_num_elm;
+};
+
+
+#define CH_HEAD(_name, _type) \
+struct _name { \
+ struct ch_table ch_table; \
+}
+
+struct ch_iter {
+ uint64_t ci_ext_idx;
+ uint32_t ci_set_idx;
+ uint32_t ci_grp_idx;
+};
+
+int _ch_init(const struct ch_type *, struct ch_table *);
+void _ch_destroy(const struct ch_type *, struct ch_table *);
+void *_ch_insert(const struct ch_type *, struct ch_table *, uint64_t, void *);
+void *_ch_remove(const struct ch_type *, struct ch_table *, uint64_t,
+ const void *);
+void *_ch_find(const struct ch_type *, struct ch_table *, uint64_t,
+ const void *);
+void *_ch_locate(const struct ch_type *, struct ch_table *, uint64_t,
+ int (*)(const void *, void *), void *);
+void *_ch_first(const struct ch_type *, struct ch_table *, struct ch_iter *);
+void *_ch_next(const struct ch_type *, struct ch_table *, struct ch_iter *);
+
+#define CH_INS_FAILED ((void *)-1)
+
+#define CH_INITIALIZER(_head) { 0 }
+
+#define CH_PROTOTYPE(_name, _type, _hash) \
+extern const struct ch_type *const _name##_CH_TYPE; \
+ \
+__unused static inline int \
+_name##_CH_INIT(struct _name *head) \
+{ \
+ return _ch_init(_name##_CH_TYPE, &head->ch_table); \
+} \
+ \
+__unused static inline void \
+_name##_CH_DESTROY(struct _name *head) \
+{ \
+ _ch_destroy(_name##_CH_TYPE, &head->ch_table); \
+} \
+ \
+__unused static inline int \
+_name##_CH_INSERT(struct _name *head, struct _type *elm, struct _type **prev) \
+{ \
+ struct _type *p; \
+ uint64_t h; \
+ h = _hash(elm); \
+ p = _ch_insert(_name##_CH_TYPE, &head->ch_table, h, elm); \
+ if (p == CH_INS_FAILED) \
+ return -1; \
+ if (prev != NULL) \
+ *prev = p; \
+ return (p == NULL); \
+} \
+ \
+__unused static inline struct _type * \
+_name##_CH_REMOVE(struct _name *head, struct _type *elm) \
+{ \
+ uint64_t h; \
+ h = _hash(elm); \
+ return _ch_remove(_name##_CH_TYPE, &head->ch_table, h, elm); \
+} \
+ \
+__unused static inline struct _type * \
+_name##_CH_FIND(struct _name *head, const struct _type *key) \
+{ \
+ uint64_t h; \
+ h = _hash(key); \
+ return _ch_find(_name##_CH_TYPE, &head->ch_table, h, key); \
+} \
+ \
+__unused static inline struct _type * \
+_name##_CH_LOCATE(struct _name *head, uint64_t hash, \
+ int (*cmp)(const void *, void *), void *arg) \
+{ \
+ return _ch_locate(_name##_CH_TYPE, &head->ch_table, hash, \
+ cmp, arg); \
+} \
+ \
+__unused static inline struct _type * \
+_name##_CH_FIRST(struct _name *head, struct ch_iter *iter) \
+{ \
+ return _ch_first(_name##_CH_TYPE, &head->ch_table, iter); \
+} \
+ \
+__unused static inline struct _type * \
+_name##_CH_NEXT(struct _name *head, struct ch_iter *iter) \
+{ \
+ return _ch_next(_name##_CH_TYPE, &head->ch_table, iter); \
+}
+
+
+#define CH_GENERATE(_name, _type, _eq, _hash) \
+static int \
+_name##_CH_EQUAL(const void *lptr, const void *rptr) \
+{ \
+ const struct _type *l = lptr, *r = rptr; \
+ return _eq(l, r); \
+} \
+ \
+static uint64_t \
+_name##_CH_HASH(const void *ptr) \
+{ \
+ const struct _type *obj = ptr; \
+ return _hash(obj); \
+} \
+ \
+static const struct ch_type _name##_CH_INFO = { \
+ .t_equal = _name##_CH_EQUAL, \
+ .t_hash = _name##_CH_HASH, \
+}; \
+const struct ch_type *const _name##_CH_TYPE = &_name##_CH_INFO
+
+#define CH_INIT(_name, _head) _name##_CH_INIT(_head)
+#define CH_INSERT(_name, _head, _elm, _prev) \
+ _name##_CH_INSERT(_head, _elm, _prev)
+#define CH_REMOVE(_name, _head, _elm) _name##_CH_REMOVE(_head, _elm)
+#define CH_FIND(_name, _head, _key) _name##_CH_FIND(_head, _key)
+#define CH_LOCATE(_name, _head, _h, _cmp, _a) \
+ _name##_CH_LOCATE(_head, _h, _cmp, _a)
+#define CH_FIRST(_name, _head, _iter) _name##_CH_FIRST(_head, _iter)
+#define CH_NEXT(_name, _head, _iter) _name##_CH_NEXT(_head, _iter)
+
+#define CH_FOREACH(_e, _name, _head, _iter) \
+ for ((_e) = CH_FIRST(_name, (_head), (_iter)); \
+ (_e) != NULL; \
+ (_e) = CH_NEXT(_name, (_head), (_iter)))
+
+#endif
Index: rde.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
diff -u -p -r1.656 rde.c
--- rde.c 4 Jun 2025 09:12:34 -0000 1.656
+++ rde.c 15 Sep 2025 11:52:53 -0000
@@ -212,6 +212,9 @@ rde_main(int debug, int verbose)
TAILQ_INIT(out_rules);
pt_init();
+ attr_init();
+ path_init();
+ communities_init();
peer_init(out_rules);
/* make sure the default RIBs are setup */
@@ -4779,8 +4782,6 @@ rde_shutdown(void)
/* now check everything */
rib_shutdown();
nexthop_shutdown();
- path_shutdown();
- attr_shutdown();
pt_shutdown();
}
Index: rde.h
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
diff -u -p -r1.316 rde.h
--- rde.h 4 Jun 2025 09:12:34 -0000 1.316
+++ rde.h 15 Sep 2025 11:52:02 -0000
@@ -163,7 +163,7 @@ enum attrtypes {
#define ATTR_WELL_KNOWN ATTR_TRANSITIVE
struct attr {
- RB_ENTRY(attr) entry;
+ uint64_t hash;
u_char *data;
int refcnt;
uint16_t len;
@@ -172,17 +172,18 @@ struct attr {
};
struct rde_community {
- RB_ENTRY(rde_community) entry;
- int size;
- int nentries;
- int flags;
- int refcnt;
+ uint64_t hash;
+ int size;
+ int nentries;
+ int flags;
+ int refcnt;
struct community *communities;
};
#define PARTIAL_COMMUNITIES 0x01
#define PARTIAL_LARGE_COMMUNITIES 0x02
#define PARTIAL_EXT_COMMUNITIES 0x04
+#define PARTIAL_DIRTY 0x08
#define F_ATTR_ORIGIN 0x00001
#define F_ATTR_ASPATH 0x00002
@@ -208,7 +209,7 @@ struct rde_community {
#define DEFAULT_LPREF 100
struct rde_aspath {
- RB_ENTRY(rde_aspath) entry;
+ uint64_t hash;
struct attr **others;
struct aspath *aspath;
struct rde_aspa_state aspa_state;
@@ -223,6 +224,9 @@ struct rde_aspath {
uint8_t others_len;
uint8_t aspa_generation;
};
+#define PATH_HASHOFF offsetof(struct rde_aspath, med)
+#define PATH_HASHSTART(x) ((const uint8_t *)x + PATH_HASHOFF)
+#define PATH_HASHSIZE (sizeof(struct rde_aspath) - PATH_HASHOFF)
enum nexthop_state {
NEXTHOP_LOOKUP,
@@ -397,12 +401,13 @@ RB_PROTOTYPE(peer_tree, rde_peer, entry,
/* rde_attr.c */
int attr_writebuf(struct ibuf *, uint8_t, uint8_t, void *,
uint16_t);
-void attr_shutdown(void);
+void attr_init(void);
int attr_optadd(struct rde_aspath *, uint8_t, uint8_t,
void *, uint16_t);
struct attr *attr_optget(const struct rde_aspath *, uint8_t);
void attr_copy(struct rde_aspath *, const struct rde_aspath *);
-int attr_compare(struct rde_aspath *, struct rde_aspath *);
+int attr_equal(const struct rde_aspath *,
+ const struct rde_aspath *);
void attr_freeall(struct rde_aspath *);
void attr_free(struct rde_aspath *, struct attr *);
@@ -413,7 +418,7 @@ u_char *aspath_deflate(u_char *, uint16
void aspath_merge(struct rde_aspath *, struct attr *);
uint32_t aspath_neighbor(struct aspath *);
int aspath_loopfree(struct aspath *, uint32_t);
-int aspath_compare(struct aspath *, struct aspath *);
+int aspath_compare(const struct aspath *, const struct aspath *);
int aspath_match(struct aspath *, struct filter_as *, uint32_t);
u_char *aspath_prepend(struct aspath *, uint32_t, int, uint16_t *);
u_char *aspath_override(struct aspath *, uint32_t, uint32_t,
@@ -452,12 +457,13 @@ int community_large_add(struct rde_commu
int community_ext_add(struct rde_community *, int, int, struct ibuf *);
int community_writebuf(struct rde_community *, uint8_t, int, struct ibuf *);
-void communities_shutdown(void);
+void communities_init(void);
struct rde_community *communities_lookup(struct rde_community *);
struct rde_community *communities_link(struct rde_community *);
void communities_unlink(struct rde_community *);
-int communities_equal(struct rde_community *, struct rde_community *);
+int communities_equal(const struct rde_community *,
+ const struct rde_community *);
void communities_copy(struct rde_community *, struct rde_community *);
void communities_clean(struct rde_community *);
@@ -575,7 +581,7 @@ re_rib(struct rib_entry *re)
return rib_byid(re->rib_id);
}
-void path_shutdown(void);
+void path_init(void);
struct rde_aspath *path_copy(struct rde_aspath *, const struct rde_aspath *);
struct rde_aspath *path_prep(struct rde_aspath *);
struct rde_aspath *path_get(void);
Index: rde_attr.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_attr.c,v
diff -u -p -r1.137 rde_attr.c
--- rde_attr.c 14 Apr 2025 11:49:39 -0000 1.137
+++ rde_attr.c 15 Sep 2025 11:52:24 -0000
@@ -22,12 +22,14 @@
#include <endian.h>
#include <limits.h>
+#include <siphash.h>
#include <stdlib.h>
#include <string.h>
#include "bgpd.h"
#include "rde.h"
#include "log.h"
+#include "chash.h"
int
attr_writebuf(struct ibuf *buf, uint8_t flags, uint8_t type, void *data,
@@ -55,21 +57,36 @@ attr_writebuf(struct ibuf *buf, uint8_t
}
/* optional attribute specific functions */
-struct attr *attr_alloc(uint8_t, uint8_t, void *, uint16_t);
-struct attr *attr_lookup(uint8_t, uint8_t, void *, uint16_t);
-void attr_put(struct attr *);
+static struct attr *attr_alloc(uint8_t, uint8_t, void *, uint16_t, uint64_t);
+static struct attr *attr_lookup(uint8_t, uint8_t, void *, uint16_t, uint64_t);
+static void attr_put(struct attr *);
-static inline int attr_diff(struct attr *, struct attr *);
+static SIPHASH_KEY attrkey;
-RB_HEAD(attr_tree, attr) attrtable = RB_INITIALIZER(&attr);
-RB_GENERATE_STATIC(attr_tree, attr, entry, attr_diff);
+static inline uint64_t
+attr_hash(const struct attr *a)
+{
+ return a->hash;
+}
+
+static uint64_t
+attr_calc_hash(uint8_t type, const void *data, uint16_t len)
+{
+ SIPHASH_CTX ctx;
+
+ SipHash24_Init(&ctx, &attrkey);
+ SipHash24_Update(&ctx, data, len);
+ SipHash24_Update(&ctx, &type, sizeof(type));
+ return SipHash24_End(&ctx);
+}
+CH_HEAD(attr_tree, attr) attrtable = CH_INITIALIZER(&attrtable);
+CH_PROTOTYPE(attr_tree, attr, attr_hash);
void
-attr_shutdown(void)
+attr_init(void)
{
- if (!RB_EMPTY(&attrtable))
- log_warnx("%s: free non-free attr table", __func__);
+ arc4random_buf(&attrkey, sizeof(attrkey));
}
int
@@ -79,6 +96,7 @@ attr_optadd(struct rde_aspath *asp, uint
uint8_t l;
struct attr *a, *t;
void *p;
+ uint64_t h;
/* attribute allowed only once */
for (l = 0; l < asp->others_len; l++) {
@@ -91,8 +109,9 @@ attr_optadd(struct rde_aspath *asp, uint
}
/* known optional attributes were validated previously */
- if ((a = attr_lookup(flags, type, data, len)) == NULL)
- a = attr_alloc(flags, type, data, len);
+ h = attr_calc_hash(type, data, len);
+ if ((a = attr_lookup(flags, type, data, len, h)) == NULL)
+ a = attr_alloc(flags, type, data, len, h);
/* add attribute to the table but first bump refcnt */
a->refcnt++;
@@ -169,57 +188,30 @@ attr_copy(struct rde_aspath *t, const st
}
static inline int
-attr_diff(struct attr *oa, struct attr *ob)
+attr_eq(const struct attr *oa, const struct attr *ob)
{
- int r;
-
- if (ob == NULL)
- return (1);
- if (oa == NULL)
- return (-1);
- if (oa->flags > ob->flags)
- return (1);
- if (oa->flags < ob->flags)
- return (-1);
- if (oa->type > ob->type)
- return (1);
- if (oa->type < ob->type)
- return (-1);
- if (oa->len > ob->len)
- return (1);
- if (oa->len < ob->len)
- return (-1);
- if (oa->len == 0)
- return (0);
- r = memcmp(oa->data, ob->data, oa->len);
- if (r > 0)
- return (1);
- if (r < 0)
- return (-1);
- return (0);
+ if (oa->hash != ob->hash)
+ return 0;
+ if (oa->type != ob->type)
+ return 0;
+ if (oa->flags != ob->flags)
+ return 0;
+ if (oa->len != ob->len)
+ return 0;
+ return (oa->len == 0 || memcmp(oa->data, ob->data, oa->len) == 0);
}
int
-attr_compare(struct rde_aspath *a, struct rde_aspath *b)
+attr_equal(const struct rde_aspath *a, const struct rde_aspath *b)
{
- uint8_t l, min;
+ uint8_t l;
- min = a->others_len < b->others_len ? a->others_len : b->others_len;
- for (l = 0; l < min; l++)
+ if (a->others_len != b->others_len)
+ return (0);
+ for (l = 0; l < a->others_len; l++)
if (a->others[l] != b->others[l])
- return (attr_diff(a->others[l], b->others[l]));
-
- if (a->others_len < b->others_len) {
- for (; l < b->others_len; l++)
- if (b->others[l] != NULL)
- return (-1);
- } else if (a->others_len > b->others_len) {
- for (; l < a->others_len; l++)
- if (a->others[l] != NULL)
- return (1);
- }
-
- return (0);
+ return (0);
+ return (1);
}
void
@@ -253,7 +245,7 @@ attr_freeall(struct rde_aspath *asp)
}
struct attr *
-attr_alloc(uint8_t flags, uint8_t type, void *data, uint16_t len)
+attr_alloc(uint8_t flags, uint8_t type, void *data, uint16_t len, uint64_t hash)
{
struct attr *a;
@@ -276,14 +268,17 @@ attr_alloc(uint8_t flags, uint8_t type,
} else
a->data = NULL;
- if (RB_INSERT(attr_tree, &attrtable, a) != NULL)
+ a->hash = hash;
+
+ if (CH_INSERT(attr_tree, &attrtable, a, NULL) != 1)
fatalx("corrupted attr tree");
return (a);
}
struct attr *
-attr_lookup(uint8_t flags, uint8_t type, void *data, uint16_t len)
+attr_lookup(uint8_t flags, uint8_t type, void *data, uint16_t len,
+ uint64_t hash)
{
struct attr needle;
@@ -293,7 +288,9 @@ attr_lookup(uint8_t flags, uint8_t type,
needle.type = type;
needle.len = len;
needle.data = data;
- return RB_FIND(attr_tree, &attrtable, &needle);
+ needle.hash = hash;
+
+ return CH_FIND(attr_tree, &attrtable, &needle);
}
void
@@ -308,7 +305,7 @@ attr_put(struct attr *a)
return;
/* unlink */
- RB_REMOVE(attr_tree, &attrtable, a);
+ CH_REMOVE(attr_tree, &attrtable, a);
if (a->len != 0)
rdemem.attr_dcnt--;
@@ -318,6 +315,8 @@ attr_put(struct attr *a)
free(a);
}
+CH_GENERATE(attr_tree, attr, attr_eq, attr_hash);
+
/* aspath specific functions */
static uint16_t aspath_count(const void *, uint16_t);
@@ -327,7 +326,7 @@ static void aspath_countcopy(struct asp
uint16_t, int);
int
-aspath_compare(struct aspath *a1, struct aspath *a2)
+aspath_compare(const struct aspath *a1, const struct aspath *a2)
{
int r;
Index: rde_community.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_community.c,v
diff -u -p -r1.16 rde_community.c
--- rde_community.c 10 Sep 2024 08:53:20 -0000 1.16
+++ rde_community.c 28 Aug 2025 09:34:10 -0000
@@ -19,12 +19,14 @@
#include <endian.h>
#include <limits.h>
+#include <siphash.h>
#include <stdlib.h>
#include <string.h>
#include "bgpd.h"
#include "rde.h"
#include "log.h"
+#include "chash.h"
static int
apply_flag(uint32_t in, uint8_t flag, struct rde_peer *peer, uint32_t *out,
@@ -225,9 +227,10 @@ insert_community(struct rde_community *c
struct community *new;
int newsize = comm->size + 8;
- if ((new = recallocarray(comm->communities, comm->size,
- newsize, sizeof(struct community))) == NULL)
+ if ((new = reallocarray(comm->communities, newsize,
+ sizeof(struct community))) == NULL)
fatal(__func__);
+ memset(&new[comm->size], 0, sizeof(struct community) * 8);
comm->communities = new;
comm->size = newsize;
}
@@ -250,6 +253,7 @@ insert_community(struct rde_community *c
/* insert community at slot l */
comm->communities[l] = *c;
comm->nentries++;
+ comm->flags |= PARTIAL_DIRTY;
}
static int
@@ -391,6 +395,7 @@ struct rde_peer *peer)
(char *)(comm->communities + comm->nentries) -
(char *)(match + 1));
comm->nentries--;
+ comm->flags |= PARTIAL_DIRTY;
return;
} else {
if (fc2c(fc, peer, &test, &mask) == -1)
@@ -403,6 +408,7 @@ struct rde_peer *peer)
comm->communities + l + 1,
(comm->nentries - l - 1) * sizeof(test));
comm->nentries--;
+ comm->flags |= PARTIAL_DIRTY;
continue;
}
l++;
@@ -620,32 +626,44 @@ community_writebuf(struct rde_community
/*
* Global RIB cache for communities
*/
-static inline int
-communities_compare(struct rde_community *a, struct rde_community *b)
-{
- if (a->nentries != b->nentries)
- return a->nentries > b->nentries ? 1 : -1;
- if (a->flags != b->flags)
- return a->flags > b->flags ? 1 : -1;
+static SIPHASH_KEY commkey;
- return memcmp(a->communities, b->communities,
- a->nentries * sizeof(struct community));
+static inline uint64_t
+communities_hash(const struct rde_community *comm)
+{
+ return comm->hash;
}
-RB_HEAD(comm_tree, rde_community) commtable = RB_INITIALIZER(&commtable);
-RB_GENERATE_STATIC(comm_tree, rde_community, entry, communities_compare);
+static inline void
+communities_calc_hash(struct rde_community *comm)
+{
+ SIPHASH_CTX ctx;
+
+ if (comm->flags & PARTIAL_DIRTY) {
+ comm->flags &= ~PARTIAL_DIRTY;
+ SipHash24_Init(&ctx, &commkey);
+ SipHash24_Update(&ctx, comm->communities,
+ comm->nentries * sizeof(struct community));
+ SipHash24_Update(&ctx, &comm->flags, sizeof(comm->flags));
+ comm->hash = SipHash24_End(&ctx);
+ }
+}
+
+CH_HEAD(comm_tree, rde_community) commtable = CH_INITIALIZER(&commtable);
+CH_PROTOTYPE(comm_tree, rde_community, communities_hash);
+CH_GENERATE(comm_tree, rde_community, communities_equal, communities_hash);
void
-communities_shutdown(void)
+communities_init(void)
{
- if (!RB_EMPTY(&commtable))
- log_warnx("%s: free non-free table", __func__);
+ arc4random_buf(&commkey, sizeof(commkey));
}
struct rde_community *
communities_lookup(struct rde_community *comm)
{
- return RB_FIND(comm_tree, &commtable, comm);
+ communities_calc_hash(comm);
+ return CH_FIND(comm_tree, &commtable, comm);
}
struct rde_community *
@@ -656,8 +674,12 @@ communities_link(struct rde_community *c
if ((n = malloc(sizeof(*n))) == NULL)
fatal(__func__);
communities_copy(n, comm);
+ communities_calc_hash(comm);
- if ((f = RB_INSERT(comm_tree, &commtable, n)) != NULL) {
+ switch (CH_INSERT(comm_tree, &commtable, n, &f)) {
+ case -1:
+ fatal(__func__);
+ case 0:
log_warnx("duplicate communities collection inserted");
free(n->communities);
free(n);
@@ -678,7 +700,7 @@ communities_unlink(struct rde_community
if (comm->refcnt != 1)
fatalx("%s: unlinking still referenced communities", __func__);
- RB_REMOVE(comm_tree, &commtable, comm);
+ CH_REMOVE(comm_tree, &commtable, comm);
rdemem.comm_size -= comm->size;
rdemem.comm_nmemb -= comm->nentries;
@@ -693,7 +715,7 @@ communities_unlink(struct rde_community
* otherwise returns zero.
*/
int
-communities_equal(struct rde_community *a, struct rde_community *b)
+communities_equal(const struct rde_community *a, const struct rde_community *b)
{
if (a->nentries != b->nentries)
return 0;
@@ -718,6 +740,7 @@ communities_copy(struct rde_community *t
to->size = from->nentries;
to->nentries = from->nentries;
to->flags = from->flags;
+ to->hash = from->hash;
if (to->nentries == 0)
return;
Index: rde_rib.c
===================================================================
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
diff -u -p -r1.269 rde_rib.c
--- rde_rib.c 24 Apr 2025 20:17:49 -0000 1.269
+++ rde_rib.c 15 Sep 2025 12:22:43 -0000
@@ -20,6 +20,7 @@
#include <sys/queue.h>
#include <limits.h>
+#include <siphash.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
@@ -27,6 +28,7 @@
#include "bgpd.h"
#include "rde.h"
#include "log.h"
+#include "chash.h"
/*
* BGP RIB -- Routing Information Base
@@ -596,63 +598,64 @@ rib_dump_subtree(uint16_t id, struct bgp
/* path specific functions */
-static struct rde_aspath *path_lookup(struct rde_aspath *);
+static struct rde_aspath *path_getcache(struct rde_aspath *);
static void path_link(struct rde_aspath *);
static void path_unlink(struct rde_aspath *);
-static inline int
-path_compare(struct rde_aspath *a, struct rde_aspath *b)
+static SIPHASH_KEY pathkey;
+
+static inline uint64_t
+path_hash(const struct rde_aspath *p)
{
- int r;
+ return p->hash;
+}
+
+static uint64_t
+path_calc_hash(const struct rde_aspath *p)
+{
+ SIPHASH_CTX ctx;
+
+ SipHash24_Init(&ctx, &pathkey);
+ SipHash24_Update(&ctx, PATH_HASHSTART(p), PATH_HASHSIZE);
+ SipHash24_Update(&ctx, aspath_dump(p->aspath),
+ aspath_length(p->aspath));
+ return SipHash24_End(&ctx);
+}
+static inline int
+path_equal(const struct rde_aspath *a, const struct rde_aspath *b)
+{
if (a == NULL && b == NULL)
- return (0);
- else if (b == NULL)
return (1);
+ else if (b == NULL)
+ return (0);
else if (a == NULL)
- return (-1);
- if ((a->flags & ~F_ATTR_LINKED) > (b->flags & ~F_ATTR_LINKED))
- return (1);
- if ((a->flags & ~F_ATTR_LINKED) < (b->flags & ~F_ATTR_LINKED))
- return (-1);
- if (a->origin > b->origin)
- return (1);
- if (a->origin < b->origin)
- return (-1);
- if (a->med > b->med)
- return (1);
- if (a->med < b->med)
- return (-1);
- if (a->lpref > b->lpref)
- return (1);
- if (a->lpref < b->lpref)
- return (-1);
- if (a->weight > b->weight)
- return (1);
- if (a->weight < b->weight)
- return (-1);
- if (a->rtlabelid > b->rtlabelid)
- return (1);
- if (a->rtlabelid < b->rtlabelid)
- return (-1);
- if (a->pftableid > b->pftableid)
- return (1);
- if (a->pftableid < b->pftableid)
- return (-1);
+ return (0);
- /* no need to check aspa_state or aspa_generation */
+ if ((a->flags & ~F_ATTR_LINKED) != (b->flags & ~F_ATTR_LINKED))
+ return (0);
+ if (a->origin != b->origin)
+ return (0);
+ if (a->med != b->med)
+ return (0);
+ if (a->lpref != b->lpref)
+ return (0);
+ if (a->weight != b->weight)
+ return (0);
+ if (a->rtlabelid != b->rtlabelid)
+ return (0);
+ if (a->pftableid != b->pftableid)
+ return (0);
- r = aspath_compare(a->aspath, b->aspath);
- if (r > 0)
- return (1);
- if (r < 0)
- return (-1);
+ /* no need to check aspa_state or aspa_generation */
- return (attr_compare(a, b));
+ if (aspath_compare(a->aspath, b->aspath) != 0)
+ return (0);
+ return (attr_equal(a, b));
}
-RB_HEAD(path_tree, rde_aspath) pathtable = RB_INITIALIZER(&pathtable);
-RB_GENERATE_STATIC(path_tree, rde_aspath, entry, path_compare);
+CH_HEAD(path_tree, rde_aspath) pathtable = CH_INITIALIZER(&pathtable);
+CH_PROTOTYPE(path_tree, rde_aspath, path_hash);
static inline struct rde_aspath *
path_ref(struct rde_aspath *asp)
@@ -679,16 +682,23 @@ path_unref(struct rde_aspath *asp)
}
void
-path_shutdown(void)
+path_init(void)
{
- if (!RB_EMPTY(&pathtable))
- log_warnx("path_free: free non-free table");
+ arc4random_buf(&pathkey, sizeof(pathkey));
}
static struct rde_aspath *
-path_lookup(struct rde_aspath *aspath)
+path_getcache(struct rde_aspath *aspath)
{
- return (RB_FIND(path_tree, &pathtable, aspath));
+ struct rde_aspath *asp;
+
+ aspath->hash = path_calc_hash(aspath);
+ if ((asp = CH_FIND(path_tree, &pathtable, aspath)) == NULL) {
+ /* Path not available, create and link a new one. */
+ asp = path_copy(path_get(), aspath);
+ path_link(asp);
+ }
+ return asp;
}
/*
@@ -698,7 +708,7 @@ path_lookup(struct rde_aspath *aspath)
static void
path_link(struct rde_aspath *asp)
{
- if (RB_INSERT(path_tree, &pathtable, asp) != NULL)
+ if (CH_INSERT(path_tree, &pathtable, asp, NULL) != 1)
fatalx("%s: already linked object", __func__);
asp->flags |= F_ATTR_LINKED;
}
@@ -717,7 +727,7 @@ path_unlink(struct rde_aspath *asp)
if (asp->refcnt != 0)
fatalx("%s: still holds references", __func__);
- RB_REMOVE(path_tree, &pathtable, asp);
+ CH_REMOVE(path_tree, &pathtable, asp);
asp->flags &= ~F_ATTR_LINKED;
path_put(asp);
@@ -734,6 +744,7 @@ path_copy(struct rde_aspath *dst, const
dst->refcnt = 0;
dst->flags = src->flags & ~F_ATTR_LINKED;
+ dst->hash = src->hash;
dst->med = src->med;
dst->lpref = src->lpref;
dst->weight = src->weight;
@@ -798,6 +809,8 @@ path_put(struct rde_aspath *asp)
free(asp);
}
+CH_GENERATE(path_tree, rde_aspath, path_equal, path_hash);
+
/* prefix specific functions */
static int prefix_add(struct bgpd_addr *, int, struct rib *,
@@ -985,7 +998,7 @@ prefix_update(struct rib *rib, struct rd
if (prefix_nexthop(p) == state->nexthop &&
prefix_nhflags(p) == state->nhflags &&
communities_equal(ncomm, prefix_communities(p)) &&
- path_compare(nasp, prefix_aspath(p)) == 0) {
+ path_equal(nasp, prefix_aspath(p))) {
/* no change, update last change */
p->lastchange = getmonotime();
p->validation_state = state->vstate;
@@ -1002,11 +1015,7 @@ prefix_update(struct rib *rib, struct rd
* In both cases lookup the new aspath to make sure it is not
* already in the RIB.
*/
- if ((asp = path_lookup(nasp)) == NULL) {
- /* Path not available, create and link a new one. */
- asp = path_copy(path_get(), nasp);
- path_link(asp);
- }
+ asp = path_getcache(nasp);
if ((comm = communities_lookup(ncomm)) == NULL) {
/* Communities not available, create and link a new one. */
@@ -1153,11 +1162,8 @@ prefix_flowspec_update(struct rde_peer *
nasp = &state->aspath;
ncomm = &state->communities;
- if ((asp = path_lookup(nasp)) == NULL) {
- /* Path not available, create and link a new one. */
- asp = path_copy(path_get(), nasp);
- path_link(asp);
- }
+ asp = path_getcache(nasp);
+
if ((comm = communities_lookup(ncomm)) == NULL) {
/* Communities not available, create and link a new one. */
comm = communities_link(ncomm);
@@ -1271,7 +1277,7 @@ prefix_adjout_update(struct prefix *p, s
prefix_nexthop(p) == state->nexthop &&
communities_equal(&state->communities,
prefix_communities(p)) &&
- path_compare(&state->aspath, prefix_aspath(p)) == 0) {
+ path_equal(&state->aspath, prefix_aspath(p))) {
/* nothing changed */
p->validation_state = state->vstate;
p->lastchange = getmonotime();
@@ -1306,11 +1312,7 @@ prefix_adjout_update(struct prefix *p, s
fatalx("%s: RB index invariant violated", __func__);
}
- if ((asp = path_lookup(&state->aspath)) == NULL) {
- /* Path not available, create and link a new one. */
- asp = path_copy(path_get(), &state->aspath);
- path_link(asp);
- }
+ asp = path_getcache(&state->aspath);
if ((comm = communities_lookup(&state->communities)) == NULL) {
/* Communities not available, create and link a new one. */