From: David Gwynne Subject: use the SMR api instead of SRPs in art/rtable code To: tech@openbsd.org Date: Fri, 13 Jun 2025 12:03:37 +1000 there's other changes too, but moving to SMR is the important one. SRPs complicated this code massively, so moving it to SMR is almost like backing the SRP changes out. it becomes a lot more readable and similar to other ART implementations, which can't hurt. another change is to strip non-ART code out of art.c, specifically the rwlock and route sourceaddr info. these features have been moved up a layer into rtable.c and are now stored as part of a new struct rtable. again, this makes the art code easier to read and compare. the other big change is to walking over an ART. there's now a reentrant and non-recursive iterator api which art_walk and rtable_walk are now built on top of. rtable_walk can basically do a foreach over the ART directly rather than having to pass a bunch of state into callbacks to apply with art_walk. we can still do that if we want, i just find this easier to deal with in rtable_walk directly. i modified how the heaps are traversed inside ART as well to try and minimise the pointer accesses as much as possible. i'm sorry the diff is so big. i don't expect code reviews. it took me 4 years and at least 3 attempts to do this diff. however, tests would be much appreciated. it looks like the generated code is smaller too. Index: art.c =================================================================== RCS file: /cvs/src/sys/net/art.c,v diff -u -p -r1.31 art.c --- art.c 11 Nov 2023 12:17:50 -0000 1.31 +++ art.c 13 Jun 2025 01:59:37 -0000 @@ -22,6 +22,58 @@ * * Yoichi Hariguchi paper can be found at: * http://www.hariguchi.org/art/art.pdf + * + * This implementation is tweaked to minimise pointer traversal + * during lookups by only accessing the heaps; it avoids following + * pointers to or through the art_table structures. 
+ * + * The heap is an array of art_heap_entry values which have different + * meanings depending on their location. The majority of entries in + * the heap are used as pointers to nodes (struct an_node, aka leaf + * entries), or the next heap to traverse. These pointers are typed/ + * tagged by the low bit in their heap value, and must be masked + * appropriately before use. + * + * The first two entries in the heap are not used by the search + * algorithm, so they are used to store data associated with this + * part of the heap. The first entry (heap[0]) stores a pointer to + * an art_table struct associated with this heap. The second entry + * (heap[1]) stores the node pointer which would be in the parent + * heap if this table wasn't using it. This node pointer is also known + * as the default entry/route for the current table in the ART. + * + * Nodes contain the exact information about the prefix they represent + * in the ART, ie, the address and prefix length, and a pointer to + * data associated with that prefix (an_value). 
+ * + * The relationships between the separate data structures looks + * like the following: + * + * +------------------+ + * | struct art | + * +------------------+ NULL + * | ^ + * | art_root | at_parent + * v | + * +------------------+ heap[0] +------------------+ + * | heap |------------>| struct art_table | + * +------------------+ +------------------+ + * | ^ + * | heap entry | at_parent + * v | + * +------------------+ heap[0] +------------------+ + * | heap |------------>| struct art_table | + * +------------------+ +------------------+ + * | + * | node entry + * v + * +------------------+ + * | struct art_node | + * +------------------+ + * | + * | an_value + * v + * "user" data */ #ifndef _KERNEL @@ -33,43 +85,96 @@ #include #include #include +#include #endif #include -int art_bindex(struct art_table *, const uint8_t *, int); -void art_allot(struct art_table *at, int, struct art_node *, - struct art_node *); -struct art_table *art_table_get(struct art_root *, struct art_table *, - int); -struct art_table *art_table_put(struct art_root *, struct art_table *); -struct art_node *art_table_insert(struct art_root *, struct art_table *, +/* + * Allotment Table. 
+ */ +struct art_table { + art_heap_entry *at_heap; + struct art_table *at_parent; /* Parent table */ + + unsigned int at_index; /* Index in the parent table */ + unsigned int at_minfringe; /* Index that fringe begins */ + + unsigned int at_level; /* Level of the table */ + unsigned int at_bits; /* Stride length of the table */ + unsigned int at_offset; /* Sum of parents' stride len */ + + unsigned int at_refcnt; +}; + +static void art_allot(struct art_table *at, unsigned int, + art_heap_entry, art_heap_entry); +struct art_table *art_table_get(struct art *, struct art_table *, + unsigned int); +struct art_table *art_table_put(struct art *, struct art_table *); +struct art_node *art_table_insert(struct art *, struct art_table *, int, struct art_node *); -struct art_node *art_table_delete(struct art_root *, struct art_table *, +struct art_node *art_table_delete(struct art *, struct art_table *, int, struct art_node *); -struct art_table *art_table_ref(struct art_root *, struct art_table *); -int art_table_free(struct art_root *, struct art_table *); -int art_table_walk(struct art_root *, struct art_table *, - int (*f)(struct art_node *, void *), void *); -int art_walk_apply(struct art_root *, - struct art_node *, struct art_node *, - int (*f)(struct art_node *, void *), void *); +struct art_table *art_table_ref(struct art *, struct art_table *); +int art_table_free(struct art *, struct art_table *); void art_table_gc(void *); void art_gc(void *); -struct pool an_pool, at_pool, at_heap_4_pool, at_heap_8_pool; +static struct pool an_pool, at_pool, at_heap_4_pool, at_heap_8_pool; -struct art_table *art_table_gc_list = NULL; -struct mutex art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); -struct task art_table_gc_task = +static struct art_table *art_table_gc_list = NULL; +static struct mutex art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); +static struct task art_table_gc_task = TASK_INITIALIZER(art_table_gc, NULL); -struct art_node *art_node_gc_list = NULL; -struct 
mutex art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); -struct task art_node_gc_task = TASK_INITIALIZER(art_gc, NULL); +static struct art_node *art_node_gc_list = NULL; +static struct mutex art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); +static struct task art_node_gc_task = TASK_INITIALIZER(art_gc, NULL); + +#define ART_HEAP_IDX_TABLE 0 +#define ART_HEAP_IDX_DEFAULT 1 + +#define AT_HEAPSIZE(bits) ((1 << ((bits) + 1)) * sizeof(art_heap_entry)) + +static inline struct art_table * +art_heap_to_table(art_heap_entry *heap) +{ + return ((struct art_table *)heap[ART_HEAP_IDX_TABLE]); +} + +static inline int +art_heap_entry_is_node(art_heap_entry ahe) +{ + return (!ISSET(ahe, 1)); +} + +static inline struct art_node * +art_heap_entry_to_node(art_heap_entry ahe) +{ + return ((struct art_node *)ahe); +} + +static inline art_heap_entry * +art_heap_entry_to_heap(art_heap_entry ahe) +{ + return ((art_heap_entry *)(ahe & ~1ULL)); +} + +static inline art_heap_entry +art_node_to_heap_entry(struct art_node *an) +{ + return ((art_heap_entry)an); +} + +static inline art_heap_entry +art_heap_to_heap_entry(art_heap_entry *heap) +{ + return ((art_heap_entry)heap | 1ULL); +} void -art_init(void) +art_boot(void) { pool_init(&an_pool, sizeof(struct art_node), 0, IPL_SOFTNET, 0, "art_node", NULL); @@ -84,56 +189,74 @@ art_init(void) /* * Per routing table initialization API function. 
*/ -struct art_root * -art_alloc(unsigned int rtableid, unsigned int alen, unsigned int off) -{ - struct art_root *ar; - int i; - ar = malloc(sizeof(*ar), M_RTABLE, M_NOWAIT|M_ZERO); - if (ar == NULL) - return (NULL); +static const unsigned int art_plen32_levels[] = { + 8, 4, 4, 4, 4, 4, 4 +}; + +static const unsigned int art_plen128_levels[] = { + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 +}; + +static const unsigned int art_plen20_levels[] = { + 4, 4, 4, 4, 4 +}; + +void +art_init(struct art *art, unsigned int alen) +{ + const unsigned int *levels; + unsigned int nlevels; +#ifdef DIAGNOSTIC + unsigned int i; + unsigned int bits = 0; +#endif switch (alen) { case 32: - ar->ar_alen = 32; - ar->ar_nlvl = 7; - ar->ar_bits[0] = 8; - for (i = 1; i < ar->ar_nlvl; i++) - ar->ar_bits[i] = 4; + levels = art_plen32_levels; + nlevels = nitems(art_plen32_levels); break; case 128: - ar->ar_alen = 128; - ar->ar_nlvl = 32; - for (i = 0; i < ar->ar_nlvl; i++) - ar->ar_bits[i] = 4; + levels = art_plen128_levels; + nlevels = nitems(art_plen128_levels); + break; + case 20: + levels = art_plen20_levels; + nlevels = nitems(art_plen20_levels); break; default: - printf("%s: incorrect address length %u\n", __func__, alen); - free(ar, M_RTABLE, sizeof(*ar)); - return (NULL); + panic("no configuration for alen %u", alen); + /* NOTREACHED */ } - ar->ar_off = off; - rw_init(&ar->ar_lock, "art"); +#ifdef DIAGNOSTIC + for (i = 0; i < nlevels; i++) + bits += levels[i]; + + if (alen != bits) + panic("sum of levels %u != address len %u", bits, alen); +#endif /* DIAGNOSTIC */ - return (ar); + art->art_root = 0; + art->art_levels = levels; + art->art_nlevels = nlevels; + art->art_alen = alen; } -/* - * Return 1 if ``old'' and ``new`` are identical, 0 otherwise. 
- */ -static inline int -art_check_duplicate(struct art_root *ar, struct art_node *old, - struct art_node *new) +struct art * +art_alloc(unsigned int alen) { - if (old == NULL) - return (0); + struct art *art; - if (old->an_plen == new->an_plen) - return (1); + art = malloc(sizeof(*art), M_RTABLE, M_NOWAIT|M_ZERO); + if (art == NULL) + return (NULL); - return (0); + art_init(art, alen); + + return (art); } /* @@ -148,30 +271,31 @@ art_check_duplicate(struct art_root *ar, * 8bit-long tables, there's a maximum of 4 base indexes if the * prefix length is > 24. */ -int -art_bindex(struct art_table *at, const uint8_t *addr, int plen) +static unsigned int __pure +art_bindex(unsigned int offset, unsigned int bits, + const uint8_t *addr, unsigned int plen) { - uint8_t boff, bend; + unsigned int boff, bend; uint32_t k; - if (plen < at->at_offset || plen > (at->at_offset + at->at_bits)) - return (-1); + KASSERT(plen >= offset); + KASSERT(plen <= (offset + bits)); /* * We are only interested in the part of the prefix length * corresponding to the range of this table. */ - plen -= at->at_offset; + plen -= offset; /* * Jump to the first byte of the address containing bits * covered by this table. */ - addr += (at->at_offset / 8); + addr += (offset / 8); /* ``at'' covers the bit range between ``boff'' & ``bend''. */ - boff = (at->at_offset % 8); - bend = (at->at_bits + boff); + boff = (offset % 8); + bend = (bits + boff); KASSERT(bend <= 32); @@ -188,25 +312,13 @@ art_bindex(struct art_table *at, const u k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8); k |= addr[1] >> (16 - bend); } else { - k = (addr[0] >> (8 - bend)) & ((1 << at->at_bits) - 1); + k = (addr[0] >> (8 - bend)) & ((1 << bits) - 1); } /* * Single level base index formula: */ - return ((k >> (at->at_bits - plen)) + (1 << plen)); -} - -/* - * Single level lookup function. - * - * Return the fringe index of the part of ``addr'' - * corresponding to the range covered by the table ``at''. 
- */ -static inline int -art_findex(struct art_table *at, const uint8_t *addr) -{ - return art_bindex(at, addr, (at->at_offset + at->at_bits)); + return ((k >> (bits - plen)) + (1 << plen)); } /* @@ -215,60 +327,94 @@ art_findex(struct art_table *at, const u * Return the best existing match for a destination. */ struct art_node * -art_match(struct art_root *ar, const void *addr, struct srp_ref *nsr) +art_match(struct art *art, const void *addr) { - struct srp_ref dsr, ndsr; - void *entry; - struct art_table *at; - struct art_node *dflt, *ndflt; - int j; - - entry = srp_enter(nsr, &ar->ar_root); - at = entry; + unsigned int offset = 0; + unsigned int level = 0; + art_heap_entry *heap; + art_heap_entry ahe, dahe = 0; + struct art_node *an; + int j; - if (at == NULL) - goto done; + SMR_ASSERT_CRITICAL(); - /* - * Remember the default route of each table we visit in case - * we do not find a better matching route. - */ - dflt = srp_enter(&dsr, &at->at_default); + heap = SMR_PTR_GET(&art->art_root); + if (heap == NULL) + return (NULL); /* * Iterate until we find a leaf. */ - while (1) { + for (;;) { + unsigned int bits = art->art_levels[level]; + unsigned int p = offset + bits; + + /* + * Remember the default route of each table we visit in case + * we do not find a better matching route. + */ + ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]); + if (ahe != 0) + dahe = ahe; + /* Do a single level route lookup. */ - j = art_findex(at, addr); - entry = srp_follow(nsr, &at->at_heap[j].node); + j = art_bindex(offset, bits, addr, p); + ahe = SMR_PTR_GET(&heap[j]); /* If this is a leaf (NULL is a leaf) we're done. 
*/ - if (ISLEAF(entry)) + if (art_heap_entry_is_node(ahe)) break; - at = SUBTABLE(entry); + heap = art_heap_entry_to_heap(ahe); + offset = p; + level++; + + KASSERT(level < art->art_nlevels); + } + + an = art_heap_entry_to_node(ahe); + if (an != NULL) + return (an); + + return art_heap_entry_to_node(dahe); +} + +static int +art_node_check(const struct art_node *an, + const uint8_t *addr, unsigned int plen) +{ + const uint32_t *wan = (const uint32_t *)an->an_addr; + const uint32_t *waddr = (const uint32_t *)addr; + unsigned int i = 0; + unsigned int shift; + + if (an->an_plen != plen) + return (0); + + while (plen >= 32) { + if (wan[i] != waddr[i]) + return (0); + + i++; + plen -= 32; + } + if (plen == 0) + return (1); + + i *= 4; + while (plen >= 8) { + if (an->an_addr[i] != addr[i]) + return (0); + + i++; + plen -= 8; + } + if (plen == 0) + return (1); + + shift = 8 - plen; - ndflt = srp_enter(&ndsr, &at->at_default); - if (ndflt != NULL) { - srp_leave(&dsr); - dsr = ndsr; - dflt = ndflt; - } else - srp_leave(&ndsr); - } - - if (entry == NULL) { - srp_leave(nsr); - *nsr = dsr; - KASSERT(ISLEAF(dflt)); - return (dflt); - } - - srp_leave(&dsr); -done: - KASSERT(ISLEAF(entry)); - return (entry); + return ((an->an_addr[i] >> shift) == (addr[i] >> shift)); } /* @@ -278,24 +424,28 @@ done: * it does not exist. 
*/ struct art_node * -art_lookup(struct art_root *ar, const void *addr, int plen, struct srp_ref *nsr) +art_lookup(struct art *art, const void *addr, unsigned int plen) { - void *entry; - struct art_table *at; - int i, j; + unsigned int offset = 0; + unsigned int bits, p; + unsigned int level = 0; + art_heap_entry *heap; + art_heap_entry ahe; + struct art_node *an; + unsigned int i, j; - KASSERT(plen >= 0 && plen <= ar->ar_alen); + KASSERT(plen <= art->art_alen); - entry = srp_enter(nsr, &ar->ar_root); - at = entry; + SMR_ASSERT_CRITICAL(); - if (at == NULL) - goto done; + heap = SMR_PTR_GET(&art->art_root); + if (heap == NULL) + return (NULL); /* Default route */ if (plen == 0) { - entry = srp_follow(nsr, &at->at_default); - goto done; + ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]); + return art_heap_entry_to_node(ahe); } /* @@ -303,31 +453,47 @@ art_lookup(struct art_root *ar, const vo * the stride length at this level the entry must * be in the current table. */ - while (plen > (at->at_offset + at->at_bits)) { + for (;;) { + bits = art->art_levels[level]; + p = offset + bits; + if (plen <= p) + break; + /* Do a single level route lookup. 
*/ - j = art_findex(at, addr); - entry = srp_follow(nsr, &at->at_heap[j].node); + j = art_bindex(offset, bits, addr, p); + ahe = SMR_PTR_GET(&heap[j]); /* A leaf is a match, but not a perfect one, or NULL */ - if (ISLEAF(entry)) + if (art_heap_entry_is_node(ahe)) return (NULL); - at = SUBTABLE(entry); + heap = art_heap_entry_to_heap(ahe); + offset = p; + level++; + + KASSERT(level < art->art_nlevels); } - i = art_bindex(at, addr, plen); - if (i == -1) - return (NULL); + i = art_bindex(offset, bits, addr, plen); + ahe = SMR_PTR_GET(&heap[i]); + if (!art_heap_entry_is_node(ahe)) { + heap = art_heap_entry_to_heap(ahe); + ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]); + } + + /* Make sure we've got a perfect match */ + an = art_heap_entry_to_node(ahe); + if (an != NULL && art_node_check(an, addr, plen)) + return (an); - entry = srp_follow(nsr, &at->at_heap[i].node); - if (!ISLEAF(entry)) - entry = srp_follow(nsr, &SUBTABLE(entry)->at_default); - -done: - KASSERT(ISLEAF(entry)); - return (entry); + return (NULL); } +int +art_is_empty(struct art *art) +{ + return (SMR_PTR_GET_LOCKED(&art->art_root) == NULL); +} /* * Insertion API function. @@ -336,32 +502,38 @@ done: * same destination/mask pair is already present. 
*/ struct art_node * -art_insert(struct art_root *ar, struct art_node *an, const void *addr, int plen) +art_insert(struct art *art, struct art_node *an) { - struct art_table *at, *child; - struct art_node *node; - int i, j; - - rw_assert_wrlock(&ar->ar_lock); - KASSERT(plen >= 0 && plen <= ar->ar_alen); - - at = srp_get_locked(&ar->ar_root); - if (at == NULL) { - at = art_table_get(ar, NULL, -1); + unsigned int p; + art_heap_entry ahe, oahe, *ahep; + art_heap_entry *heap; + struct art_node *oan; + struct art_table *at; + unsigned int i, j; + + KASSERT(an->an_plen <= art->art_alen); + + heap = SMR_PTR_GET_LOCKED(&art->art_root); + if (heap == NULL) { + at = art_table_get(art, NULL, -1); if (at == NULL) return (NULL); - srp_swap_locked(&ar->ar_root, at); - } + heap = at->at_heap; + SMR_PTR_SET_LOCKED(&art->art_root, heap); + } else + at = art_heap_to_table(heap); /* Default route */ - if (plen == 0) { - node = srp_get_locked(&at->at_default); - if (node != NULL) - return (node); + if (an->an_plen == 0) { + ahep = &heap[ART_HEAP_IDX_DEFAULT]; + oahe = SMR_PTR_GET_LOCKED(ahep); + oan = art_heap_entry_to_node(oahe); + if (oan != NULL) + return (oan); - art_table_ref(ar, at); - srp_swap_locked(&at->at_default, an); + art_table_ref(art, at); + SMR_PTR_SET_LOCKED(ahep, art_node_to_heap_entry(an)); return (an); } @@ -370,10 +542,11 @@ art_insert(struct art_root *ar, struct a * the stride length at this level the entry must * be in the current table. */ - while (plen > (at->at_offset + at->at_bits)) { + while ((p = at->at_offset + at->at_bits) < an->an_plen) { /* Do a single level route lookup. */ - j = art_findex(at, addr); - node = srp_get_locked(&at->at_heap[j].node); + j = art_bindex(at->at_offset, at->at_bits, an->an_addr, p); + ahep = &heap[j]; + ahe = SMR_PTR_GET_LOCKED(ahep); /* * If the node corresponding to the fringe index is @@ -381,84 +554,86 @@ art_insert(struct art_root *ar, struct a * entry of this node will then become the default * route of the subtable. 
*/ - if (ISLEAF(node)) { - child = art_table_get(ar, at, j); + if (art_heap_entry_is_node(ahe)) { + struct art_table *child = art_table_get(art, at, j); if (child == NULL) return (NULL); - art_table_ref(ar, at); - srp_swap_locked(&at->at_heap[j].node, ASNODE(child)); + art_table_ref(art, at); + at = child; - } else - at = SUBTABLE(node); + heap = at->at_heap; + SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT], ahe); + SMR_PTR_SET_LOCKED(ahep, art_heap_to_heap_entry(heap)); + } else { + heap = art_heap_entry_to_heap(ahe); + at = art_heap_to_table(heap); + } } - i = art_bindex(at, addr, plen); - if (i == -1) - return (NULL); - - return (art_table_insert(ar, at, i, an)); -} - -/* - * Single level insertion. - */ -struct art_node * -art_table_insert(struct art_root *ar, struct art_table *at, int i, - struct art_node *an) -{ - struct art_node *prev, *node; - - node = srp_get_locked(&at->at_heap[i].node); - if (!ISLEAF(node)) - prev = srp_get_locked(&SUBTABLE(node)->at_default); - else - prev = node; - - if (art_check_duplicate(ar, prev, an)) - return (prev); + i = art_bindex(at->at_offset, at->at_bits, an->an_addr, an->an_plen); + ahep = &heap[i]; + oahe = SMR_PTR_GET_LOCKED(ahep); + if (!art_heap_entry_is_node(oahe)) { + heap = art_heap_entry_to_heap(oahe); + ahep = &heap[ART_HEAP_IDX_DEFAULT]; + oahe = SMR_PTR_GET_LOCKED(ahep); + } - art_table_ref(ar, at); + /* Check if there's an existing node */ + oan = art_heap_entry_to_node(oahe); + if (oan != NULL && art_node_check(oan, an->an_addr, an->an_plen)) + return (oan); /* * If the index `i' of the route that we are inserting is not * a fringe index, we need to allot this new route pointer to * all the corresponding fringe indices. 
*/ + art_table_ref(art, at); + ahe = art_node_to_heap_entry(an); if (i < at->at_minfringe) - art_allot(at, i, prev, an); - else if (!ISLEAF(node)) - srp_swap_locked(&SUBTABLE(node)->at_default, an); + art_allot(at, i, oahe, ahe); else - srp_swap_locked(&at->at_heap[i].node, an); + SMR_PTR_SET_LOCKED(ahep, ahe); return (an); } - /* * Deletion API function. */ struct art_node * -art_delete(struct art_root *ar, struct art_node *an, const void *addr, int plen) +art_delete(struct art *art, const void *addr, unsigned int plen) { + unsigned int p; + art_heap_entry *heap; struct art_table *at; - struct art_node *node; - int i, j; + art_heap_entry ahe, pahe, *ahep; + struct art_node *an; + unsigned int i, j; - rw_assert_wrlock(&ar->ar_lock); - KASSERT(plen >= 0 && plen <= ar->ar_alen); + KASSERT(plen <= art->art_alen); - at = srp_get_locked(&ar->ar_root); - if (at == NULL) + heap = SMR_PTR_GET_LOCKED(&art->art_root); + if (heap == NULL) return (NULL); + at = art_heap_to_table(heap); + /* Default route */ if (plen == 0) { - node = srp_get_locked(&at->at_default); - srp_swap_locked(&at->at_default, NULL); - art_table_free(ar, at); - return (node); + ahep = &heap[ART_HEAP_IDX_DEFAULT]; + + ahe = SMR_PTR_GET_LOCKED(ahep); + an = art_heap_entry_to_node(ahe); + if (an == NULL) + return (NULL); + + SMR_PTR_SET_LOCKED(ahep, 0); + art_table_free(art, at); + + return (an); } /* @@ -466,53 +641,37 @@ art_delete(struct art_root *ar, struct a * the stride length at this level the entry must * be in the current table. */ - while (plen > (at->at_offset + at->at_bits)) { + while ((p = at->at_offset + at->at_bits) < plen) { /* Do a single level route lookup. */ - j = art_findex(at, addr); - node = srp_get_locked(&at->at_heap[j].node); + j = art_bindex(at->at_offset, at->at_bits, addr, p); + ahe = SMR_PTR_GET_LOCKED(&heap[j]); /* If this is a leaf, there is no route to delete. 
*/ - if (ISLEAF(node)) + if (art_heap_entry_is_node(ahe)) return (NULL); - at = SUBTABLE(node); + heap = art_heap_entry_to_heap(ahe); + at = art_heap_to_table(heap); } - i = art_bindex(at, addr, plen); - if (i == -1) + i = art_bindex(at->at_offset, at->at_bits, addr, plen); + ahep = &heap[i]; + ahe = SMR_PTR_GET_LOCKED(ahep); + if (!art_heap_entry_is_node(ahe)) { + art_heap_entry *nheap = art_heap_entry_to_heap(ahe); + ahep = &nheap[ART_HEAP_IDX_DEFAULT]; + ahe = SMR_PTR_GET_LOCKED(ahep); + } + + an = art_heap_entry_to_node(ahe); + if (an == NULL) { + /* No route to delete */ return (NULL); - - return (art_table_delete(ar, at, i, an)); -} - -/* - * Single level deletion. - */ -struct art_node * -art_table_delete(struct art_root *ar, struct art_table *at, int i, - struct art_node *an) -{ - struct art_node *next, *node; - -#ifdef DIAGNOSTIC - struct art_node *prev; -#endif - - node = srp_get_locked(&at->at_heap[i].node); -#ifdef DIAGNOSTIC - if (!ISLEAF(node)) - prev = srp_get_locked(&SUBTABLE(node)->at_default); - else - prev = node; - - KASSERT(prev == an); -#endif + } /* Get the next most specific route for the index `i'. */ - if ((i >> 1) > 1) - next = srp_get_locked(&at->at_heap[i >> 1].node); - else - next = NULL; + j = i >> 1; + pahe = (j > 1) ? SMR_PTR_GET_LOCKED(&heap[j]) : 0; /* * If the index `i' of the route that we are removing is not @@ -520,20 +679,18 @@ art_table_delete(struct art_root *ar, st * route pointer to all the corresponding fringe indices. */ if (i < at->at_minfringe) - art_allot(at, i, an, next); - else if (!ISLEAF(node)) - srp_swap_locked(&SUBTABLE(node)->at_default, next); + art_allot(at, i, ahe, pahe); else - srp_swap_locked(&at->at_heap[i].node, next); + SMR_PTR_SET_LOCKED(ahep, pahe); /* We have removed an entry from this table. 
*/ - art_table_free(ar, at); + art_table_free(art, at); return (an); } struct art_table * -art_table_ref(struct art_root *ar, struct art_table *at) +art_table_ref(struct art *art, struct art_table *at) { at->at_refcnt++; return (at); @@ -544,12 +701,11 @@ art_table_rele(struct art_table *at) { if (at == NULL) return (0); - return (--at->at_refcnt == 0); } int -art_table_free(struct art_root *ar, struct art_table *at) +art_table_free(struct art *art, struct art_table *at) { if (art_table_rele(at)) { /* @@ -557,7 +713,7 @@ art_table_free(struct art_root *ar, stru * that are empty. */ do { - at = art_table_put(ar, at); + at = art_table_put(art, at); } while (art_table_rele(at)); return (1); @@ -569,116 +725,187 @@ art_table_free(struct art_root *ar, stru /* * Iteration API function. */ -int -art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg) + +static struct art_node * +art_iter_descend(struct art_iter *ai, art_heap_entry *heap, + art_heap_entry pahe) { - struct srp_ref sr; - struct art_table *at; - struct art_node *node; - int error = 0; + struct art_table *at; + art_heap_entry ahe; - rw_enter_write(&ar->ar_lock); - at = srp_get_locked(&ar->ar_root); - if (at != NULL) { - art_table_ref(ar, at); + at = art_heap_to_table(heap); + ai->ai_table = art_table_ref(ai->ai_art, at); - /* - * The default route should be processed here because the root - * table does not have a parent. - */ - node = srp_enter(&sr, &at->at_default); - error = art_walk_apply(ar, node, NULL, f, arg); - srp_leave(&sr); + /* + * Start looking at non-fringe nodes. + */ + ai->ai_j = 1; + + /* + * The default route (index 1) is processed by the + * parent table (where it belongs) otherwise it could + * be processed more than once. + */ + ai->ai_i = 2; - if (error == 0) - error = art_table_walk(ar, at, f, arg); + /* + * Process the default route now. 
+ */ + ahe = SMR_PTR_GET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT]); + if (ahe != 0 && ahe != pahe) + return (art_heap_entry_to_node(ahe)); + + /* + * Tell the caller to proceed with art_iter_next. + */ + return (NULL); +} + +struct art_node * +art_iter_open(struct art *art, struct art_iter *ai) +{ + art_heap_entry *heap = SMR_PTR_GET(&art->art_root); + struct art_node *an; - art_table_free(ar, at); + ai->ai_art = art; + + if (heap == NULL) { + /* empty, we're already done */ + ai->ai_table = NULL; + return (NULL); } - rw_exit_write(&ar->ar_lock); - return (error); + an = art_iter_descend(ai, heap, 0); + if (an != NULL) + return (an); /* default route */ + + return (art_iter_next(ai)); } -int -art_table_walk(struct art_root *ar, struct art_table *at, - int (*f)(struct art_node *, void *), void *arg) +struct art_node * +art_iter_next(struct art_iter *ai) { - struct srp_ref sr; - struct art_node *node, *next; - struct art_table *nat; - int i, j, error = 0; - uint32_t maxfringe = (at->at_minfringe << 1); + struct art_table *at = ai->ai_table; + art_heap_entry *heap = at->at_heap; + art_heap_entry ahe, pahe; + unsigned int i; +descend: /* * Iterate non-fringe nodes in ``natural'' order. */ - for (j = 1; j < at->at_minfringe; j += 2) { - /* - * The default route (index 1) is processed by the - * parent table (where it belongs) otherwise it could - * be processed more than once. 
- */ - for (i = max(j, 2); i < at->at_minfringe; i <<= 1) { - next = srp_get_locked(&at->at_heap[i >> 1].node); - - node = srp_enter(&sr, &at->at_heap[i].node); - error = art_walk_apply(ar, node, next, f, arg); - srp_leave(&sr); + if (ai->ai_j < at->at_minfringe) { + for (;;) { + while ((i = ai->ai_i) < at->at_minfringe) { + ai->ai_i = i << 1; + + pahe = SMR_PTR_GET_LOCKED(&heap[i >> 1]); + ahe = SMR_PTR_GET_LOCKED(&heap[i]); + if (ahe != 0 && ahe != pahe) + return (art_heap_entry_to_node(ahe)); + } - if (error != 0) - return (error); + ai->ai_j += 2; + if (ai->ai_j < at->at_minfringe) + ai->ai_i = ai->ai_j; + else { + /* Set up the fringe loop */ + ai->ai_i = at->at_minfringe; + break; + } + } } /* - * Iterate fringe nodes. + * Descendent tables are only found on fringe nodes, and + * they record where they were placed in their parent. This + * allows the iterator to know where to resume traversal when + * it ascends back to the parent table. By keeping the table + * refs when going down the tree, the topology is preserved + * even if all the nodes are removed. */ - for (i = at->at_minfringe; i < maxfringe; i++) { - next = srp_get_locked(&at->at_heap[i >> 1].node); - node = srp_enter(&sr, &at->at_heap[i].node); - if (!ISLEAF(node)) { - nat = art_table_ref(ar, SUBTABLE(node)); - node = srp_follow(&sr, &nat->at_default); - } else - nat = NULL; + for (;;) { + unsigned int maxfringe = at->at_minfringe << 1; + struct art_table *parent; - error = art_walk_apply(ar, node, next, f, arg); - srp_leave(&sr); + /* + * Iterate fringe nodes. 
+ */ + while ((i = ai->ai_i) < maxfringe) { + ai->ai_i = i + 1; - if (error != 0) { - art_table_free(ar, nat); - return (error); + pahe = SMR_PTR_GET_LOCKED(&heap[i >> 1]); + ahe = SMR_PTR_GET_LOCKED(&heap[i]); + if (art_heap_entry_is_node(ahe)) { + if (ahe != 0 && ahe != pahe) + return (art_heap_entry_to_node(ahe)); + } else { + struct art_node *an; + + heap = art_heap_entry_to_heap(ahe); + + an = art_iter_descend(ai, heap, pahe); + if (an != NULL) /* default route? */ + return (an); + + /* Start looping over the child table */ + at = art_heap_to_table(heap); + goto descend; + } } - if (nat != NULL) { - error = art_table_walk(ar, nat, f, arg); - art_table_free(ar, nat); - if (error != 0) - return (error); + /* + * Ascend back up to the parent + */ + parent = at->at_parent; + ai->ai_i = at->at_index + 1; + art_table_free(ai->ai_art, at); + + ai->ai_table = parent; + if (parent == NULL) { + /* The root table has no parent */ + break; } + + at = parent; + ai->ai_j = at->at_minfringe; + heap = at->at_heap; } - return (0); + return (NULL); } -int -art_walk_apply(struct art_root *ar, - struct art_node *an, struct art_node *next, - int (*f)(struct art_node *, void *), void *arg) +void +art_iter_close(struct art_iter *ai) { - int error = 0; + struct art_table *at, *parent; - if ((an != NULL) && (an != next)) { - rw_exit_write(&ar->ar_lock); - error = (*f)(an, arg); - rw_enter_write(&ar->ar_lock); + for (at = ai->ai_table; at != NULL; at = parent) { + parent = at->at_parent; + art_table_free(ai->ai_art, at); } - return (error); + ai->ai_table = NULL; } +int +art_walk(struct art *art, int (*f)(struct art_node *, void *), void *arg) +{ + struct art_iter ai; + struct art_node *an; + int error = 0; + + ART_FOREACH(an, art, &ai) { + error = f(an, arg); + if (error != 0) { + art_iter_close(&ai); + return (error); + } + } + + return (0); +} /* * Create a table and use the given index to set its default route. 
@@ -686,89 +913,90 @@ art_walk_apply(struct art_root *ar, * Note: This function does not modify the root or the parent. */ struct art_table * -art_table_get(struct art_root *ar, struct art_table *parent, int j) +art_table_get(struct art *art, struct art_table *parent, unsigned int j) { struct art_table *at; - struct art_node *node; - void *at_heap; - uint32_t lvl; + art_heap_entry *heap; + unsigned int level; + unsigned int bits; - KASSERT(j != 0 && j != 1); + KASSERT(j != ART_HEAP_IDX_TABLE && j != ART_HEAP_IDX_DEFAULT); KASSERT(parent != NULL || j == -1); - if (parent != NULL) - lvl = parent->at_level + 1; - else - lvl = 0; - - KASSERT(lvl < ar->ar_nlvl); + level = (parent != NULL) ? parent->at_level + 1 : 0; + KASSERT(level < art->art_nlevels); at = pool_get(&at_pool, PR_NOWAIT|PR_ZERO); if (at == NULL) return (NULL); - switch (AT_HEAPSIZE(ar->ar_bits[lvl])) { - case AT_HEAPSIZE(4): - at_heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO); + bits = art->art_levels[level]; + switch (bits) { + case 4: + heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO); break; - case AT_HEAPSIZE(8): - at_heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO); + case 8: + heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO); break; default: - panic("incorrect stride length %u", ar->ar_bits[lvl]); + panic("incorrect stride length %u", bits); } - if (at_heap == NULL) { + if (heap == NULL) { pool_put(&at_pool, at); return (NULL); } + heap[ART_HEAP_IDX_TABLE] = (art_heap_entry)at; + at->at_parent = parent; at->at_index = j; - at->at_minfringe = (1 << ar->ar_bits[lvl]); - at->at_level = lvl; - at->at_bits = ar->ar_bits[lvl]; - at->at_heap = at_heap; + at->at_minfringe = (1 << bits); + at->at_level = level; + at->at_bits = bits; + at->at_heap = heap; at->at_refcnt = 0; if (parent != NULL) { - node = srp_get_locked(&parent->at_heap[j].node); - /* node isn't being deleted, no srp_finalize needed */ - srp_swap_locked(&at->at_default, node); + art_heap_entry ahe; + at->at_offset = 
(parent->at_offset + parent->at_bits); + + ahe = SMR_PTR_GET_LOCKED(&parent->at_heap[j]); + SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT], ahe); } return (at); } - /* * Delete a table and use its index to restore its parent's default route. * * Note: Modify its parent to unlink the table from it. */ struct art_table * -art_table_put(struct art_root *ar, struct art_table *at) +art_table_put(struct art *art, struct art_table *at) { struct art_table *parent = at->at_parent; - struct art_node *node; - uint32_t j = at->at_index; + unsigned int j = at->at_index; KASSERT(at->at_refcnt == 0); KASSERT(j != 0 && j != 1); if (parent != NULL) { + art_heap_entry ahe; + KASSERT(j != -1); KASSERT(at->at_level == parent->at_level + 1); KASSERT(parent->at_refcnt >= 1); /* Give the route back to its parent. */ - node = srp_get_locked(&at->at_default); - srp_swap_locked(&parent->at_heap[j].node, node); + ahe = SMR_PTR_GET_LOCKED(&at->at_heap[ART_HEAP_IDX_DEFAULT]); + SMR_PTR_SET_LOCKED(&parent->at_heap[j], ahe); } else { KASSERT(j == -1); KASSERT(at->at_level == 0); - srp_swap_locked(&ar->ar_root, NULL); + SMR_PTR_SET_LOCKED(&art->art_root, NULL); } mtx_enter(&art_table_gc_mtx); @@ -791,19 +1019,16 @@ art_table_gc(void *null) art_table_gc_list = NULL; mtx_leave(&art_table_gc_mtx); + smr_barrier(); + while (at != NULL) { next = at->at_parent; - if (at->at_level == 0) - srp_finalize(at, "arttfini"); - else - srp_finalize(ASNODE(at), "arttfini"); - - switch (AT_HEAPSIZE(at->at_bits)) { - case AT_HEAPSIZE(4): + switch (at->at_bits) { + case 4: pool_put(&at_heap_4_pool, at->at_heap); break; - case AT_HEAPSIZE(8): + case 8: pool_put(&at_heap_8_pool, at->at_heap); break; default: @@ -839,40 +1064,45 @@ art_table_gc(void *null) * art_allot(at, k, old, new); * } */ -void -art_allot(struct art_table *at, int i, struct art_node *old, - struct art_node *new) -{ - struct art_node *node, *dflt; - int k = i; +static void +art_allot(struct art_table *at, unsigned int i, + art_heap_entry oahe, 
art_heap_entry nahe) +{ + art_heap_entry ahe, *ahep; + art_heap_entry *heap; + unsigned int k = i; KASSERT(i < at->at_minfringe); + heap = at->at_heap; + again: k <<= 1; if (k < at->at_minfringe) goto nonfringe; /* Change fringe nodes. */ - while (1) { - node = srp_get_locked(&at->at_heap[k].node); - if (!ISLEAF(node)) { - dflt = srp_get_locked(&SUBTABLE(node)->at_default); - if (dflt == old) { - srp_swap_locked(&SUBTABLE(node)->at_default, - new); - } - } else if (node == old) { - srp_swap_locked(&at->at_heap[k].node, new); + for (;;) { + ahep = &heap[k]; + ahe = SMR_PTR_GET_LOCKED(ahep); + + if (!art_heap_entry_is_node(ahe)) { + art_heap_entry *child = art_heap_entry_to_heap(ahe); + ahep = &child[ART_HEAP_IDX_DEFAULT]; + ahe = SMR_PTR_GET_LOCKED(ahep); } + + if (ahe == oahe) + SMR_PTR_SET_LOCKED(ahep, nahe); + if (k % 2) goto moveup; k++; } nonfringe: - node = srp_get_locked(&at->at_heap[k].node); - if (node == old) + ahe = SMR_PTR_GET_LOCKED(&heap[k]); + if (ahe == oahe) goto again; moveon: if (k % 2) @@ -881,7 +1111,7 @@ moveon: goto nonfringe; moveup: k >>= 1; - srp_swap_locked(&at->at_heap[k].node, new); + SMR_PTR_SET_LOCKED(&heap[k], nahe); /* Change non-fringe node. 
*/ if (k != i) @@ -889,7 +1119,7 @@ moveup: } struct art_node * -art_get(uint8_t plen) +art_get(const uint8_t *addr, unsigned int plen) { struct art_node *an; @@ -897,17 +1127,25 @@ art_get(uint8_t plen) if (an == NULL) return (NULL); - an->an_plen = plen; - SRPL_INIT(&an->an_rtlist); + art_node_init(an, addr, plen); return (an); } void -art_put(struct art_node *an) +art_node_init(struct art_node *an, const uint8_t *addr, unsigned int plen) { - KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist)); + size_t len; + + len = roundup(plen, 8) / 8; + KASSERT(len <= sizeof(an->an_addr)); + memcpy(an->an_addr, addr, len); + an->an_plen = plen; +} +void +art_put(struct art_node *an) +{ mtx_enter(&art_node_gc_mtx); an->an_gc = art_node_gc_list; art_node_gc_list = an; @@ -919,17 +1157,17 @@ art_put(struct art_node *an) void art_gc(void *null) { - struct art_node *an, *next; + struct art_node *an; mtx_enter(&art_node_gc_mtx); an = art_node_gc_list; art_node_gc_list = NULL; mtx_leave(&art_node_gc_mtx); - while (an != NULL) { - next = an->an_gc; + smr_barrier(); - srp_finalize(an, "artnfini"); + while (an != NULL) { + struct art_node *next = an->an_gc; pool_put(&an_pool, an); Index: art.h =================================================================== RCS file: /cvs/src/sys/net/art.h,v diff -u -p -r1.25 art.h --- art.h 11 Nov 2023 12:17:50 -0000 1.25 +++ art.h 13 Jun 2025 01:59:37 -0000 @@ -19,94 +19,90 @@ #ifndef _NET_ART_H_ #define _NET_ART_H_ -#include -#include - -#define ART_MAXLVL 32 /* We currently use 32 levels for IPv6. */ - /* - * Root of the ART tables, equivalent to the radix head. + * Allotment Routing Table (ART) + * + * Yoichi Hariguchi paper can be found at: + * http://www.hariguchi.org/art/art.pdf + * + * Locking: + * + * Modification (ie, art_insert or art_delete) and iteration + * (art_iter_next, etc) over the ART must be serialised by the caller. + * Lookups (ie, art_match and art_lookup) run within an SMR critical + * section. 
* - * Locks used to protect struct members in this file: - * I immutable after creation - * l root's `ar_lock' - * K kernel lock - * For SRP related structures that allow lock-free reads, the write lock - * is indicated below. + * Iteration requires serialisation as it manipulates the reference + * counts on tables as it traverses the tree. The iterator maintains + * these references until it runs out of entries. This allows code + * iterating over the ART to release locks in between calls to + * art_iter_open and art_iter_next. The references may be dropped + * early with art_iter_close. + * + * Note, the iterator does not hold a reference to the art_node + * structure or the data hanging off the an_value pointer, they must + * be accounted for separately or their use must be serialised with + * art_delete. */ -struct art_root { - struct srp ar_root; /* [l] First table */ - struct rwlock ar_lock; /* [] Serialise modifications */ - uint8_t ar_bits[ART_MAXLVL]; /* [I] Per level stride */ - uint8_t ar_nlvl; /* [I] Number of levels */ - uint8_t ar_alen; /* [I] Address length in bits */ - uint8_t ar_off; /* [I] Offset of key in bytes */ - struct sockaddr *ar_source; /* [N] use optional src addr */ -}; -#define ISLEAF(e) (((unsigned long)(e) & 1) == 0) -#define SUBTABLE(e) ((struct art_table *)((unsigned long)(e) & ~1)) -#define ASNODE(t) ((struct art_node *)((unsigned long)(t) | 1)) +typedef uintptr_t art_heap_entry; /* - * Allotment Table. + * Root of the ART, equivalent to the radix head. */ -struct art_table { - struct art_table *at_parent; /* Parent table */ - uint32_t at_index; /* Index in the parent table */ - uint32_t at_minfringe; /* Index that fringe begins */ - uint32_t at_level; /* Level of the table */ - uint8_t at_bits; /* Stride length of the table */ - uint8_t at_offset; /* Sum of parents' stride len */ - - /* - * Items stored in the heap are pointers to nodes, in the leaf - * case, or tables otherwise. 
One exception is index 0 which - * is a route counter. - */ - union { - struct srp node; - unsigned long count; - } *at_heap; /* Array of 2^(slen+1) items */ -}; -#define at_refcnt at_heap[0].count/* Refcounter (1 per different route) */ -#define at_default at_heap[1].node /* Default route (was in parent heap) */ - -/* Heap size for an ART table of stride length ``slen''. */ -#define AT_HEAPSIZE(slen) ((1 << ((slen) + 1)) * sizeof(void *)) -/* - * Forward declaration needed for the list of mpath routes - * attached to a single ART node. - */ -struct rtentry; +struct art { + art_heap_entry *art_root; + const unsigned int *art_levels; + unsigned int art_nlevels; + unsigned int art_alen; +}; /* * A node is the internal representation of a route entry. */ struct art_node { + void *an_value; union { - SRPL_HEAD(, rtentry) an__rtlist; /* Route related to this node */ - struct art_node *an__gc; /* Entry on GC list */ - } an_pointer; - uint8_t an_plen; /* Prefix length */ + struct art_node *an__gc; + uint8_t an__addr[16]; + } an__u; +#define an_gc an__u.an__gc +#define an_addr an__u.an__addr + unsigned int an_plen; }; -#define an_rtlist an_pointer.an__rtlist -#define an_gc an_pointer.an__gc - -void art_init(void); -struct art_root *art_alloc(unsigned int, unsigned int, unsigned int); -struct art_node *art_insert(struct art_root *, struct art_node *, const void *, - int); -struct art_node *art_delete(struct art_root *, struct art_node *, const void *, - int); -struct art_node *art_match(struct art_root *, const void *, struct srp_ref *); -struct art_node *art_lookup(struct art_root *, const void *, int, - struct srp_ref *); -int art_walk(struct art_root *, - int (*)(struct art_node *, void *), void *); -struct art_node *art_get(uint8_t); +void art_boot(void); +struct art *art_alloc(unsigned int); +void art_init(struct art *, unsigned int); +struct art_node *art_insert(struct art *, struct art_node *); +struct art_node *art_delete(struct art *, const void *, unsigned int); 
+struct art_node *art_match(struct art *, const void *); +struct art_node *art_lookup(struct art *, const void *, unsigned int); +int art_is_empty(struct art *); + +struct art_node *art_get(const uint8_t *, unsigned int); +void art_node_init(struct art_node *, + const uint8_t *, unsigned int); void art_put(struct art_node *); + +struct art_iter { + struct art *ai_art; + struct art_table *ai_table; + unsigned int ai_j; + unsigned int ai_i; +}; + +struct art_node *art_iter_open(struct art *, struct art_iter *); +struct art_node *art_iter_next(struct art_iter *); +void art_iter_close(struct art_iter *); + +#define ART_FOREACH(_an, _art, _ai) \ + for ((_an) = art_iter_open((_art), (_ai)); \ + (_an) != NULL; \ + (_an) = art_iter_next((_ai))) + +int art_walk(struct art *, + int (*)(struct art_node *, void *), void *); #endif /* _NET_ART_H_ */ Index: if_tun.c =================================================================== RCS file: /cvs/src/sys/net/if_tun.c,v diff -u -p -r1.251 if_tun.c --- if_tun.c 2 Mar 2025 21:28:32 -0000 1.251 +++ if_tun.c 13 Jun 2025 01:59:37 -0000 @@ -318,8 +318,6 @@ tun_clone_destroy(struct ifnet *ifp) if (vfinddev(dev, VCHR, &vp)) VOP_REVOKE(vp, REVOKEALL); - - KASSERT(sc->sc_dev == 0); } /* prevent userland from getting to the device again */ Index: if_wg.c =================================================================== RCS file: /cvs/src/sys/net/if_wg.c,v diff -u -p -r1.42 if_wg.c --- if_wg.c 17 Apr 2025 12:42:50 -0000 1.42 +++ if_wg.c 13 Jun 2025 01:59:37 -0000 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -251,10 +252,11 @@ struct wg_softc { struct socket *sc_so6; #endif + struct rwlock sc_aip_lock; size_t sc_aip_num; - struct art_root *sc_aip4; + struct art *sc_aip4; #ifdef INET6 - struct art_root *sc_aip6; + struct art *sc_aip6; #endif struct rwlock sc_peer_lock; @@ -290,7 +292,7 @@ void wg_peer_counters_add(struct wg_peer int wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *); struct 
wg_peer * - wg_aip_lookup(struct art_root *, void *); + wg_aip_lookup(struct art *, void *); int wg_aip_remove(struct wg_softc *, struct wg_peer *, struct wg_aip_io *); @@ -609,7 +611,7 @@ wg_peer_counters_add(struct wg_peer *pee int wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d) { - struct art_root *root; + struct art *root; struct art_node *node; struct wg_aip *aip; int ret = 0; @@ -625,8 +627,10 @@ wg_aip_add(struct wg_softc *sc, struct w if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT|PR_ZERO)) == NULL) return ENOBUFS; - rw_enter_write(&root->ar_lock); - node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr); + art_node_init(&aip->a_node, &d->a_addr.addr_bytes, d->a_cidr); + + rw_enter_write(&sc->sc_aip_lock); + node = art_insert(root, &aip->a_node); if (node == &aip->a_node) { aip->a_peer = peer; @@ -642,18 +646,18 @@ wg_aip_add(struct wg_softc *sc, struct w aip->a_peer = peer; } } - rw_exit_write(&root->ar_lock); + rw_exit_write(&sc->sc_aip_lock); return ret; } struct wg_peer * -wg_aip_lookup(struct art_root *root, void *addr) +wg_aip_lookup(struct art *root, void *addr) { - struct srp_ref sr; struct art_node *node; - node = art_match(root, addr, &sr); - srp_leave(&sr); + smr_read_enter(); + node = art_match(root, addr); + smr_read_leave(); return node == NULL ? 
NULL : ((struct wg_aip *) node)->a_peer; } @@ -661,8 +665,7 @@ wg_aip_lookup(struct art_root *root, voi int wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d) { - struct srp_ref sr; - struct art_root *root; + struct art *root; struct art_node *node; struct wg_aip *aip; int ret = 0; @@ -675,23 +678,21 @@ wg_aip_remove(struct wg_softc *sc, struc default: return EAFNOSUPPORT; } - rw_enter_write(&root->ar_lock); - if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) { + rw_enter_write(&sc->sc_aip_lock); + if ((node = art_lookup(root, &d->a_addr, d->a_cidr)) == NULL) { ret = ENOENT; } else if (((struct wg_aip *) node)->a_peer != peer) { ret = EXDEV; } else { aip = (struct wg_aip *)node; - if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL) + if (art_delete(root, &d->a_addr, d->a_cidr) == NULL) panic("art_delete failed to delete node %p", node); sc->sc_aip_num--; LIST_REMOVE(aip, a_entry); pool_put(&wg_aip_pool, aip); } - - srp_leave(&sr); - rw_exit_write(&root->ar_lock); + rw_exit_write(&sc->sc_aip_lock); return ret; } @@ -2726,10 +2727,11 @@ wg_clone_create(struct if_clone *ifc, in #endif sc->sc_aip_num = 0; - if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL) + rw_init(&sc->sc_aip_lock, "wgaip"); + if ((sc->sc_aip4 = art_alloc(32)) == NULL) goto ret_02; #ifdef INET6 - if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL) + if ((sc->sc_aip6 = art_alloc(128)) == NULL) goto ret_03; #endif Index: if_wg.h =================================================================== RCS file: /cvs/src/sys/net/if_wg.h,v diff -u -p -r1.6 if_wg.h --- if_wg.h 13 Oct 2024 00:53:21 -0000 1.6 +++ if_wg.h 13 Jun 2025 01:59:37 -0000 @@ -46,6 +46,7 @@ struct wg_aip_io { sa_family_t a_af; int a_cidr; union wg_aip_addr { + uint8_t addr_bytes; struct in_addr addr_ipv4; struct in6_addr addr_ipv6; } a_addr; Index: route.h =================================================================== RCS file: /cvs/src/sys/net/route.h,v diff -u -p -r1.216 route.h --- 
route.h 16 Mar 2025 21:58:08 -0000 1.216 +++ route.h 13 Jun 2025 01:59:37 -0000 @@ -40,7 +40,7 @@ * I immutable after creation * N net lock * X exclusive net lock, or shared net lock + kernel lock - * R art (rtable) lock + * R rtable lock * r per route entry mutex rt_mtx * L arp/nd6/etc lock for updates, net lock for reads * T rttimer_mtx route timer lists @@ -117,7 +117,7 @@ struct rttimer; struct rtentry { struct sockaddr *rt_dest; /* [I] destination */ - SRPL_ENTRY(rtentry) rt_next; /* [R] next mpath entry to our dst */ + struct rtentry *rt_next; /* [R] next mpath entry to our dst */ struct sockaddr *rt_gateway; /* [X] gateway address */ struct ifaddr *rt_ifa; /* [N] interface addr to use */ caddr_t rt_llinfo; /* [L] pointer to link level info or Index: rtable.c =================================================================== RCS file: /cvs/src/sys/net/rtable.c,v diff -u -p -r1.87 rtable.c --- rtable.c 9 Apr 2024 12:53:08 -0000 1.87 +++ rtable.c 13 Jun 2025 01:59:37 -0000 @@ -26,12 +26,21 @@ #include #include #include +#include #endif #include #include #include +struct rtable { + struct rwlock r_lock; + struct art *r_art; /* [I] */ + unsigned int r_off; /* [I] Offset of key in bytes */ + + struct sockaddr *r_source; /* [N] use optional src addr */ +}; + /* * Structures used by rtable_get() to retrieve the corresponding * routing table for a given pair of ``af'' and ``rtableid''. 
@@ -85,8 +94,8 @@ void rtmap_dtor(void *, void *); struct srp_gc rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL); void rtable_init_backend(void); -void *rtable_alloc(unsigned int, unsigned int, unsigned int); -void *rtable_get(unsigned int, sa_family_t); +struct rtable *rtable_alloc(unsigned int, unsigned int, unsigned int); +struct rtable *rtable_get(unsigned int, sa_family_t); void rtmap_init(void) @@ -193,7 +202,7 @@ int rtable_add(unsigned int id) { const struct domain *dp; - void *tbl; + struct rtable *tbl; struct rtmap *map; struct dommp *dmm; sa_family_t af; @@ -244,11 +253,11 @@ out: return (error); } -void * +struct rtable * rtable_get(unsigned int rtableid, sa_family_t af) { struct rtmap *map; - void *tbl = NULL; + struct rtable *tbl = NULL; struct srp_ref sr; if (af >= nitems(af2idx) || af2idx[af] == 0) @@ -286,7 +295,7 @@ rtable_empty(unsigned int rtableid) { const struct domain *dp; int i; - struct art_root *tbl; + struct rtable *tbl; for (i = 0; (dp = domains[i]) != NULL; i++) { if (dp->dom_rtoffset == 0) @@ -295,7 +304,7 @@ rtable_empty(unsigned int rtableid) tbl = rtable_get(rtableid, dp->dom_family); if (tbl == NULL) continue; - if (tbl->ar_root.ref != NULL) + if (!art_is_empty(tbl->r_art)) return (0); } @@ -350,40 +359,51 @@ rtable_l2set(unsigned int rtableid, unsi } -static inline const uint8_t *satoaddr(struct art_root *, +static inline const uint8_t *satoaddr(struct rtable *, const struct sockaddr *); -int an_match(struct art_node *, const struct sockaddr *, int); -void rtentry_ref(void *, void *); -void rtentry_unref(void *, void *); - void rtable_mpath_insert(struct art_node *, struct rtentry *); -struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL); - void rtable_init_backend(void) { - art_init(); + art_boot(); } -void * +struct rtable * rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off) { - return (art_alloc(rtableid, alen, off)); + struct rtable *r; + + r = malloc(sizeof(*r), M_RTABLE, 
M_NOWAIT|M_ZERO); + if (r == NULL) + return (r); + + + r->r_art = art_alloc(alen); + if (r->r_art == NULL) { + free(r, M_RTABLE, sizeof(*r)); + return (NULL); + } + + rw_init(&r->r_lock, "rtable"); + r->r_off = off; + r->r_source = NULL; + + return (r); } int rtable_setsource(unsigned int rtableid, int af, struct sockaddr *src) { - struct art_root *ar; + struct rtable *r; NET_ASSERT_LOCKED_EXCLUSIVE(); - if ((ar = rtable_get(rtableid, af)) == NULL) + if ((r = rtable_get(rtableid, af)) == NULL) return (EAFNOSUPPORT); - ar->ar_source = src; + r->r_source = src; return (0); } @@ -391,15 +411,15 @@ rtable_setsource(unsigned int rtableid, struct sockaddr * rtable_getsource(unsigned int rtableid, int af) { - struct art_root *ar; + struct rtable *r; NET_ASSERT_LOCKED(); - ar = rtable_get(rtableid, af); - if (ar == NULL) + r = rtable_get(rtableid, af); + if (r == NULL) return (NULL); - return (ar->ar_source); + return (r->r_source); } void @@ -419,37 +439,34 @@ struct rtentry * rtable_lookup(unsigned int rtableid, const struct sockaddr *dst, const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio) { - struct art_root *ar; + struct rtable *r; struct art_node *an; struct rtentry *rt = NULL; - struct srp_ref sr, nsr; const uint8_t *addr; int plen; - ar = rtable_get(rtableid, dst->sa_family); - if (ar == NULL) + r = rtable_get(rtableid, dst->sa_family); + if (r == NULL) return (NULL); - addr = satoaddr(ar, dst); + addr = satoaddr(r, dst); - /* No need for a perfect match. */ + smr_read_enter(); if (mask == NULL) { - an = art_match(ar, addr, &nsr); - if (an == NULL) - goto out; + /* No need for a perfect match. */ + an = art_match(r->r_art, addr); } else { plen = rtable_satoplen(dst->sa_family, mask); if (plen == -1) return (NULL); - an = art_lookup(ar, addr, plen, &nsr); - - /* Make sure we've got a perfect match. 
*/ - if (!an_match(an, dst, plen)) - goto out; + an = art_lookup(r->r_art, addr, plen); } + if (an == NULL) + goto out; - SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) { + for (rt = SMR_PTR_GET(&an->an_value); rt != NULL; + rt = SMR_PTR_GET(&rt->rt_next)) { if (prio != RTP_ANY && (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK)) continue; @@ -464,9 +481,8 @@ rtable_lookup(unsigned int rtableid, con if (rt != NULL) rtref(rt); - SRPL_LEAVE(&sr); out: - srp_leave(&nsr); + smr_read_leave(); return (rt); } @@ -474,44 +490,41 @@ out: struct rtentry * rtable_match(unsigned int rtableid, const struct sockaddr *dst, uint32_t *src) { - struct art_root *ar; + struct rtable *r; struct art_node *an; struct rtentry *rt = NULL; - struct srp_ref sr, nsr; const uint8_t *addr; int hash; + uint8_t prio; - ar = rtable_get(rtableid, dst->sa_family); - if (ar == NULL) + r = rtable_get(rtableid, dst->sa_family); + if (r == NULL) return (NULL); - addr = satoaddr(ar, dst); + addr = satoaddr(r, dst); - an = art_match(ar, addr, &nsr); + smr_read_enter(); + an = art_match(r->r_art, addr); if (an == NULL) goto out; - rt = SRPL_FIRST(&sr, &an->an_rtlist); - if (rt == NULL) { - SRPL_LEAVE(&sr); - goto out; - } - rtref(rt); - SRPL_LEAVE(&sr); + rt = SMR_PTR_GET(&an->an_value); + KASSERT(rt != NULL); + prio = rt->rt_priority; /* Gateway selection by Hash-Threshold (RFC 2992) */ if ((hash = rt_hash(rt, dst, src)) != -1) { struct rtentry *mrt; - int threshold, npaths = 0; + int threshold, npaths = 1; KASSERT(hash <= 0xffff); - SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) { - /* Only count nexthops with the same priority. */ - if (mrt->rt_priority == rt->rt_priority) + /* Only count nexthops with the same priority. 
*/ + mrt = rt; + while ((mrt = SMR_PTR_GET(&mrt->rt_next)) != NULL) { + if (mrt->rt_priority == prio) npaths++; } - SRPL_LEAVE(&sr); threshold = (0xffff / npaths) + 1; @@ -520,27 +533,22 @@ rtable_match(unsigned int rtableid, cons * route list attached to the node, so we won't necessarily * have the same number of routes. for most modifications, * we'll pick a route that we wouldn't have if we only saw the - * list before or after the change. if we were going to use - * the last available route, but it got removed, we'll hit - * the end of the list and then pick the first route. + * list before or after the change. */ - - mrt = SRPL_FIRST(&sr, &an->an_rtlist); - while (hash > threshold && mrt != NULL) { - if (mrt->rt_priority == rt->rt_priority) + mrt = rt; + while (hash > threshold) { + if (mrt->rt_priority == prio) { + rt = mrt; hash -= threshold; - mrt = SRPL_FOLLOW(&sr, mrt, rt_next); - } - - if (mrt != NULL) { - rtref(mrt); - rtfree(rt); - rt = mrt; + } + mrt = SMR_PTR_GET(&mrt->rt_next); + if (mrt == NULL) + break; } - SRPL_LEAVE(&sr); } + rtref(rt); out: - srp_leave(&nsr); + smr_read_leave(); return (rt); } @@ -549,35 +557,59 @@ rtable_insert(unsigned int rtableid, str const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio, struct rtentry *rt) { - struct rtentry *mrt; - struct srp_ref sr; - struct art_root *ar; + struct rtable *r; struct art_node *an, *prev; const uint8_t *addr; int plen; unsigned int rt_flags; int error = 0; - ar = rtable_get(rtableid, dst->sa_family); - if (ar == NULL) + r = rtable_get(rtableid, dst->sa_family); + if (r == NULL) return (EAFNOSUPPORT); - addr = satoaddr(ar, dst); + addr = satoaddr(r, dst); plen = rtable_satoplen(dst->sa_family, mask); if (plen == -1) return (EINVAL); - rtref(rt); /* guarantee rtfree won't do anything during insert */ - rw_enter_write(&ar->ar_lock); + an = art_get(addr, plen); + if (an == NULL) + return (ENOMEM); - /* Do not permit exactly the same dst/mask/gw pair. 
*/ - an = art_lookup(ar, addr, plen, &sr); - srp_leave(&sr); /* an can't go away while we have the lock */ - if (an_match(an, dst, plen)) { - struct rtentry *mrt; - int mpathok = ISSET(rt->rt_flags, RTF_MPATH); + /* prepare for immediate operation if insert succeeds */ + rt_flags = rt->rt_flags; + rt->rt_flags &= ~RTF_MPATH; + rt->rt_dest = dst; + rt->rt_plen = plen; + rt->rt_next = NULL; - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) { + rtref(rt); /* take a ref for the table */ + an->an_value = rt; + + rw_enter_write(&r->r_lock); + prev = art_insert(r->r_art, an); + if (prev == NULL) { + error = ENOMEM; + goto put; + } + + if (prev != an) { + struct rtentry *mrt; + int mpathok = ISSET(rt_flags, RTF_MPATH); + int mpath = 0; + + /* + * An ART node with the same destination/netmask already + * exists. + */ + art_put(an); + an = prev; + + /* Do not permit exactly the same dst/mask/gw pair. */ + for (mrt = SMR_PTR_GET_LOCKED(&an->an_value); + mrt != NULL; + mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) { if (prio != RTP_ANY && (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK)) continue; @@ -589,63 +621,34 @@ rtable_insert(unsigned int rtableid, str error = EEXIST; goto leave; } - } - } - - an = art_get(plen); - if (an == NULL) { - error = ENOBUFS; - goto leave; - } - - /* prepare for immediate operation if insert succeeds */ - rt_flags = rt->rt_flags; - rt->rt_flags &= ~RTF_MPATH; - rt->rt_dest = dst; - rt->rt_plen = plen; - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); - - prev = art_insert(ar, an, addr, plen); - if (prev != an) { - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, - rt_next); - rt->rt_flags = rt_flags; - art_put(an); - - if (prev == NULL) { - error = ESRCH; - goto leave; + mpath = RTF_MPATH; } - an = prev; - - mrt = SRPL_FIRST_LOCKED(&an->an_rtlist); - KASSERT(mrt != NULL); - KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio); + /* The new route can be added to the list. 
*/ + if (mpath) { + SET(rt->rt_flags, RTF_MPATH); + + for (mrt = SMR_PTR_GET_LOCKED(&an->an_value); + mrt != NULL; + mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) { + if ((mrt->rt_priority & RTP_MASK) != + (prio & RTP_MASK)) + continue; - /* - * An ART node with the same destination/netmask already - * exists, MPATH conflict must have been already checked. - */ - if (rt->rt_flags & RTF_MPATH) { - /* - * Only keep the RTF_MPATH flag if two routes have - * the same gateway. - */ - rt->rt_flags &= ~RTF_MPATH; - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) { - if (mrt->rt_priority == prio) { - mrt->rt_flags |= RTF_MPATH; - rt->rt_flags |= RTF_MPATH; - } + SET(mrt->rt_flags, RTF_MPATH); } } /* Put newly inserted entry at the right place. */ rtable_mpath_insert(an, rt); } + rw_exit_write(&r->r_lock); + return (error); + +put: + art_put(an); leave: - rw_exit_write(&ar->ar_lock); + rw_exit_write(&r->r_lock); rtfree(rt); return (error); } @@ -654,118 +657,134 @@ int rtable_delete(unsigned int rtableid, const struct sockaddr *dst, const struct sockaddr *mask, struct rtentry *rt) { - struct art_root *ar; + struct rtable *r; struct art_node *an; - struct srp_ref sr; const uint8_t *addr; int plen; struct rtentry *mrt; - int npaths = 0; - int error = 0; - ar = rtable_get(rtableid, dst->sa_family); - if (ar == NULL) + r = rtable_get(rtableid, dst->sa_family); + if (r == NULL) return (EAFNOSUPPORT); - addr = satoaddr(ar, dst); + addr = satoaddr(r, dst); plen = rtable_satoplen(dst->sa_family, mask); if (plen == -1) return (EINVAL); - rtref(rt); /* guarantee rtfree won't do anything under ar_lock */ - rw_enter_write(&ar->ar_lock); - an = art_lookup(ar, addr, plen, &sr); - srp_leave(&sr); /* an can't go away while we have the lock */ - - /* Make sure we've got a perfect match. 
*/ - if (!an_match(an, dst, plen)) { - error = ESRCH; - goto leave; + rw_enter_write(&r->r_lock); + smr_read_enter(); + an = art_lookup(r->r_art, addr, plen); + smr_read_leave(); + if (an == NULL) { + rw_exit_write(&r->r_lock); + return (ESRCH); } - /* - * If other multipath route entries are still attached to - * this ART node we only have to unlink it. - */ - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) - npaths++; - - if (npaths > 1) { - KASSERT(refcnt_read(&rt->rt_refcnt) >= 1); - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, - rt_next); - - mrt = SRPL_FIRST_LOCKED(&an->an_rtlist); - if (npaths == 2) - mrt->rt_flags &= ~RTF_MPATH; + /* If this is the only route in the list then we can delete the node */ + if (SMR_PTR_GET_LOCKED(&an->an_value) == rt && + SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) { + struct art_node *oan; + oan = art_delete(r->r_art, addr, plen); + if (oan != an) + panic("art %p changed shape during delete", r->r_art); + art_put(an); + /* + * XXX an and the rt ref could still be alive on other cpus. + * this currently works because of the NET_LOCK/KERNEL_LOCK + * but should be fixed if we want to do route lookups outside + * these locks. - dlg@ + */ + } else { + struct rtentry **prt = (struct rtentry **)&an->an_value; + struct rtentry *nrt; + unsigned int found = 0; + unsigned int npaths = 0; - goto leave; + /* + * If other multipath route entries are still attached to + * this ART node we only have to unlink it. 
+ */ + while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) { + if (mrt == rt) { + found = 1; + SMR_PTR_SET_LOCKED(prt, + SMR_PTR_GET_LOCKED(&mrt->rt_next)); + } else if ((mrt->rt_priority & RTP_MASK) == + (rt->rt_priority & RTP_MASK)) { + npaths++; + nrt = mrt; + } + } + if (!found) + panic("removing non-existent route"); + if (npaths == 1) + CLR(nrt->rt_flags, RTF_MPATH); } - - if (art_delete(ar, an, addr, plen) == NULL) - panic("art_delete failed to find node %p", an); - KASSERT(refcnt_read(&rt->rt_refcnt) >= 1); - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next); - art_put(an); - -leave: - rw_exit_write(&ar->ar_lock); + rw_exit_write(&r->r_lock); rtfree(rt); - return (error); -} - -struct rtable_walk_cookie { - int (*rwc_func)(struct rtentry *, void *, unsigned int); - void *rwc_arg; - struct rtentry **rwc_prt; - unsigned int rwc_rid; -}; - -/* - * Helper for rtable_walk to keep the ART code free from any "struct rtentry". - */ -int -rtable_walk_helper(struct art_node *an, void *xrwc) -{ - struct srp_ref sr; - struct rtable_walk_cookie *rwc = xrwc; - struct rtentry *rt; - int error = 0; - - SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) { - error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid); - if (error != 0) - break; - } - if (rwc->rwc_prt != NULL && rt != NULL) { - rtref(rt); - *rwc->rwc_prt = rt; - } - SRPL_LEAVE(&sr); - - return (error); + return (0); } int rtable_walk(unsigned int rtableid, sa_family_t af, struct rtentry **prt, int (*func)(struct rtentry *, void *, unsigned int), void *arg) { - struct art_root *ar; - struct rtable_walk_cookie rwc; + struct rtable *r; + struct art_iter ai; + struct art_node *an; int error; - ar = rtable_get(rtableid, af); - if (ar == NULL) + r = rtable_get(rtableid, af); + if (r == NULL) return (EAFNOSUPPORT); - rwc.rwc_func = func; - rwc.rwc_arg = arg; - rwc.rwc_prt = prt; - rwc.rwc_rid = rtableid; + rw_enter_write(&r->r_lock); + ART_FOREACH(an, r->r_art, &ai) { + /* + * ART nodes have a list of rtentries. 
+ * + * art_iter holds references to the topology + * so it won't change, but not the an_node or rtentries. + */ + struct rtentry *rt = SMR_PTR_GET_LOCKED(&an->an_value); + rtref(rt); + + rw_exit_write(&r->r_lock); + do { + struct rtentry *nrt; + + smr_read_enter(); + /* Get ready for the next entry. */ + nrt = SMR_PTR_GET(&rt->rt_next); + if (nrt != NULL) + rtref(nrt); + smr_read_leave(); + + error = func(rt, arg, rtableid); + if (error != 0) { + if (prt != NULL) + *prt = rt; + else + rtfree(rt); + + if (nrt != NULL) + rtfree(nrt); + + rw_enter_write(&r->r_lock); + art_iter_close(&ai); + rw_exit_write(&r->r_lock); + return (error); + } - error = art_walk(ar, rtable_walk_helper, &rwc); + rtfree(rt); + rt = nrt; + } while (rt != NULL); + rw_enter_write(&r->r_lock); + } + rw_exit_write(&r->r_lock); return (error); } @@ -774,12 +793,12 @@ struct rtentry * rtable_iterate(struct rtentry *rt0) { struct rtentry *rt = NULL; - struct srp_ref sr; - rt = SRPL_NEXT(&sr, rt0, rt_next); + smr_read_enter(); + rt = SMR_PTR_GET(&rt0->rt_next); if (rt != NULL) rtref(rt); - SRPL_LEAVE(&sr); + smr_read_leave(); rtfree(rt0); return (rt); } @@ -794,27 +813,28 @@ int rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst, int plen, uint8_t prio, struct rtentry *rt) { - struct art_root *ar; + struct rtable *r; struct art_node *an; - struct srp_ref sr; const uint8_t *addr; int error = 0; - ar = rtable_get(rtableid, dst->sa_family); - if (ar == NULL) + r = rtable_get(rtableid, dst->sa_family); + if (r == NULL) return (EAFNOSUPPORT); - addr = satoaddr(ar, dst); - - rw_enter_write(&ar->ar_lock); - an = art_lookup(ar, addr, plen, &sr); - srp_leave(&sr); /* an can't go away while we have the lock */ + addr = satoaddr(r, dst); - /* Make sure we've got a perfect match. 
*/ - if (!an_match(an, dst, plen)) { + rw_enter_write(&r->r_lock); + smr_read_enter(); + an = art_lookup(r->r_art, addr, plen); + smr_read_leave(); + if (an == NULL) { error = ESRCH; - } else if (SRPL_FIRST_LOCKED(&an->an_rtlist) == rt && - SRPL_NEXT_LOCKED(rt, rt_next) == NULL) { + goto leave; + } + + if (SMR_PTR_GET_LOCKED(&an->an_value) == rt && + SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) { /* * If there's only one entry on the list do not go * through an insert/remove cycle. This is done to @@ -823,15 +843,22 @@ rtable_mpath_reprio(unsigned int rtablei */ rt->rt_priority = prio; } else { - rtref(rt); /* keep rt alive in between remove and insert */ - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, - rt, rtentry, rt_next); + struct rtentry **prt = (struct rtentry **)&an->an_value; + struct rtentry *mrt; + + while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) { + if (mrt == rt) + break; + } + KASSERT(mrt != NULL); + + SMR_PTR_SET_LOCKED(prt, SMR_PTR_GET_LOCKED(&rt->rt_next)); rt->rt_priority = prio; rtable_mpath_insert(an, rt); - rtfree(rt); error = EAGAIN; } - rw_exit_write(&ar->ar_lock); +leave: + rw_exit_write(&r->r_lock); return (error); } @@ -839,63 +866,19 @@ rtable_mpath_reprio(unsigned int rtablei void rtable_mpath_insert(struct art_node *an, struct rtentry *rt) { - struct rtentry *mrt, *prt = NULL; + struct rtentry *mrt, **prt; uint8_t prio = rt->rt_priority; - if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) == NULL) { - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); - return; - } + prt = (struct rtentry **)&an->an_value; /* Iterate until we find the route to be placed after ``rt''. 
*/ - while (mrt->rt_priority <= prio && SRPL_NEXT_LOCKED(mrt, rt_next)) { - prt = mrt; - mrt = SRPL_NEXT_LOCKED(mrt, rt_next); - } - - if (mrt->rt_priority <= prio) { - SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next); - } else if (prt != NULL) { - SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt, rt_next); - } else { - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); + while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) { + if (mrt->rt_priority >= prio) + break; } -} -/* - * Returns 1 if ``an'' perfectly matches (``dst'', ``plen''), 0 otherwise. - */ -int -an_match(struct art_node *an, const struct sockaddr *dst, int plen) -{ - struct rtentry *rt; - struct srp_ref sr; - int match; - - if (an == NULL || an->an_plen != plen) - return (0); - - rt = SRPL_FIRST(&sr, &an->an_rtlist); - match = (rt != NULL && memcmp(rt->rt_dest, dst, dst->sa_len) == 0); - SRPL_LEAVE(&sr); - - return (match); -} - -void -rtentry_ref(void *null, void *xrt) -{ - struct rtentry *rt = xrt; - - rtref(rt); -} - -void -rtentry_unref(void *null, void *xrt) -{ - struct rtentry *rt = xrt; - - rtfree(rt); + SMR_PTR_SET_LOCKED(&rt->rt_next, mrt); + SMR_PTR_SET_LOCKED(prt, rt); } /* @@ -904,9 +887,9 @@ rtentry_unref(void *null, void *xrt) * of "struct sockaddr" used by this routing table. */ static inline const uint8_t * -satoaddr(struct art_root *at, const struct sockaddr *sa) +satoaddr(struct rtable *r, const struct sockaddr *sa) { - return (((const uint8_t *)sa) + at->ar_off); + return (((const uint8_t *)sa) + r->r_off); } /* Index: rtable.h =================================================================== RCS file: /cvs/src/sys/net/rtable.h,v diff -u -p -r1.30 rtable.h --- rtable.h 13 May 2024 01:15:53 -0000 1.30 +++ rtable.h 13 Jun 2025 01:59:37 -0000 @@ -28,6 +28,8 @@ #define rt_plen(rt) ((rt)->rt_plen) #define RT_ROOT(rt) (0) +struct rtable; + int rtable_satoplen(sa_family_t, const struct sockaddr *); void rtable_init(void);