Index | Thread | Search

From:
David Gwynne <david@gwynne.id.au>
Subject:
Re: use the SMR api instead of SRPs in art/rtable code
To:
Alexander Bluhm <alexander.bluhm@gmx.net>
Cc:
tech@openbsd.org
Date:
Sun, 29 Jun 2025 21:39:56 +1000

Download raw body.

Thread
  • Alexander Bluhm:

    use the SMR api instead of SRPs in art/rtable code

  • On Sun, Jun 29, 2025 at 01:10:03PM +0200, Alexander Bluhm wrote:
    > On Sun, Jun 29, 2025 at 09:20:25AM +1000, David Gwynne wrote:
    > > this moves some data structure info back to art.h from art.c because
    > > netstat needs to understand it.
    > 
    > Now it crashes in regress/sys/net/wg
    
    oh yeah. sorry about that.
    
    > ==== unconfig ====
    > # destroy WireGuard and routing domain loopback interfaces
    > ifconfig wg11 destroy
    > Timeout, server ot29 not responding.
    > 
    > WpAanRNicIN: G:k erSPneL l NOdiT agLOnoWEstREicD  aONss SerYStiCAonLL " 7cu4 rc-1pu47()85->74ci08_s0 chEXedITst 0at ae.
    > Stopped at  savectx+0xae:   movl    $0,%gs:0x688
    >     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
    >  264749  46815      0         0x2          0    2  ifconfig
    >  342447   3854      0     0x14000      0x200    5  wg_handshake
    >  299868   9493      0     0x14000      0x200    3  wg_handshake
    >     372   2548     55         0x2          0    0  perl
    >  274731  28174     77    0x100012          0    4  dhcpleased
    > *192105  71069    115    0x100012          0    1  slaacd
    > savectx() at savectx+0xae
    > end of kernel
    > end trace frame: 0x7ce4ebf2b4d0, count: 14
    > https://www.openbsd.org/ddb.html describes the minimum info required in bug
    > reports.  Insufficient info makes it difficult to find and fix bugs.
    > 
    > ddb{1}> show panic
    > *cpu2: kernel diagnostic assertion "curcpu()->ci_schedstate.spc_smrdepth > 0" failed: file "/usr/src/sys/net/art.c", line 381
    
    i guess you'll want my tweaks for the rtable regress too.
    
    Index: sys/net/art.c
    ===================================================================
    RCS file: /cvs/src/sys/net/art.c,v
    diff -u -p -r1.31 art.c
    --- sys/net/art.c	11 Nov 2023 12:17:50 -0000	1.31
    +++ sys/net/art.c	29 Jun 2025 11:35:03 -0000
    @@ -22,6 +22,58 @@
      *
      * Yoichi Hariguchi paper can be found at:
      *	http://www.hariguchi.org/art/art.pdf
    + *
    + * This implementation is tweaked to minimise pointer traversal
    + * during lookups by only accessing the heaps. It avoids following
    + * pointers to or through the art_table structures.
    + *
    + * The heap is an array of art_heap_entry values which have different
    + * meanings depending on their location. The majority of entries in
     + * the heap are used as pointers to nodes (struct art_node, aka leaf
    + * entries), or the next heap to traverse. These pointers are typed/
    + * tagged by the low bit in their heap value, and must be masked
    + * appropriately before use.
    + *
    + * The first two entries in the heap are not used by the search
    + * algorithm, so they are used to store data associated with this
    + * part of the heap. The first entry (heap[0]) stores a pointer to
    + * an art_table struct associated with this heap. The second entry
    + * (heap[1]) stores the node pointer which would be in the parent
    + * heap if this table wasn't using it. This node pointer is also known
    + * as the default entry/route for the current table in the ART.
    + *
    + * Nodes contain the exact information about the prefix they represent
    + * in the ART, ie, the address and prefix length, and a pointer to
    + * data associated with that prefix (an_value).
    + *
    + * The relationships between the separate data structures looks
    + * like the following:
    + *
    + * +------------------+
    + * |    struct art    |
    + * +------------------+              NULL
    + *   |                                ^
    + *   | art_root                       | at_parent
    + *   v                    heap[0]     |
    + * +------------------+ ----------> +------------------+
    + * |       heap       |             | struct art_table |
    + * +------------------+ <---------- +------------------+
    + *   |                    at_heap     ^
    + *   | heap entry                     | at_parent
     + *   v                    heap[0]     |
    + * +------------------+ ----------> +------------------+
    + * |       heap       |             | struct art_table |
    + * +------------------+ <---------- +------------------+
    + *   |                    at_heap
    + *   | node entry
    + *   v
    + * +------------------+
    + * | struct art_node  |
    + * +------------------+
    + *   |
    + *   | an_value
    + *   v
    + * "user" data
      */
     
     #ifndef _KERNEL
    @@ -33,43 +85,38 @@
     #include <sys/pool.h>
     #include <sys/task.h>
     #include <sys/socket.h>
    +#include <sys/smr.h>
     #endif
     
     #include <net/art.h>
     
    -int			 art_bindex(struct art_table *, const uint8_t *, int);
    -void			 art_allot(struct art_table *at, int, struct art_node *,
    -			     struct art_node *);
    -struct art_table	*art_table_get(struct art_root *, struct art_table *,
    -			     int);
    -struct art_table	*art_table_put(struct art_root *, struct art_table *);
    -struct art_node		*art_table_insert(struct art_root *, struct art_table *,
    +static void		 art_allot(struct art_table *at, unsigned int,
    +			     art_heap_entry, art_heap_entry);
    +struct art_table	*art_table_get(struct art *, struct art_table *,
    +			     unsigned int);
    +struct art_table	*art_table_put(struct art  *, struct art_table *);
    +struct art_node		*art_table_insert(struct art *, struct art_table *,
     			     int, struct art_node *);
    -struct art_node		*art_table_delete(struct art_root *, struct art_table *,
    +struct art_node		*art_table_delete(struct art *, struct art_table *,
     			     int, struct art_node *);
    -struct art_table	*art_table_ref(struct art_root *, struct art_table *);
    -int			 art_table_free(struct art_root *, struct art_table *);
    -int			 art_table_walk(struct art_root *, struct art_table *,
    -			     int (*f)(struct art_node *, void *), void *);
    -int			 art_walk_apply(struct art_root *,
    -			     struct art_node *, struct art_node *,
    -			     int (*f)(struct art_node *, void *), void *);
    +struct art_table	*art_table_ref(struct art *, struct art_table *);
    +int			 art_table_free(struct art *, struct art_table *);
     void			 art_table_gc(void *);
     void			 art_gc(void *);
     
    -struct pool		an_pool, at_pool, at_heap_4_pool, at_heap_8_pool;
    +static struct pool	 an_pool, at_pool, at_heap_4_pool, at_heap_8_pool;
     
    -struct art_table	*art_table_gc_list = NULL;
    -struct mutex		 art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
    -struct task		 art_table_gc_task =
    +static struct art_table	*art_table_gc_list = NULL;
    +static struct mutex	 art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
    +static struct task	 art_table_gc_task =
     			     TASK_INITIALIZER(art_table_gc, NULL);
     
    -struct art_node		*art_node_gc_list = NULL;
    -struct mutex		 art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
    -struct task		 art_node_gc_task = TASK_INITIALIZER(art_gc, NULL);
    +static struct art_node	*art_node_gc_list = NULL;
    +static struct mutex	 art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
    +static struct task	 art_node_gc_task = TASK_INITIALIZER(art_gc, NULL);
     
     void
    -art_init(void)
    +art_boot(void)
     {
     	pool_init(&an_pool, sizeof(struct art_node), 0, IPL_SOFTNET, 0,
     	    "art_node", NULL);
    @@ -84,56 +131,74 @@ art_init(void)
     /*
      * Per routing table initialization API function.
      */
    -struct art_root *
    -art_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
    -{
    -	struct art_root		*ar;
    -	int			 i;
     
    -	ar = malloc(sizeof(*ar), M_RTABLE, M_NOWAIT|M_ZERO);
    -	if (ar == NULL)
    -		return (NULL);
    +static const unsigned int art_plen32_levels[] = {
    +	8,  4, 4,  4, 4,  4, 4
    +};
    +
    +static const unsigned int art_plen128_levels[] = {
    +	4, 4,  4, 4,  4, 4,  4, 4,  4, 4,  4, 4,  4, 4,  4, 4,
    +	4, 4,  4, 4,  4, 4,  4, 4,  4, 4,  4, 4,  4, 4,  4, 4
    +};
    +
    +static const unsigned int art_plen20_levels[] = {
    +	4, 4, 4, 4, 4
    +};
    +
    +void
    +art_init(struct art *art, unsigned int alen)
    +{
    +	const unsigned int *levels;
    +	unsigned int nlevels;
    +#ifdef DIAGNOSTIC
    +	unsigned int		 i;
    +	unsigned int		 bits = 0;
    +#endif
     
     	switch (alen) {
     	case 32:
    -		ar->ar_alen = 32;
    -		ar->ar_nlvl = 7;
    -		ar->ar_bits[0] = 8;
    -		for (i = 1; i < ar->ar_nlvl; i++)
    -			ar->ar_bits[i] = 4;
    +		levels = art_plen32_levels;
    +		nlevels = nitems(art_plen32_levels);
     		break;
     	case 128:
    -		ar->ar_alen = 128;
    -		ar->ar_nlvl = 32;
    -		for (i = 0; i < ar->ar_nlvl; i++)
    -			ar->ar_bits[i] = 4;
    +		levels = art_plen128_levels;
    +		nlevels = nitems(art_plen128_levels);
    +		break;
    +	case 20:
    +		levels = art_plen20_levels;
    +		nlevels = nitems(art_plen20_levels);
     		break;
     	default:
    -		printf("%s: incorrect address length %u\n", __func__, alen);
    -		free(ar, M_RTABLE, sizeof(*ar));
    -		return (NULL);
    +		panic("no configuration for alen %u", alen);
    +		/* NOTREACHED */
     	}
     
    -	ar->ar_off = off;
    -	rw_init(&ar->ar_lock, "art");
    +#ifdef DIAGNOSTIC
    +	for (i = 0; i < nlevels; i++)
    +		bits += levels[i];
    +
    +	if (alen != bits)
    +		panic("sum of levels %u != address len %u", bits, alen);
    +#endif /* DIAGNOSTIC */
     
    -	return (ar);
    +	art->art_root = 0;
    +	art->art_levels = levels;
    +	art->art_nlevels = nlevels;
    +	art->art_alen = alen;
     }
     
    -/*
    - * Return 1 if ``old'' and ``new`` are identical, 0 otherwise.
    - */
    -static inline int
    -art_check_duplicate(struct art_root *ar, struct art_node *old,
    -    struct art_node *new)
    +struct art *
    +art_alloc(unsigned int alen)
     {
    -	if (old == NULL)
    -		return (0);
    +	struct art		*art;
     
    -	if (old->an_plen == new->an_plen)
    -		return (1);
    +	art = malloc(sizeof(*art), M_RTABLE, M_NOWAIT|M_ZERO);
    +	if (art == NULL)
    +		return (NULL);
     
    -	return (0);
    +	art_init(art, alen);
    +
    +	return (art);
     }
     
     /*
    @@ -148,30 +213,31 @@ art_check_duplicate(struct art_root *ar,
      * 8bit-long tables, there's a maximum of 4 base indexes if the
      * prefix length is > 24.
      */
    -int
    -art_bindex(struct art_table *at, const uint8_t *addr, int plen)
    +static unsigned int __pure
    +art_bindex(unsigned int offset, unsigned int bits,
    +    const uint8_t *addr, unsigned int plen)
     {
    -	uint8_t			boff, bend;
    +	unsigned int		boff, bend;
     	uint32_t		k;
     
    -	if (plen < at->at_offset || plen > (at->at_offset + at->at_bits))
    -		return (-1);
    +	KASSERT(plen >= offset);
    +	KASSERT(plen <= (offset + bits));
     
     	/*
     	 * We are only interested in the part of the prefix length
     	 * corresponding to the range of this table.
     	 */
    -	plen -= at->at_offset;
    +	plen -= offset;
     
     	/*
     	 * Jump to the first byte of the address containing bits
     	 * covered by this table.
     	 */
    -	addr += (at->at_offset / 8);
    +	addr += (offset / 8);
     
     	/* ``at'' covers the bit range between ``boff'' & ``bend''. */
    -	boff = (at->at_offset % 8);
    -	bend = (at->at_bits + boff);
    +	boff = (offset % 8);
    +	bend = (bits + boff);
     
     	KASSERT(bend <= 32);
     
    @@ -188,25 +254,13 @@ art_bindex(struct art_table *at, const u
     		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
     		k |= addr[1] >> (16 - bend);
     	} else {
    -		k = (addr[0] >> (8 - bend)) & ((1 << at->at_bits) - 1);
    +		k = (addr[0] >> (8 - bend)) & ((1 << bits) - 1);
     	}
     
     	/*
     	 * Single level base index formula:
     	 */
    -	return ((k >> (at->at_bits - plen)) + (1 << plen));
    -}
    -
    -/*
    - * Single level lookup function.
    - *
    - * Return the fringe index of the part of ``addr''
    - * corresponding to the range covered by the table ``at''.
    - */
    -static inline int
    -art_findex(struct art_table *at, const uint8_t *addr)
    -{
    -	return art_bindex(at, addr, (at->at_offset + at->at_bits));
    +	return ((k >> (bits - plen)) + (1 << plen));
     }
     
     /*
    @@ -215,60 +269,94 @@ art_findex(struct art_table *at, const u
      * Return the best existing match for a destination.
      */
     struct art_node *
    -art_match(struct art_root *ar, const void *addr, struct srp_ref *nsr)
    +art_match(struct art *art, const void *addr)
     {
    -	struct srp_ref		dsr, ndsr;
    -	void			*entry;
    -	struct art_table	*at;
    -	struct art_node		*dflt, *ndflt;
    -	int			j;
    -
    -	entry = srp_enter(nsr, &ar->ar_root);
    -	at = entry;
    +	unsigned int		 offset = 0;
    +	unsigned int		 level = 0;
    +	art_heap_entry		*heap;
    +	art_heap_entry		 ahe, dahe = 0;
    +	struct art_node		*an;
    +	int			 j;
     
    -	if (at == NULL)
    -		goto done;
    +	SMR_ASSERT_CRITICAL();
     
    -	/*
    -	 * Remember the default route of each table we visit in case
    -	 * we do not find a better matching route.
    -	 */
    -	dflt = srp_enter(&dsr, &at->at_default);
    +	heap = SMR_PTR_GET(&art->art_root);
    +	if (heap == NULL)
    +		return (NULL);
     
     	/*
     	 * Iterate until we find a leaf.
     	 */
    -	while (1) {
    +	for (;;) {
    +		unsigned int bits = art->art_levels[level];
    +		unsigned int p = offset + bits;
    +
    +		/*
    +		 * Remember the default route of each table we visit in case
    +		 * we do not find a better matching route.
    +		 */
    +		ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]);
    +		if (ahe != 0)
    +			dahe = ahe;
    +		
     		/* Do a single level route lookup. */
    -		j = art_findex(at, addr);
    -		entry = srp_follow(nsr, &at->at_heap[j].node);
    +		j = art_bindex(offset, bits, addr, p);
    +		ahe = SMR_PTR_GET(&heap[j]);
     
     		/* If this is a leaf (NULL is a leaf) we're done. */
    -		if (ISLEAF(entry))
    +		if (art_heap_entry_is_node(ahe))
     			break;
     
    -		at = SUBTABLE(entry);
    +		heap = art_heap_entry_to_heap(ahe);
    +		offset = p;
    +		level++;
    +
    +		KASSERT(level < art->art_nlevels);
    +	}
    +
    +	an = art_heap_entry_to_node(ahe);
    +	if (an != NULL)
    +		return (an);
    +
    +	return art_heap_entry_to_node(dahe);
    +}
    +
    +static int
    +art_node_check(const struct art_node *an,
    +    const uint8_t *addr, unsigned int plen)
    +{
    +	const uint32_t *wan = (const uint32_t *)an->an_addr;
    +	const uint32_t *waddr = (const uint32_t *)addr;
    +	unsigned int i = 0;
    +	unsigned int shift;
    +
    +	if (an->an_plen != plen)
    +		return (0);
    +
    +	while (plen >= 32) {
    +		if (wan[i] != waddr[i])
    +			return (0);
    +
    +		i++;
    +		plen -= 32;
    +	}
    +	if (plen == 0)
    +		return (1);
     
    -		ndflt = srp_enter(&ndsr, &at->at_default);
    -		if (ndflt != NULL) {
    -			srp_leave(&dsr);
    -			dsr = ndsr;
    -			dflt = ndflt;
    -		} else
    -			srp_leave(&ndsr);
    -	}
    -
    -	if (entry == NULL) {
    -		srp_leave(nsr);
    -		*nsr = dsr;
    -		KASSERT(ISLEAF(dflt));
    -		return (dflt);
    -	}
    -
    -	srp_leave(&dsr);
    -done:
    -	KASSERT(ISLEAF(entry));
    -	return (entry);
    +	i *= 4;
    +	while (plen >= 8) {
    +		if (an->an_addr[i] != addr[i])
    +			return (0);
    +		
    +		i++;
    +		plen -= 8;
    +	}
    +	if (plen == 0)
    +		return (1);
    +
    +	shift = 8 - plen;
    +
    +	return ((an->an_addr[i] >> shift) == (addr[i] >> shift));
     }
     
     /*
    @@ -278,24 +366,28 @@ done:
      * it does not exist.
      */
     struct art_node *
    -art_lookup(struct art_root *ar, const void *addr, int plen, struct srp_ref *nsr)
    +art_lookup(struct art *art, const void *addr, unsigned int plen)
     {
    -	void			*entry;
    -	struct art_table	*at;
    -	int			 i, j;
    +	unsigned int		 offset = 0;
    +	unsigned int		 bits, p;
    +	unsigned int		 level = 0;
    +	art_heap_entry		*heap;
    +	art_heap_entry		 ahe;
    +	struct art_node		*an;
    +	unsigned int		 i, j;
     
    -	KASSERT(plen >= 0 && plen <= ar->ar_alen);
    +	KASSERT(plen <= art->art_alen);
     
    -	entry = srp_enter(nsr, &ar->ar_root);
    -	at = entry;
    +	SMR_ASSERT_CRITICAL();
     
    -	if (at == NULL)
    -		goto done;
    +	heap = SMR_PTR_GET(&art->art_root);
    +	if (heap == NULL)
    +		return (NULL);
     
     	/* Default route */
     	if (plen == 0) {
    -		entry = srp_follow(nsr, &at->at_default);
    -		goto done;
    +		ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]);
    +		return art_heap_entry_to_node(ahe);
     	}
     
     	/*
    @@ -303,31 +395,47 @@ art_lookup(struct art_root *ar, const vo
     	 * the stride length at this level the entry must
     	 * be in the current table.
     	 */
    -	while (plen > (at->at_offset + at->at_bits)) {
    +	for (;;) {
    +		bits = art->art_levels[level];
    +		p = offset + bits;
    +		if (plen <= p)
    +			break;
    +
     		/* Do a single level route lookup. */
    -		j = art_findex(at, addr);
    -		entry = srp_follow(nsr, &at->at_heap[j].node);
    +		j = art_bindex(offset, bits, addr, p);
    +		ahe = SMR_PTR_GET(&heap[j]);
     
     		/* A leaf is a match, but not a perfect one, or NULL */
    -		if (ISLEAF(entry))
    +		if (art_heap_entry_is_node(ahe))
     			return (NULL);
     
    -		at = SUBTABLE(entry);
    +		heap = art_heap_entry_to_heap(ahe);
    +		offset = p;
    +		level++;
    +
    +		KASSERT(level < art->art_nlevels);
     	}
     
    -	i = art_bindex(at, addr, plen);
    -	if (i == -1)
    -		return (NULL);
    +	i = art_bindex(offset, bits, addr, plen);
    +	ahe = SMR_PTR_GET(&heap[i]);
    +	if (!art_heap_entry_is_node(ahe)) {
    +		heap = art_heap_entry_to_heap(ahe);
    +		ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]);
    +	}
     
    -	entry = srp_follow(nsr, &at->at_heap[i].node);
    -	if (!ISLEAF(entry))
    -		entry = srp_follow(nsr, &SUBTABLE(entry)->at_default);
    -
    -done:
    -	KASSERT(ISLEAF(entry));
    -	return (entry);
    +	/* Make sure we've got a perfect match */
    +	an = art_heap_entry_to_node(ahe);
    +	if (an != NULL && art_node_check(an, addr, plen))
    +		return (an);
    +
    +	return (NULL);
     }
     
    +int
    +art_is_empty(struct art *art)
    +{
    +	return (SMR_PTR_GET_LOCKED(&art->art_root) == NULL);
    +}
     
     /*
      * Insertion API function.
    @@ -336,32 +444,38 @@ done:
      * same destination/mask pair is already present.
      */
     struct art_node *
    -art_insert(struct art_root *ar, struct art_node *an, const void *addr, int plen)
    +art_insert(struct art *art, struct art_node *an)
     {
    -	struct art_table	*at, *child;
    -	struct art_node		*node;
    -	int			 i, j;
    -
    -	rw_assert_wrlock(&ar->ar_lock);
    -	KASSERT(plen >= 0 && plen <= ar->ar_alen);
    -
    -	at = srp_get_locked(&ar->ar_root);
    -	if (at == NULL) {
    -		at = art_table_get(ar, NULL, -1);
    +	unsigned int		 p;
    +	art_heap_entry		 ahe, oahe, *ahep;
    +	art_heap_entry		*heap;
    +	struct art_node		*oan;
    +	struct art_table	*at;
    +	unsigned int		 i, j;
    +
    +	KASSERT(an->an_plen <= art->art_alen);
    +
    +	heap = SMR_PTR_GET_LOCKED(&art->art_root);
    +	if (heap == NULL) {
    +		at = art_table_get(art, NULL, -1);
     		if (at == NULL)
     			return (NULL);
     
    -		srp_swap_locked(&ar->ar_root, at);
    -	}
    +		heap = at->at_heap;
    +		SMR_PTR_SET_LOCKED(&art->art_root, heap);
    +	} else
    +		at = art_heap_to_table(heap);
     
     	/* Default route */
    -	if (plen == 0) {
    -		node = srp_get_locked(&at->at_default);
    -		if (node != NULL)
    -			return (node);
    +	if (an->an_plen == 0) {
    +		ahep = &heap[ART_HEAP_IDX_DEFAULT];
    +		oahe = SMR_PTR_GET_LOCKED(ahep);
    +		oan = art_heap_entry_to_node(oahe);
    +		if (oan != NULL)
    +			return (oan);
     
    -		art_table_ref(ar, at);
    -		srp_swap_locked(&at->at_default, an);
    +		art_table_ref(art, at);
    +		SMR_PTR_SET_LOCKED(ahep, art_node_to_heap_entry(an));
     		return (an);
     	}
     
    @@ -370,10 +484,11 @@ art_insert(struct art_root *ar, struct a
     	 * the stride length at this level the entry must
     	 * be in the current table.
     	 */
    -	while (plen > (at->at_offset + at->at_bits)) {
    +	while ((p = at->at_offset + at->at_bits) < an->an_plen) {
     		/* Do a single level route lookup. */
    -		j = art_findex(at, addr);
    -		node = srp_get_locked(&at->at_heap[j].node);
    +		j = art_bindex(at->at_offset, at->at_bits, an->an_addr, p);
    +		ahep = &heap[j];
    +		ahe = SMR_PTR_GET_LOCKED(ahep);
     
     		/*
     		 * If the node corresponding to the fringe index is
    @@ -381,84 +496,86 @@ art_insert(struct art_root *ar, struct a
     		 * entry of this node will then become the default
     		 * route of the subtable.
     		 */
    -		if (ISLEAF(node)) {
    -			child = art_table_get(ar, at, j);
    +		if (art_heap_entry_is_node(ahe)) {
    +			struct art_table *child = art_table_get(art, at, j);
     			if (child == NULL)
     				return (NULL);
     
    -			art_table_ref(ar, at);
    -			srp_swap_locked(&at->at_heap[j].node, ASNODE(child));
    +			art_table_ref(art, at);
    +
     			at = child;
    -		} else
    -			at = SUBTABLE(node);
    +			heap = at->at_heap;
    +			SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT], ahe);
    +			SMR_PTR_SET_LOCKED(ahep, art_heap_to_heap_entry(heap));
    +		} else {
    +			heap = art_heap_entry_to_heap(ahe);
    +			at = art_heap_to_table(heap);
    +		}
     	}
     
    -	i = art_bindex(at, addr, plen);
    -	if (i == -1)
    -		return (NULL);
    -
    -	return (art_table_insert(ar, at, i, an));
    -}
    -
    -/*
    - * Single level insertion.
    - */
    -struct art_node *
    -art_table_insert(struct art_root *ar, struct art_table *at, int i,
    -    struct art_node *an)
    -{
    -	struct art_node	*prev, *node;
    -
    -	node = srp_get_locked(&at->at_heap[i].node);
    -	if (!ISLEAF(node))
    -		prev = srp_get_locked(&SUBTABLE(node)->at_default);
    -	else
    -		prev = node;
    -
    -	if (art_check_duplicate(ar, prev, an))
    -		return (prev);
    +	i = art_bindex(at->at_offset, at->at_bits, an->an_addr, an->an_plen);
    +	ahep = &heap[i];
    +	oahe = SMR_PTR_GET_LOCKED(ahep);
    +	if (!art_heap_entry_is_node(oahe)) {
    +		heap = art_heap_entry_to_heap(oahe);
    +		ahep = &heap[ART_HEAP_IDX_DEFAULT];
    +		oahe = SMR_PTR_GET_LOCKED(ahep);
    +	}
     
    -	art_table_ref(ar, at);
    +	/* Check if there's an existing node */
    +	oan = art_heap_entry_to_node(oahe);
    +	if (oan != NULL && art_node_check(oan, an->an_addr, an->an_plen))
    +		return (oan);
     
     	/*
     	 * If the index `i' of the route that we are inserting is not
     	 * a fringe index, we need to allot this new route pointer to
     	 * all the corresponding fringe indices.
     	 */
    +	art_table_ref(art, at);
    +	ahe = art_node_to_heap_entry(an);
     	if (i < at->at_minfringe)
    -		art_allot(at, i, prev, an);
    -	else if (!ISLEAF(node))
    -		srp_swap_locked(&SUBTABLE(node)->at_default, an);
    +		art_allot(at, i, oahe, ahe);
     	else
    -		srp_swap_locked(&at->at_heap[i].node, an);
    +		SMR_PTR_SET_LOCKED(ahep, ahe);
     
     	return (an);
     }
     
    -
     /*
      * Deletion API function.
      */
     struct art_node *
    -art_delete(struct art_root *ar, struct art_node *an, const void *addr, int plen)
    +art_delete(struct art *art, const void *addr, unsigned int plen)
     {
    +	unsigned int		 p;
    +	art_heap_entry		*heap;
     	struct art_table	*at;
    -	struct art_node		*node;
    -	int			 i, j;
    +	art_heap_entry		 ahe, pahe, *ahep;
    +	struct art_node		*an;
    +	unsigned int		 i, j;
     
    -	rw_assert_wrlock(&ar->ar_lock);
    -	KASSERT(plen >= 0 && plen <= ar->ar_alen);
    +	KASSERT(plen <= art->art_alen);
     
    -	at = srp_get_locked(&ar->ar_root);
    -	if (at == NULL)
    +	heap = SMR_PTR_GET_LOCKED(&art->art_root);
    +	if (heap == NULL)
     		return (NULL);
     
    +	at = art_heap_to_table(heap);
    +
     	/* Default route */
     	if (plen == 0) {
    -		node = srp_get_locked(&at->at_default);
    -		srp_swap_locked(&at->at_default, NULL);
    -		art_table_free(ar, at);
    -		return (node);
    +		ahep = &heap[ART_HEAP_IDX_DEFAULT];
    +
    +		ahe = SMR_PTR_GET_LOCKED(ahep);
    +		an = art_heap_entry_to_node(ahe);
    +		if (an == NULL)
    +			return (NULL);
    +
    +		SMR_PTR_SET_LOCKED(ahep, 0);
    +		art_table_free(art, at);
    +
    +		return (an);
     	}
     
     	/*
    @@ -466,53 +583,37 @@ art_delete(struct art_root *ar, struct a
     	 * the stride length at this level the entry must
     	 * be in the current table.
     	 */
    -	while (plen > (at->at_offset + at->at_bits)) {
    +	while ((p = at->at_offset + at->at_bits) < plen) {
     		/* Do a single level route lookup. */
    -		j = art_findex(at, addr);
    -		node = srp_get_locked(&at->at_heap[j].node);
    +		j = art_bindex(at->at_offset, at->at_bits, addr, p);
    +		ahe = SMR_PTR_GET_LOCKED(&heap[j]);
     
     		/* If this is a leaf, there is no route to delete. */
    -		if (ISLEAF(node))
    +		if (art_heap_entry_is_node(ahe))
     			return (NULL);
     
    -		at = SUBTABLE(node);
    +		heap = art_heap_entry_to_heap(ahe);
    +		at = art_heap_to_table(heap);
     	}
     
    -	i = art_bindex(at, addr, plen);
    -	if (i == -1)
    +	i = art_bindex(at->at_offset, at->at_bits, addr, plen);
    +	ahep = &heap[i];
    +	ahe = SMR_PTR_GET_LOCKED(ahep);
    +	if (!art_heap_entry_is_node(ahe)) {
    +		art_heap_entry *nheap = art_heap_entry_to_heap(ahe);
    +		ahep = &nheap[ART_HEAP_IDX_DEFAULT];
    +		ahe = SMR_PTR_GET_LOCKED(ahep);
    +	}
    +
    +	an = art_heap_entry_to_node(ahe);
    +	if (an == NULL) {
    +		/* No route to delete */
     		return (NULL);
    -
    -	return (art_table_delete(ar, at, i, an));
    -}
    -
    -/*
    - * Single level deletion.
    - */
    -struct art_node *
    -art_table_delete(struct art_root *ar, struct art_table *at, int i,
    -    struct art_node *an)
    -{
    -	struct art_node		*next, *node;
    -
    -#ifdef DIAGNOSTIC
    -	struct art_node		*prev;
    -#endif
    -
    -	node = srp_get_locked(&at->at_heap[i].node);
    -#ifdef DIAGNOSTIC
    -	if (!ISLEAF(node))
    -		prev = srp_get_locked(&SUBTABLE(node)->at_default);
    -	else
    -		prev = node;
    -
    -	KASSERT(prev == an);
    -#endif
    +	}
     
     	/* Get the next most specific route for the index `i'. */
    -	if ((i >> 1) > 1)
    -		next = srp_get_locked(&at->at_heap[i >> 1].node);
    -	else
    -		next = NULL;
    +	j = i >> 1;
    +	pahe = (j > 1) ? SMR_PTR_GET_LOCKED(&heap[j]) : 0;
     
     	/*
     	 * If the index `i' of the route that we are removing is not
    @@ -520,20 +621,18 @@ art_table_delete(struct art_root *ar, st
     	 * route pointer to all the corresponding fringe indices.
     	 */
     	if (i < at->at_minfringe)
    -		art_allot(at, i, an, next);
    -	else if (!ISLEAF(node))
    -		srp_swap_locked(&SUBTABLE(node)->at_default, next);
    +		art_allot(at, i, ahe, pahe);
     	else
    -		srp_swap_locked(&at->at_heap[i].node, next);
    +		SMR_PTR_SET_LOCKED(ahep, pahe);
     
     	/* We have removed an entry from this table. */
    -	art_table_free(ar, at);
    +	art_table_free(art, at);
     
     	return (an);
     }
     
     struct art_table *
    -art_table_ref(struct art_root *ar, struct art_table *at)
    +art_table_ref(struct art *art, struct art_table *at)
     {
     	at->at_refcnt++;
     	return (at);
    @@ -544,12 +643,11 @@ art_table_rele(struct art_table *at)
     {
     	if (at == NULL)
     		return (0);
    -
     	return (--at->at_refcnt == 0);
     }
     
     int
    -art_table_free(struct art_root *ar, struct art_table *at)
    +art_table_free(struct art *art, struct art_table *at)
     {
     	if (art_table_rele(at)) {
     		/*
    @@ -557,7 +655,7 @@ art_table_free(struct art_root *ar, stru
     		 * that are empty.
     		 */
     		do {
    -			at = art_table_put(ar, at);
    +			at = art_table_put(art, at);
     		} while (art_table_rele(at));
     
     		return (1);
    @@ -569,116 +667,187 @@ art_table_free(struct art_root *ar, stru
     /*
      * Iteration API function.
      */
    -int
    -art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg)
    +
    +static struct art_node *
    +art_iter_descend(struct art_iter *ai, art_heap_entry *heap,
    +    art_heap_entry pahe)
     {
    -	struct srp_ref		 sr;
    -	struct art_table	*at;
    -	struct art_node		*node;
    -	int			 error = 0;
    +	struct art_table *at;
    +	art_heap_entry ahe;
     
    -	rw_enter_write(&ar->ar_lock);
    -	at = srp_get_locked(&ar->ar_root);
    -	if (at != NULL) {
    -		art_table_ref(ar, at);
    +	at = art_heap_to_table(heap);
    +	ai->ai_table = art_table_ref(ai->ai_art, at);
     
    -		/*
    -		 * The default route should be processed here because the root
    -		 * table does not have a parent.
    -		 */
    -		node = srp_enter(&sr, &at->at_default);
    -		error = art_walk_apply(ar, node, NULL, f, arg);
    -		srp_leave(&sr);
    +	/*
    +	 * Start looking at non-fringe nodes.
    +	 */
    +	ai->ai_j = 1;
     
    -		if (error == 0)
    -			error = art_table_walk(ar, at, f, arg);
    +	/*
    +	 * The default route (index 1) is processed by the
    +	 * parent table (where it belongs) otherwise it could
    +	 * be processed more than once.
    +	 */
    +	ai->ai_i = 2;
    +
    +	/*
    +	 * Process the default route now.
    +	 */
    +	ahe = SMR_PTR_GET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT]);
    +	if (ahe != 0 && ahe != pahe)
    +		return (art_heap_entry_to_node(ahe));
     
    -		art_table_free(ar, at);
    +	/*
    +	 * Tell the caller to proceed with art_iter_next.
    +	 */
    +	return (NULL);
    +}
    +
    +struct art_node *
    +art_iter_open(struct art *art, struct art_iter *ai)
    +{
    +	art_heap_entry *heap = SMR_PTR_GET(&art->art_root);
    +	struct art_node *an;
    +
    +	ai->ai_art = art;
    +
    +	if (heap == NULL) {
    +		/* empty, we're already done */
    +		ai->ai_table = NULL;
    +		return (NULL);
     	}
    -	rw_exit_write(&ar->ar_lock);
     
    -	return (error);
    +	an = art_iter_descend(ai, heap, 0);
    +	if (an != NULL)
    +		return (an); /* default route */
    +
    +	return (art_iter_next(ai));
     }
     
    -int
    -art_table_walk(struct art_root *ar, struct art_table *at,
    -    int (*f)(struct art_node *, void *), void *arg)
    +struct art_node *
    +art_iter_next(struct art_iter *ai)
     {
    -	struct srp_ref		 sr;
    -	struct art_node		*node, *next;
    -	struct art_table	*nat;
    -	int			 i, j, error = 0;
    -	uint32_t		 maxfringe = (at->at_minfringe << 1);
    +	struct art_table *at = ai->ai_table;
    +	art_heap_entry *heap = at->at_heap;
    +	art_heap_entry ahe, pahe;
    +	unsigned int i;
     
    +descend:
     	/*
     	 * Iterate non-fringe nodes in ``natural'' order.
     	 */
    -	for (j = 1; j < at->at_minfringe; j += 2) {
    -		/*
    -		 * The default route (index 1) is processed by the
    -		 * parent table (where it belongs) otherwise it could
    -		 * be processed more than once.
    -		 */
    -		for (i = max(j, 2); i < at->at_minfringe; i <<= 1) {
    -			next = srp_get_locked(&at->at_heap[i >> 1].node);
    -
    -			node = srp_enter(&sr, &at->at_heap[i].node);
    -			error = art_walk_apply(ar, node, next, f, arg);
    -			srp_leave(&sr);
     +	if (ai->ai_j < at->at_minfringe) {
    +		for (;;) {
    +			while ((i = ai->ai_i) < at->at_minfringe) {
    +				ai->ai_i = i << 1;
    +
    +				pahe = SMR_PTR_GET_LOCKED(&heap[i >> 1]);
    +				ahe = SMR_PTR_GET_LOCKED(&heap[i]);
    +				if (ahe != 0 && ahe != pahe)
    +					return (art_heap_entry_to_node(ahe));
    +			}
     
    -			if (error != 0)
    -				return (error);
    +			ai->ai_j += 2;
    +			if (ai->ai_j < at->at_minfringe)
    +				ai->ai_i = ai->ai_j;
    +			else {
    +				/* Set up the fringe loop */
    +				ai->ai_i = at->at_minfringe;
    +				break;
    +			}
     		}
     	}
     
     	/*
    -	 * Iterate fringe nodes.
     +	 * Descendant tables are only found on fringe nodes, and
    +	 * they record where they were placed in their parent. This
    +	 * allows the iterator to know where to resume traversal when
    +	 * it ascends back to the parent table. By keeping the table
     +	 * refs when going down the tree, the topology is preserved
    +	 * even if all the nodes are removed.
     	 */
    -	for (i = at->at_minfringe; i < maxfringe; i++) {
    -		next = srp_get_locked(&at->at_heap[i >> 1].node);
     
    -		node = srp_enter(&sr, &at->at_heap[i].node);
    -		if (!ISLEAF(node)) {
    -			nat = art_table_ref(ar, SUBTABLE(node));
    -			node = srp_follow(&sr, &nat->at_default);
    -		} else
    -			nat = NULL;
    +	for (;;) {
    +		unsigned int maxfringe = at->at_minfringe << 1;
    +		struct art_table *parent;
     
    -		error = art_walk_apply(ar, node, next, f, arg);
    -		srp_leave(&sr);
    +		/*
    +		 * Iterate fringe nodes.
    +		 */
    +		while ((i = ai->ai_i) < maxfringe) {
    +			ai->ai_i = i + 1;
     
    -		if (error != 0) {
    -			art_table_free(ar, nat);
    -			return (error);
    +			pahe = SMR_PTR_GET_LOCKED(&heap[i >> 1]);
    +			ahe = SMR_PTR_GET_LOCKED(&heap[i]);
    +			if (art_heap_entry_is_node(ahe)) {
    +				if (ahe != 0 && ahe != pahe)
    +					return (art_heap_entry_to_node(ahe));
    +			} else {
    +				struct art_node *an;
     +
    +				heap = art_heap_entry_to_heap(ahe);
    +
    +				an = art_iter_descend(ai, heap, pahe);
    +				if (an != NULL) /* default route? */
    +					return (an);
    +
    +				/* Start looping over the child table */
    +				at = art_heap_to_table(heap);
    +				goto descend;
    +			}
     		}
     
    -		if (nat != NULL) {
    -			error = art_table_walk(ar, nat, f, arg);
    -			art_table_free(ar, nat);
    -			if (error != 0)
    -				return (error);
    +		/*
    +		 * Ascend back up to the parent
    +		 */
    +		parent = at->at_parent;
    +		ai->ai_i = at->at_index + 1;
    +		art_table_free(ai->ai_art, at);
    +
    +		ai->ai_table = parent;
    +		if (parent == NULL) {
    +			/* The root table has no parent */
    +			break;
     		}
    +
    +		at = parent;
    +		ai->ai_j = at->at_minfringe;
    +		heap = at->at_heap;
     	}
     
    -	return (0);
    +	return (NULL);
     }
     
    -int
    -art_walk_apply(struct art_root *ar,
    -    struct art_node *an, struct art_node *next,
    -    int (*f)(struct art_node *, void *), void *arg)
    +void
    +art_iter_close(struct art_iter *ai)
     {
    -	int error = 0;
    +	struct art_table *at, *parent;
     
    -	if ((an != NULL) && (an != next)) {
    -		rw_exit_write(&ar->ar_lock);
    -		error = (*f)(an, arg);
    -		rw_enter_write(&ar->ar_lock);
    +	for (at = ai->ai_table; at != NULL; at = parent) {
    +		parent = at->at_parent;
    +		art_table_free(ai->ai_art, at);
     	}
     
    -	return (error);
    +	ai->ai_table = NULL;
     }
     
    +int
    +art_walk(struct art *art, int (*f)(struct art_node *, void *), void *arg)
    +{
    +	struct art_iter		 ai;
    +	struct art_node		*an;
    +	int			 error = 0;
    +
    +	ART_FOREACH(an, art, &ai) {
    +		error = f(an, arg);
    +		if (error != 0) {
    +			art_iter_close(&ai);
    +			return (error);
    +		}
    +	}
    +
    +	return (0);
    +}
     
     /*
      * Create a table and use the given index to set its default route.
    @@ -686,89 +855,90 @@ art_walk_apply(struct art_root *ar,
      * Note:  This function does not modify the root or the parent.
      */
     struct art_table *
    -art_table_get(struct art_root *ar, struct art_table *parent, int j)
    +art_table_get(struct art *art, struct art_table *parent, unsigned int j)
     {
     	struct art_table	*at;
    -	struct art_node		*node;
    -	void			*at_heap;
    -	uint32_t		 lvl;
    +	art_heap_entry		*heap;
    +	unsigned int		 level;
    +	unsigned int		 bits;
     
    -	KASSERT(j != 0 && j != 1);
    +	KASSERT(j != ART_HEAP_IDX_TABLE && j != ART_HEAP_IDX_DEFAULT);
     	KASSERT(parent != NULL || j == -1);
     
    -	if (parent != NULL)
    -		lvl = parent->at_level + 1;
    -	else
    -		lvl = 0;
    -
    -	KASSERT(lvl < ar->ar_nlvl);
    +	level = (parent != NULL) ? parent->at_level + 1 : 0;
    +	KASSERT(level < art->art_nlevels);
     
     	at = pool_get(&at_pool, PR_NOWAIT|PR_ZERO);
     	if (at == NULL)
     		return (NULL);
     
    -	switch (AT_HEAPSIZE(ar->ar_bits[lvl])) {
    -	case AT_HEAPSIZE(4):
    -		at_heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO);
    +	bits = art->art_levels[level];
    +	switch (bits) {
    +	case 4:
    +		heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO);
     		break;
    -	case AT_HEAPSIZE(8):
    -		at_heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO);
    +	case 8:
    +		heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO);
     		break;
     	default:
    -		panic("incorrect stride length %u", ar->ar_bits[lvl]);
    +		panic("incorrect stride length %u", bits);
     	}
     
    -	if (at_heap == NULL) {
    +	if (heap == NULL) {
     		pool_put(&at_pool, at);
     		return (NULL);
     	}
     
    +	heap[ART_HEAP_IDX_TABLE] = (art_heap_entry)at;
    +
     	at->at_parent = parent;
     	at->at_index = j;
    -	at->at_minfringe = (1 << ar->ar_bits[lvl]);
    -	at->at_level = lvl;
    -	at->at_bits = ar->ar_bits[lvl];
    -	at->at_heap = at_heap;
    +	at->at_minfringe = (1 << bits);
    +	at->at_level = level;
    +	at->at_bits = bits;
    +	at->at_heap = heap;
     	at->at_refcnt = 0;
     
     	if (parent != NULL) {
    -		node = srp_get_locked(&parent->at_heap[j].node);
    -		/* node isn't being deleted, no srp_finalize needed */
    -		srp_swap_locked(&at->at_default, node);
    +		art_heap_entry ahe;
    +
     		at->at_offset = (parent->at_offset + parent->at_bits);
    +
    +		ahe = SMR_PTR_GET_LOCKED(&parent->at_heap[j]);
    +		SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT], ahe);
     	}
     
     	return (at);
     }
     
    -
     /*
      * Delete a table and use its index to restore its parent's default route.
      *
      * Note:  Modify its parent to unlink the table from it.
      */
     struct art_table *
    -art_table_put(struct art_root *ar, struct art_table *at)
    +art_table_put(struct art *art, struct art_table *at)
     {
     	struct art_table	*parent = at->at_parent;
    -	struct art_node		*node;
    -	uint32_t		 j = at->at_index;
    +	unsigned int		 j = at->at_index;
     
     	KASSERT(at->at_refcnt == 0);
     	KASSERT(j != 0 && j != 1);
     
     	if (parent != NULL) {
    +		art_heap_entry ahe;
    +
     		KASSERT(j != -1);
     		KASSERT(at->at_level == parent->at_level + 1);
     		KASSERT(parent->at_refcnt >= 1);
     
     		/* Give the route back to its parent. */
    -		node = srp_get_locked(&at->at_default);
    -		srp_swap_locked(&parent->at_heap[j].node, node);
    +		ahe = SMR_PTR_GET_LOCKED(&at->at_heap[ART_HEAP_IDX_DEFAULT]);
    +		SMR_PTR_SET_LOCKED(&parent->at_heap[j], ahe);
     	} else {
     		KASSERT(j == -1);
     		KASSERT(at->at_level == 0);
    -		srp_swap_locked(&ar->ar_root, NULL);
    +		SMR_PTR_SET_LOCKED(&art->art_root, NULL);
     	}
     
     	mtx_enter(&art_table_gc_mtx);
    @@ -791,19 +961,16 @@ art_table_gc(void *null)
     	art_table_gc_list = NULL;
     	mtx_leave(&art_table_gc_mtx);
     
    +	smr_barrier();
    +
     	while (at != NULL) {
     		next = at->at_parent;
     
    -		if (at->at_level == 0)
    -			srp_finalize(at, "arttfini");
    -		else
    -			srp_finalize(ASNODE(at), "arttfini");
    -
    -		switch (AT_HEAPSIZE(at->at_bits)) {
    -		case AT_HEAPSIZE(4):
    +		switch (at->at_bits) {
    +		case 4:
     			pool_put(&at_heap_4_pool, at->at_heap);
     			break;
    -		case AT_HEAPSIZE(8):
    +		case 8:
     			pool_put(&at_heap_8_pool, at->at_heap);
     			break;
     		default:
    @@ -839,40 +1006,45 @@ art_table_gc(void *null)
      *		art_allot(at, k, old, new);
      *	}
      */
    -void
    -art_allot(struct art_table *at, int i, struct art_node *old,
    -    struct art_node *new)
    -{
    -	struct art_node		*node, *dflt;
    -	int			 k = i;
    +static void
    +art_allot(struct art_table *at, unsigned int i,
    +    art_heap_entry oahe, art_heap_entry nahe)
    +{
    +	art_heap_entry		 ahe, *ahep;
    +	art_heap_entry		*heap;
    +	unsigned int		 k = i;
     
     	KASSERT(i < at->at_minfringe);
     
    +	heap = at->at_heap;
    +
     again:
     	k <<= 1;
     	if (k < at->at_minfringe)
     		goto nonfringe;
     
     	/* Change fringe nodes. */
    -	while (1) {
    -		node = srp_get_locked(&at->at_heap[k].node);
    -		if (!ISLEAF(node)) {
    -			dflt = srp_get_locked(&SUBTABLE(node)->at_default);
    -			if (dflt == old) {
    -				srp_swap_locked(&SUBTABLE(node)->at_default,
    -				    new);
    -			}
    -		} else if (node == old) {
    -			srp_swap_locked(&at->at_heap[k].node, new);
    +	for (;;) {
    +		ahep = &heap[k];
    +		ahe = SMR_PTR_GET_LOCKED(ahep);
    +
    +		if (!art_heap_entry_is_node(ahe)) {
    +			art_heap_entry *child = art_heap_entry_to_heap(ahe);
    +			ahep = &child[ART_HEAP_IDX_DEFAULT];
    +			ahe = SMR_PTR_GET_LOCKED(ahep);
     		}
    +
    +		if (ahe == oahe)
    +			SMR_PTR_SET_LOCKED(ahep, nahe);
    +
     		if (k % 2)
     			goto moveup;
     		k++;
     	}
     
     nonfringe:
    -	node = srp_get_locked(&at->at_heap[k].node);
    -	if (node == old)
    +	ahe = SMR_PTR_GET_LOCKED(&heap[k]);
    +	if (ahe == oahe)
     		goto again;
     moveon:
     	if (k % 2)
    @@ -881,7 +1053,7 @@ moveon:
     	goto nonfringe;
     moveup:
     	k >>= 1;
    -	srp_swap_locked(&at->at_heap[k].node, new);
    +	SMR_PTR_SET_LOCKED(&heap[k], nahe);
     
     	/* Change non-fringe node. */
     	if (k != i)
    @@ -889,7 +1061,7 @@ moveup:
     }
     
     struct art_node *
    -art_get(uint8_t plen)
    +art_get(const uint8_t *addr, unsigned int plen)
     {
     	struct art_node		*an;
     
    @@ -897,17 +1069,25 @@ art_get(uint8_t plen)
     	if (an == NULL)
     		return (NULL);
     
    -	an->an_plen = plen;
    -	SRPL_INIT(&an->an_rtlist);
    +	art_node_init(an, addr, plen);
     
     	return (an);
     }
     
     void
    -art_put(struct art_node *an)
    +art_node_init(struct art_node *an, const uint8_t *addr, unsigned int plen)
     {
    -	KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist));
    +	size_t len;
    +
    +	len = roundup(plen, 8) / 8;
    +	KASSERT(len <= sizeof(an->an_addr));
    +	memcpy(an->an_addr, addr, len);
    +	an->an_plen = plen;
    +}
     
    +void
    +art_put(struct art_node *an)
    +{
     	mtx_enter(&art_node_gc_mtx);
     	an->an_gc = art_node_gc_list;
     	art_node_gc_list = an;
    @@ -919,17 +1099,17 @@ art_put(struct art_node *an)
     void
     art_gc(void *null)
     {
    -	struct art_node		*an, *next;
    +	struct art_node		*an;
     
     	mtx_enter(&art_node_gc_mtx);
     	an = art_node_gc_list;
     	art_node_gc_list = NULL;
     	mtx_leave(&art_node_gc_mtx);
     
    -	while (an != NULL) {
    -		next = an->an_gc;
    +	smr_barrier();
     
    -		srp_finalize(an, "artnfini");
    +	while (an != NULL) {
    +		struct art_node *next = an->an_gc;
     
     		pool_put(&an_pool, an);
     
    Index: sys/net/art.h
    ===================================================================
    RCS file: /cvs/src/sys/net/art.h,v
    diff -u -p -r1.25 art.h
    --- sys/net/art.h	11 Nov 2023 12:17:50 -0000	1.25
    +++ sys/net/art.h	29 Jun 2025 11:35:03 -0000
    @@ -19,94 +19,151 @@
     #ifndef _NET_ART_H_
     #define _NET_ART_H_
     
    -#include <sys/rwlock.h>
    -#include <sys/srp.h>
    +/*
    + * Allotment Routing Table (ART)
    + *
    + * Yoichi Hariguchi paper can be found at:
    + *      http://www.hariguchi.org/art/art.pdf
    + *
    + * Locking:
    + *
    + * Modification (ie, art_insert or art_delete) and iteration
    + * (art_iter_next, etc) over the ART must be serialised by the caller.
    + * Lookups (ie, art_match and art_lookup) run within an SMR critical
    + * section.
    + *
    + * Iteration requires serialisation as it manipulates the reference
    + * counts on tables as it traverses the tree. The iterator maintains
    + * these references until it runs out of entries. This allows code
    + * iterating over the ART to release locks in between calls to
    + * art_iter_open and art_iter_next. The references may be dropped
    + * early with art_iter_close.
    + *
    + * Note, the iterator does not hold a reference to the art_node
    + * structure or the data hanging off the an_value pointer, they must
    + * be accounted for separately or their use must be serialised with
    + * art_delete.
    + */
     
    -#define ART_MAXLVL	32	/* We currently use 32 levels for IPv6. */
    +typedef uintptr_t		 art_heap_entry;
     
     /*
    - * Root of the ART tables, equivalent to the radix head.
    - *
    - *  Locks used to protect struct members in this file:
    - *	I	immutable after creation
    - *	l	root's `ar_lock'
    - *	K	kernel lock
    - *  For SRP related structures that allow lock-free reads, the write lock
    - *  is indicated below.
    + * Root of the ART, equivalent to the radix head.
      */
    -struct art_root {
    -	struct srp		 ar_root;	/* [l] First table */
    -	struct rwlock		 ar_lock;	/* [] Serialise modifications */
    -	uint8_t			 ar_bits[ART_MAXLVL]; /* [I] Per level stride */
    -	uint8_t			 ar_nlvl;	/* [I] Number of levels */
    -	uint8_t			 ar_alen;	/* [I] Address length in bits */
    -	uint8_t			 ar_off;	/* [I] Offset of key in bytes */
    -	struct sockaddr		*ar_source;	/* [N] use optional src addr */
    -};
     
    -#define ISLEAF(e)	(((unsigned long)(e) & 1) == 0)
    -#define SUBTABLE(e)	((struct art_table *)((unsigned long)(e) & ~1))
    -#define ASNODE(t)	((struct art_node *)((unsigned long)(t) | 1))
    +struct art {
    +	art_heap_entry		*art_root;
    +	const unsigned int	*art_levels;
    +	unsigned int		 art_nlevels;
    +	unsigned int		 art_alen;
    +};
     
     /*
      * Allotment Table.
      */
     struct art_table {
    +	art_heap_entry		*at_heap;
     	struct art_table	*at_parent;	/* Parent table */
    -	uint32_t		 at_index;	/* Index in the parent table */
    -	uint32_t		 at_minfringe;	/* Index that fringe begins */
    -	uint32_t		 at_level;	/* Level of the table */
    -	uint8_t			 at_bits;	/* Stride length of the table */
    -	uint8_t			 at_offset;	/* Sum of parents' stride len */
    -
    -	/*
    -	 * Items stored in the heap are pointers to nodes, in the leaf
    -	 * case, or tables otherwise.  One exception is index 0 which
    -	 * is a route counter.
    -	 */
    -	union {
    -		struct srp		 node;
    -		unsigned long		 count;
    -	} *at_heap;				/* Array of 2^(slen+1) items */
    +
    +	unsigned int		 at_index;	/* Index in the parent table */
    +	unsigned int		 at_minfringe;	/* Index that fringe begins */
    +
    +	unsigned int		 at_level;	/* Level of the table */
    +	unsigned int		 at_bits;	/* Stride length of the table */
    +	unsigned int		 at_offset;	/* Sum of parents' stride len */
    +
    +	unsigned int		 at_refcnt;
     };
    -#define	at_refcnt	at_heap[0].count/* Refcounter (1 per different route) */
    -#define	at_default	at_heap[1].node	/* Default route (was in parent heap) */
     
    -/* Heap size for an ART table of stride length ``slen''. */
    -#define AT_HEAPSIZE(slen)	((1 << ((slen) + 1)) * sizeof(void *))
    +#define ART_HEAP_IDX_TABLE	0
    +#define ART_HEAP_IDX_DEFAULT	1
     
    -/*
    - * Forward declaration needed for the list of mpath routes
    - * attached to a single ART node.
    - */
    -struct rtentry;
    +#define AT_HEAPSIZE(bits)	((1 << ((bits) + 1)) * sizeof(art_heap_entry))
     
     /*
      * A node is the internal representation of a route entry.
      */
     struct art_node {
    +	void			*an_value;
     	union {
    -	    SRPL_HEAD(, rtentry) an__rtlist;	/* Route related to this node */
    -	    struct art_node	*an__gc;	/* Entry on GC list */
    -	}			 an_pointer;
    -	uint8_t			 an_plen;	/* Prefix length */
    +		struct art_node		*an__gc;
    +		uint8_t			 an__addr[16];
    +	}			 an__u;
    +#define an_gc			 an__u.an__gc
    +#define an_addr			 an__u.an__addr
    +	unsigned int		 an_plen;
     };
    -#define an_rtlist	an_pointer.an__rtlist
    -#define an_gc		an_pointer.an__gc
     
    -void		 art_init(void);
    -struct art_root *art_alloc(unsigned int, unsigned int, unsigned int);
    -struct art_node *art_insert(struct art_root *, struct art_node *, const void *,
    -		     int);
    -struct art_node *art_delete(struct art_root *, struct art_node *, const void *,
    -		     int);
    -struct art_node *art_match(struct art_root *, const void *, struct srp_ref *);
    -struct art_node *art_lookup(struct art_root *, const void *, int,
    -		     struct srp_ref *);
    -int		 art_walk(struct art_root *,
    +static inline struct art_table *
    +art_heap_to_table(art_heap_entry *heap)
    +{
    +	return ((struct art_table *)heap[ART_HEAP_IDX_TABLE]);
    +}
    +
    +static inline int
    +art_heap_entry_is_node(art_heap_entry ahe)
    +{
    +	return ((ahe & 1UL) == 0);
    +}
    +
    +static inline struct art_node *
    +art_heap_entry_to_node(art_heap_entry ahe)
    +{
    +	return ((struct art_node *)ahe);
    +}
    +
    +static inline art_heap_entry *
    +art_heap_entry_to_heap(art_heap_entry ahe)
    +{
    +	return ((art_heap_entry *)(ahe & ~1UL));
    +}
    +
    +static inline art_heap_entry
    +art_node_to_heap_entry(struct art_node *an)
    +{
    +	return ((art_heap_entry)an);
    +}
    +
    +static inline art_heap_entry
    +art_heap_to_heap_entry(art_heap_entry *heap)
    +{
    +	return ((art_heap_entry)heap | 1UL);
    +}
    +
    +#ifdef _KERNEL
    +void		 art_boot(void);
    +struct art	*art_alloc(unsigned int);
    +void		 art_init(struct art *, unsigned int);
    +struct art_node *art_insert(struct art *, struct art_node *);
    +struct art_node *art_delete(struct art *, const void *, unsigned int);
    +struct art_node *art_match(struct art *, const void *);
    +struct art_node *art_lookup(struct art *, const void *, unsigned int);
    +int		 art_is_empty(struct art *);
    +
    +struct art_node	*art_get(const uint8_t *, unsigned int);
    +void		 art_node_init(struct art_node *,
    +		     const uint8_t *, unsigned int);
    +void		 art_put(struct art_node *);
    +
    +struct art_iter {
    +	struct art		*ai_art;
    +	struct art_table	*ai_table;
    +	unsigned int		 ai_j;
    +	unsigned int		 ai_i;
    +};
    +
    +struct art_node	*art_iter_open(struct art *, struct art_iter *);
    +struct art_node	*art_iter_next(struct art_iter *);
    +void		 art_iter_close(struct art_iter *);
    +
    +#define ART_FOREACH(_an, _art, _ai)					\
    +	for ((_an) = art_iter_open((_art), (_ai));			\
    +	     (_an) != NULL;						\
    +	     (_an) = art_iter_next((_ai)))
    +
    +int		 art_walk(struct art *,
     		     int (*)(struct art_node *, void *), void *);
     
    -struct art_node *art_get(uint8_t);
    -void		 art_put(struct art_node *);
    +#endif /* _KERNEL */
     
     #endif /* _NET_ART_H_ */
    Index: sys/net/if_wg.c
    ===================================================================
    RCS file: /cvs/src/sys/net/if_wg.c,v
    diff -u -p -r1.42 if_wg.c
    --- sys/net/if_wg.c	17 Apr 2025 12:42:50 -0000	1.42
    +++ sys/net/if_wg.c	29 Jun 2025 11:35:03 -0000
    @@ -31,6 +31,7 @@
     #include <sys/ioctl.h>
     #include <sys/mbuf.h>
     #include <sys/syslog.h>
    +#include <sys/smr.h>
     
     #include <net/if.h>
     #include <net/if_var.h>
    @@ -251,10 +252,11 @@ struct wg_softc {
     	struct socket		*sc_so6;
     #endif
     
    +	struct rwlock		 sc_aip_lock;
     	size_t			 sc_aip_num;
    -	struct art_root		*sc_aip4;
    +	struct art		*sc_aip4;
     #ifdef INET6
    -	struct art_root		*sc_aip6;
    +	struct art		*sc_aip6;
     #endif
     
     	struct rwlock		 sc_peer_lock;
    @@ -290,7 +292,7 @@ void	wg_peer_counters_add(struct wg_peer
     
     int	wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *);
     struct wg_peer *
    -	wg_aip_lookup(struct art_root *, void *);
    +	wg_aip_lookup(struct art *, void *);
     int	wg_aip_remove(struct wg_softc *, struct wg_peer *,
     	    struct wg_aip_io *);
     
    @@ -609,7 +611,7 @@ wg_peer_counters_add(struct wg_peer *pee
     int
     wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
     {
    -	struct art_root	*root;
    +	struct art	*root;
     	struct art_node	*node;
     	struct wg_aip	*aip;
     	int		 ret = 0;
    @@ -625,8 +627,10 @@ wg_aip_add(struct wg_softc *sc, struct w
     	if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT|PR_ZERO)) == NULL)
     		return ENOBUFS;
     
    -	rw_enter_write(&root->ar_lock);
    -	node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr);
    +	art_node_init(&aip->a_node, &d->a_addr.addr_bytes, d->a_cidr);
    +
    +	rw_enter_write(&sc->sc_aip_lock);
    +	node = art_insert(root, &aip->a_node);
     
     	if (node == &aip->a_node) {
     		aip->a_peer = peer;
    @@ -642,18 +646,18 @@ wg_aip_add(struct wg_softc *sc, struct w
     			aip->a_peer = peer;
     		}
     	}
    -	rw_exit_write(&root->ar_lock);
    +	rw_exit_write(&sc->sc_aip_lock);
     	return ret;
     }
     
     struct wg_peer *
    -wg_aip_lookup(struct art_root *root, void *addr)
    +wg_aip_lookup(struct art *root, void *addr)
     {
    -	struct srp_ref	 sr;
     	struct art_node	*node;
     
    -	node = art_match(root, addr, &sr);
    -	srp_leave(&sr);
    +	smr_read_enter();
    +	node = art_match(root, addr);
    +	smr_read_leave();
     
     	return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer;
     }
    @@ -661,8 +665,7 @@ wg_aip_lookup(struct art_root *root, voi
     int
     wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
     {
    -	struct srp_ref	 sr;
    -	struct art_root	*root;
    +	struct art	*root;
     	struct art_node	*node;
     	struct wg_aip	*aip;
     	int		 ret = 0;
    @@ -675,23 +678,24 @@ wg_aip_remove(struct wg_softc *sc, struc
     	default: return EAFNOSUPPORT;
     	}
     
    -	rw_enter_write(&root->ar_lock);
    -	if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) {
    +	rw_enter_write(&sc->sc_aip_lock);
    +	smr_read_enter();
    +	node = art_lookup(root, &d->a_addr, d->a_cidr);
    +	smr_read_leave();
    +	if (node == NULL) {
     		ret = ENOENT;
     	} else if (((struct wg_aip *) node)->a_peer != peer) {
     		ret = EXDEV;
     	} else {
     		aip = (struct wg_aip *)node;
    -		if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL)
    +		if (art_delete(root, &d->a_addr, d->a_cidr) == NULL)
     			panic("art_delete failed to delete node %p", node);
     
     		sc->sc_aip_num--;
     		LIST_REMOVE(aip, a_entry);
     		pool_put(&wg_aip_pool, aip);
     	}
    -
    -	srp_leave(&sr);
    -	rw_exit_write(&root->ar_lock);
    +	rw_exit_write(&sc->sc_aip_lock);
     	return ret;
     }
     
    @@ -2726,10 +2730,11 @@ wg_clone_create(struct if_clone *ifc, in
     #endif
     
     	sc->sc_aip_num = 0;
    -	if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL)
    +	rw_init(&sc->sc_aip_lock, "wgaip");
    +	if ((sc->sc_aip4 = art_alloc(32)) == NULL)
     		goto ret_02;
     #ifdef INET6
    -	if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL)
    +	if ((sc->sc_aip6 = art_alloc(128)) == NULL)
     		goto ret_03;
     #endif
     
    Index: sys/net/if_wg.h
    ===================================================================
    RCS file: /cvs/src/sys/net/if_wg.h,v
    diff -u -p -r1.6 if_wg.h
    --- sys/net/if_wg.h	13 Oct 2024 00:53:21 -0000	1.6
    +++ sys/net/if_wg.h	29 Jun 2025 11:35:03 -0000
    @@ -46,6 +46,7 @@ struct wg_aip_io {
     	sa_family_t	 a_af;
     	int		 a_cidr;
     	union wg_aip_addr {
    +		uint8_t			addr_bytes;
     		struct in_addr		addr_ipv4;
     		struct in6_addr		addr_ipv6;
     	}		 a_addr;
    Index: sys/net/route.h
    ===================================================================
    RCS file: /cvs/src/sys/net/route.h,v
    diff -u -p -r1.216 route.h
    --- sys/net/route.h	16 Mar 2025 21:58:08 -0000	1.216
    +++ sys/net/route.h	29 Jun 2025 11:35:03 -0000
    @@ -40,7 +40,7 @@
      *	I	immutable after creation
      *	N	net lock
      *	X	exclusive net lock, or shared net lock + kernel lock
    - *	R	art (rtable) lock
    + *	R	rtable lock
      *	r	per route entry mutex	rt_mtx
      *	L	arp/nd6/etc lock for updates, net lock for reads
      *	T	rttimer_mtx		route timer lists
    @@ -117,7 +117,7 @@ struct rttimer;
     
     struct rtentry {
     	struct sockaddr	*rt_dest;	/* [I] destination */
    -	SRPL_ENTRY(rtentry) rt_next;	/* [R] next mpath entry to our dst */
    +	struct rtentry	*rt_next;	/* [R] next mpath entry to our dst */
     	struct sockaddr	*rt_gateway;	/* [X] gateway address */
     	struct ifaddr	*rt_ifa;	/* [N] interface addr to use */
     	caddr_t		 rt_llinfo;	/* [L] pointer to link level info or
    Index: sys/net/rtable.c
    ===================================================================
    RCS file: /cvs/src/sys/net/rtable.c,v
    diff -u -p -r1.87 rtable.c
    --- sys/net/rtable.c	9 Apr 2024 12:53:08 -0000	1.87
    +++ sys/net/rtable.c	29 Jun 2025 11:35:03 -0000
    @@ -26,12 +26,21 @@
     #include <sys/queue.h>
     #include <sys/domain.h>
     #include <sys/srp.h>
    +#include <sys/smr.h>
     #endif
     
     #include <net/rtable.h>
     #include <net/route.h>
     #include <net/art.h>
     
    +struct rtable {
    +	struct rwlock		 r_lock;
    +	struct art		*r_art;		/* [I] */
    +	unsigned int		 r_off;		/* [I] Offset of key in bytes */
    +
    +	struct sockaddr		*r_source;	/* [N] use optional src addr */
    +};
    +
     /*
      * Structures used by rtable_get() to retrieve the corresponding
      * routing table for a given pair of ``af'' and ``rtableid''.
    @@ -85,8 +94,8 @@ void		   rtmap_dtor(void *, void *);
     struct srp_gc	   rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL);
     
     void		   rtable_init_backend(void);
    -void		  *rtable_alloc(unsigned int, unsigned int, unsigned int);
    -void		  *rtable_get(unsigned int, sa_family_t);
    +struct rtable	  *rtable_alloc(unsigned int, unsigned int, unsigned int);
    +struct rtable	  *rtable_get(unsigned int, sa_family_t);
     
     void
     rtmap_init(void)
    @@ -193,7 +202,7 @@ int
     rtable_add(unsigned int id)
     {
     	const struct domain	*dp;
    -	void			*tbl;
    +	struct rtable		*tbl;
     	struct rtmap		*map;
     	struct dommp		*dmm;
     	sa_family_t		 af;
    @@ -244,11 +253,11 @@ out:
     	return (error);
     }
     
    -void *
    +struct rtable *
     rtable_get(unsigned int rtableid, sa_family_t af)
     {
     	struct rtmap	*map;
    -	void		*tbl = NULL;
    +	struct rtable	*tbl = NULL;
     	struct srp_ref	 sr;
     
     	if (af >= nitems(af2idx) || af2idx[af] == 0)
    @@ -286,7 +295,7 @@ rtable_empty(unsigned int rtableid)
     {
     	const struct domain	*dp;
     	int			 i;
    -	struct art_root		*tbl;
    +	struct rtable		*tbl;
     
     	for (i = 0; (dp = domains[i]) != NULL; i++) {
     		if (dp->dom_rtoffset == 0)
    @@ -295,7 +304,7 @@ rtable_empty(unsigned int rtableid)
     		tbl = rtable_get(rtableid, dp->dom_family);
     		if (tbl == NULL)
     			continue;
    -		if (tbl->ar_root.ref != NULL)
    +		if (!art_is_empty(tbl->r_art))
     			return (0);
     	}
     
    @@ -350,40 +359,51 @@ rtable_l2set(unsigned int rtableid, unsi
     }
     
     
    -static inline const uint8_t *satoaddr(struct art_root *,
    +static inline const uint8_t *satoaddr(struct rtable *,
         const struct sockaddr *);
     
    -int	an_match(struct art_node *, const struct sockaddr *, int);
    -void	rtentry_ref(void *, void *);
    -void	rtentry_unref(void *, void *);
    -
     void	rtable_mpath_insert(struct art_node *, struct rtentry *);
     
    -struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL);
    -
     void
     rtable_init_backend(void)
     {
    -	art_init();
    +	art_boot();
     }
     
    -void *
    +struct rtable *
     rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
     {
    -	return (art_alloc(rtableid, alen, off));
    +	struct rtable *r;
    +
    +	r = malloc(sizeof(*r), M_RTABLE, M_NOWAIT|M_ZERO);
    +	if (r == NULL)
    +		return (r);
    +
     +
    +	if (r->r_art == NULL) {
    +		free(r, M_RTABLE, sizeof(*r));
    +		return (NULL);
    +	}
    +
    +	rw_init(&r->r_lock, "rtable");
    +	r->r_off = off;
    +	r->r_source = NULL;
    +
    +	return (r);
     }
     
     int
     rtable_setsource(unsigned int rtableid, int af, struct sockaddr *src)
     {
    -	struct art_root		*ar;
    +	struct rtable		*r;
     
     	NET_ASSERT_LOCKED_EXCLUSIVE();
     
    -	if ((ar = rtable_get(rtableid, af)) == NULL)
    +	if ((r = rtable_get(rtableid, af)) == NULL)
     		return (EAFNOSUPPORT);
     
    -	ar->ar_source = src;
    +	r->r_source = src;
     
     	return (0);
     }
    @@ -391,15 +411,15 @@ rtable_setsource(unsigned int rtableid, 
     struct sockaddr *
     rtable_getsource(unsigned int rtableid, int af)
     {
    -	struct art_root		*ar;
    +	struct rtable		*r;
     
     	NET_ASSERT_LOCKED();
     
    -	ar = rtable_get(rtableid, af);
    -	if (ar == NULL)
    +	r = rtable_get(rtableid, af);
    +	if (r == NULL)
     		return (NULL);
     
    -	return (ar->ar_source);
    +	return (r->r_source);
     }
     
     void
    @@ -419,37 +439,34 @@ struct rtentry *
     rtable_lookup(unsigned int rtableid, const struct sockaddr *dst,
         const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio)
     {
    -	struct art_root			*ar;
    +	struct rtable			*r;
     	struct art_node			*an;
     	struct rtentry			*rt = NULL;
    -	struct srp_ref			 sr, nsr;
     	const uint8_t			*addr;
     	int				 plen;
     
    -	ar = rtable_get(rtableid, dst->sa_family);
    -	if (ar == NULL)
    +	r = rtable_get(rtableid, dst->sa_family);
    +	if (r == NULL)
     		return (NULL);
     
    -	addr = satoaddr(ar, dst);
    +	addr = satoaddr(r, dst);
     
    -	/* No need for a perfect match. */
    +	smr_read_enter();
     	if (mask == NULL) {
    -		an = art_match(ar, addr, &nsr);
    -		if (an == NULL)
    -			goto out;
    +		/* No need for a perfect match. */
    +		an = art_match(r->r_art, addr);
     	} else {
     		plen = rtable_satoplen(dst->sa_family, mask);
     		if (plen == -1)
    -			return (NULL);
    -
    -		an = art_lookup(ar, addr, plen, &nsr);
    -
    -		/* Make sure we've got a perfect match. */
    -		if (!an_match(an, dst, plen))
     			goto out;
    +
    +		an = art_lookup(r->r_art, addr, plen);
     	}
    +	if (an == NULL)
    +		goto out;
     
    -	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
    +	for (rt = SMR_PTR_GET(&an->an_value); rt != NULL;
    +	    rt = SMR_PTR_GET(&rt->rt_next)) {
     		if (prio != RTP_ANY &&
     		    (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
     			continue;
    @@ -464,9 +481,8 @@ rtable_lookup(unsigned int rtableid, con
     	if (rt != NULL)
     		rtref(rt);
     
    -	SRPL_LEAVE(&sr);
     out:
    -	srp_leave(&nsr);
    +	smr_read_leave();
     
     	return (rt);
     }
    @@ -474,44 +490,41 @@ out:
     struct rtentry *
     rtable_match(unsigned int rtableid, const struct sockaddr *dst, uint32_t *src)
     {
    -	struct art_root			*ar;
    +	struct rtable			*r;
     	struct art_node			*an;
     	struct rtentry			*rt = NULL;
    -	struct srp_ref			 sr, nsr;
     	const uint8_t			*addr;
     	int				 hash;
    +	uint8_t				 prio;
     
    -	ar = rtable_get(rtableid, dst->sa_family);
    -	if (ar == NULL)
    +	r = rtable_get(rtableid, dst->sa_family);
    +	if (r == NULL)
     		return (NULL);
     
    -	addr = satoaddr(ar, dst);
    +	addr = satoaddr(r, dst);
     
    -	an = art_match(ar, addr, &nsr);
    +	smr_read_enter();
    +	an = art_match(r->r_art, addr);
     	if (an == NULL)
     		goto out;
     
    -	rt = SRPL_FIRST(&sr, &an->an_rtlist);
    -	if (rt == NULL) {
    -		SRPL_LEAVE(&sr);
    -		goto out;
    -	}
    -	rtref(rt);
    -	SRPL_LEAVE(&sr);
    +	rt = SMR_PTR_GET(&an->an_value);
    +	KASSERT(rt != NULL);
    +	prio = rt->rt_priority;
     
     	/* Gateway selection by Hash-Threshold (RFC 2992) */
     	if ((hash = rt_hash(rt, dst, src)) != -1) {
     		struct rtentry		*mrt;
    -		int			 threshold, npaths = 0;
    +		int			 threshold, npaths = 1;
     
     		KASSERT(hash <= 0xffff);
     
    -		SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) {
    -			/* Only count nexthops with the same priority. */
    -			if (mrt->rt_priority == rt->rt_priority)
    +		/* Only count nexthops with the same priority. */
    +		mrt = rt;
    +		while ((mrt = SMR_PTR_GET(&mrt->rt_next)) != NULL) {
    +			if (mrt->rt_priority == prio)
     				npaths++;
     		}
    -		SRPL_LEAVE(&sr);
     
     		threshold = (0xffff / npaths) + 1;
     
    @@ -520,27 +533,22 @@ rtable_match(unsigned int rtableid, cons
     		 * route list attached to the node, so we won't necessarily
     		 * have the same number of routes.  for most modifications,
     		 * we'll pick a route that we wouldn't have if we only saw the
    -		 * list before or after the change.  if we were going to use
    -		 * the last available route, but it got removed, we'll hit
    -		 * the end of the list and then pick the first route.
    +		 * list before or after the change.
     		 */
    -
    -		mrt = SRPL_FIRST(&sr, &an->an_rtlist);
    -		while (hash > threshold && mrt != NULL) {
    -			if (mrt->rt_priority == rt->rt_priority)
    +		mrt = rt;
    +		while (hash > threshold) {
    +			if (mrt->rt_priority == prio) {
    +				rt = mrt;
     				hash -= threshold;
    -			mrt = SRPL_FOLLOW(&sr, mrt, rt_next);
    -		}
    -
    -		if (mrt != NULL) {
    -			rtref(mrt);
    -			rtfree(rt);
    -			rt = mrt;
    +			}
    +			mrt = SMR_PTR_GET(&mrt->rt_next);
    +			if (mrt == NULL)
    +				break;
     		}
    -		SRPL_LEAVE(&sr);
     	}
    +	rtref(rt);
     out:
    -	srp_leave(&nsr);
    +	smr_read_leave();
     	return (rt);
     }
     
    @@ -549,35 +557,59 @@ rtable_insert(unsigned int rtableid, str
         const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio,
         struct rtentry *rt)
     {
    -	struct rtentry			*mrt;
    -	struct srp_ref			 sr;
    -	struct art_root			*ar;
    +	struct rtable			*r;
     	struct art_node			*an, *prev;
     	const uint8_t			*addr;
     	int				 plen;
     	unsigned int			 rt_flags;
     	int				 error = 0;
     
    -	ar = rtable_get(rtableid, dst->sa_family);
    -	if (ar == NULL)
    +	r = rtable_get(rtableid, dst->sa_family);
    +	if (r == NULL)
     		return (EAFNOSUPPORT);
     
    -	addr = satoaddr(ar, dst);
    +	addr = satoaddr(r, dst);
     	plen = rtable_satoplen(dst->sa_family, mask);
     	if (plen == -1)
     		return (EINVAL);
     
    -	rtref(rt); /* guarantee rtfree won't do anything during insert */
    -	rw_enter_write(&ar->ar_lock);
    +	an = art_get(addr, plen);
    +	if (an == NULL)
    +		return (ENOMEM);
    +
    +	/* prepare for immediate operation if insert succeeds */
    +	rt_flags = rt->rt_flags;
    +	rt->rt_flags &= ~RTF_MPATH;
    +	rt->rt_dest = dst;
    +	rt->rt_plen = plen;
    +	rt->rt_next = NULL;
    +
    +	rtref(rt); /* take a ref for the table */
    +	an->an_value = rt;
    +
    +	rw_enter_write(&r->r_lock);
    +	prev = art_insert(r->r_art, an);
    +	if (prev == NULL) {
    +		error = ENOMEM;
    +		goto put;
    +	}
    +
    +	if (prev != an) {
    +		struct rtentry *mrt;
    +		int mpathok = ISSET(rt_flags, RTF_MPATH);
    +		int mpath = 0;
     
    -	/* Do not permit exactly the same dst/mask/gw pair. */
    -	an = art_lookup(ar, addr, plen, &sr);
    -	srp_leave(&sr); /* an can't go away while we have the lock */
    -	if (an_match(an, dst, plen)) {
    -		struct rtentry  *mrt;
    -		int		 mpathok = ISSET(rt->rt_flags, RTF_MPATH);
    +		/*
    +		 * An ART node with the same destination/netmask already
    +		 * exists.
    +		 */
    +		art_put(an);
    +		an = prev;
     
    -		SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
    +		/* Do not permit exactly the same dst/mask/gw pair. */
    +		for (mrt = SMR_PTR_GET_LOCKED(&an->an_value);
    +		     mrt != NULL;
    +		     mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) {
     			if (prio != RTP_ANY &&
     			    (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
     				continue;
    @@ -589,63 +621,34 @@ rtable_insert(unsigned int rtableid, str
     				error = EEXIST;
     				goto leave;
     			}
    +			mpath = RTF_MPATH;
     		}
    -	}
    -
    -	an = art_get(plen);
    -	if (an == NULL) {
    -		error = ENOBUFS;
    -		goto leave;
    -	}
    -
    -	/* prepare for immediate operation if insert succeeds */
    -	rt_flags = rt->rt_flags;
    -	rt->rt_flags &= ~RTF_MPATH;
    -	rt->rt_dest = dst;
    -	rt->rt_plen = plen;
    -	SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
    -
    -	prev = art_insert(ar, an, addr, plen);
    -	if (prev != an) {
    -		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
    -		    rt_next);
    -		rt->rt_flags = rt_flags;
    -		art_put(an);
    -
    -		if (prev == NULL) {
    -			error = ESRCH;
    -			goto leave;
    -		}
    -
    -		an = prev;
     
    -		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
    -		KASSERT(mrt != NULL);
    -		KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio);
    +		/* The new route can be added to the list. */
    +		if (mpath) {
    +			SET(rt->rt_flags, RTF_MPATH);
    +
    +			for (mrt = SMR_PTR_GET_LOCKED(&an->an_value);
    +			     mrt != NULL;
    +			     mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) {
    +				if ((mrt->rt_priority & RTP_MASK) !=
    +				    (prio & RTP_MASK))
    +					continue;
     
    -		/*
    -		 * An ART node with the same destination/netmask already
    -		 * exists, MPATH conflict must have been already checked.
    -		 */
    -		if (rt->rt_flags & RTF_MPATH) {
    -			/*
    -			 * Only keep the RTF_MPATH flag if two routes have
    -			 * the same gateway.
    -			 */
    -			rt->rt_flags &= ~RTF_MPATH;
    -			SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
    -				if (mrt->rt_priority == prio) {
    -					mrt->rt_flags |= RTF_MPATH;
    -					rt->rt_flags |= RTF_MPATH;
    -				}
    +				SET(mrt->rt_flags, RTF_MPATH);
     			}
     		}
     
     		/* Put newly inserted entry at the right place. */
     		rtable_mpath_insert(an, rt);
     	}
    +	rw_exit_write(&r->r_lock);
    +	return (error);
    +
    +put:
    +	art_put(an);
     leave:
    -	rw_exit_write(&ar->ar_lock);
    +	rw_exit_write(&r->r_lock);
     	rtfree(rt);
     	return (error);
     }
    @@ -654,118 +657,136 @@ int
     rtable_delete(unsigned int rtableid, const struct sockaddr *dst,
         const struct sockaddr *mask, struct rtentry *rt)
     {
    -	struct art_root			*ar;
    +	struct rtable			*r;
     	struct art_node			*an;
    -	struct srp_ref			 sr;
     	const uint8_t			*addr;
     	int				 plen;
     	struct rtentry			*mrt;
    -	int				 npaths = 0;
    -	int				 error = 0;
     
    -	ar = rtable_get(rtableid, dst->sa_family);
    -	if (ar == NULL)
    +	r = rtable_get(rtableid, dst->sa_family);
    +	if (r == NULL)
     		return (EAFNOSUPPORT);
     
    -	addr = satoaddr(ar, dst);
    +	addr = satoaddr(r, dst);
     	plen = rtable_satoplen(dst->sa_family, mask);
     	if (plen == -1)
     		return (EINVAL);
     
    -	rtref(rt); /* guarantee rtfree won't do anything under ar_lock */
    -	rw_enter_write(&ar->ar_lock);
    -	an = art_lookup(ar, addr, plen, &sr);
    -	srp_leave(&sr); /* an can't go away while we have the lock */
    -
    -	/* Make sure we've got a perfect match. */
    -	if (!an_match(an, dst, plen)) {
    -		error = ESRCH;
    -		goto leave;
    +	rw_enter_write(&r->r_lock);
    +	smr_read_enter();
    +	an = art_lookup(r->r_art, addr, plen);
    +	smr_read_leave();
    +	if (an == NULL) {
    +		rw_exit_write(&r->r_lock);
    +		return (ESRCH);
     	}
     
    -	/*
    -	 * If other multipath route entries are still attached to
    -	 * this ART node we only have to unlink it.
    -	 */
    -	SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next)
    -		npaths++;
    -
    -	if (npaths > 1) {
    -		KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
    -		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
    -		    rt_next);
    -
    -		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
    -		if (npaths == 2)
    -			mrt->rt_flags &= ~RTF_MPATH;
    +	/* If this is the only route in the list then we can delete the node */
    +	if (SMR_PTR_GET_LOCKED(&an->an_value) == rt &&
    +	    SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) {
    +		struct art_node *oan;
    +		oan = art_delete(r->r_art, addr, plen);
    +		if (oan != an)
    +			panic("art %p changed shape during delete", r->r_art);
    +		art_put(an);
    +		/*
    +		 * XXX an and the rt ref could still be alive on other cpus.
    +		 * this currently works because of the NET_LOCK/KERNEL_LOCK
    +		 * but should be fixed if we want to do route lookups outside
    +		 * these locks. - dlg@
    +		 */
    +	} else {
    +		struct rtentry **prt;
    +		struct rtentry *nrt;
    +		unsigned int found = 0;
    +		unsigned int npaths = 0;
     
    -		goto leave;
    +		/*
    +		 * If other multipath route entries are still attached to
    +		 * this ART node we only have to unlink it.
    +		 */
     +		prt = (struct rtentry **)&an->an_value;
    +		while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
    +			if (mrt == rt) {
    +				found = 1;
    +				SMR_PTR_SET_LOCKED(prt,
    +				    SMR_PTR_GET_LOCKED(&mrt->rt_next));
    +			} else if ((mrt->rt_priority & RTP_MASK) ==
    +			    (rt->rt_priority & RTP_MASK)) {
    +				npaths++;
    +				nrt = mrt;
    +			}
    +			prt = &mrt->rt_next;
    +		}
    +		if (!found)
    +			panic("removing non-existent route");
    +		if (npaths == 1)
    +			CLR(nrt->rt_flags, RTF_MPATH);
     	}
    -
    -	if (art_delete(ar, an, addr, plen) == NULL)
    -		panic("art_delete failed to find node %p", an);
    -
     	KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
    -	SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
    -	art_put(an);
    -
    -leave:
    -	rw_exit_write(&ar->ar_lock);
    +	rw_exit_write(&r->r_lock);
     	rtfree(rt);
     
    -	return (error);
    -}
    -
    -struct rtable_walk_cookie {
    -	int		(*rwc_func)(struct rtentry *, void *, unsigned int);
    -	void		 *rwc_arg;
    -	struct rtentry	**rwc_prt;
    -	unsigned int	  rwc_rid;
    -};
    -
    -/*
    - * Helper for rtable_walk to keep the ART code free from any "struct rtentry".
    - */
    -int
    -rtable_walk_helper(struct art_node *an, void *xrwc)
    -{
    -	struct srp_ref			 sr;
    -	struct rtable_walk_cookie	*rwc = xrwc;
    -	struct rtentry			*rt;
    -	int				 error = 0;
    -
    -	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
    -		error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid);
    -		if (error != 0)
    -			break;
    -	}
    -	if (rwc->rwc_prt != NULL && rt != NULL) {
    -		rtref(rt);
    -		*rwc->rwc_prt = rt;
    -	}
    -	SRPL_LEAVE(&sr);
    -
    -	return (error);
    +	return (0);
     }
     
     int
     rtable_walk(unsigned int rtableid, sa_family_t af, struct rtentry **prt,
         int (*func)(struct rtentry *, void *, unsigned int), void *arg)
     {
    -	struct art_root			*ar;
    -	struct rtable_walk_cookie	 rwc;
    +	struct rtable			*r;
    +	struct art_iter			 ai;
    +	struct art_node			*an;
     	int				 error;
     
    -	ar = rtable_get(rtableid, af);
    -	if (ar == NULL)
    +	r = rtable_get(rtableid, af);
    +	if (r == NULL)
     		return (EAFNOSUPPORT);
     
    -	rwc.rwc_func = func;
    -	rwc.rwc_arg = arg;
    -	rwc.rwc_prt = prt;
    -	rwc.rwc_rid = rtableid;
    +	rw_enter_write(&r->r_lock);
    +	ART_FOREACH(an, r->r_art, &ai) {
    +		/*
    +		 * ART nodes have a list of rtentries.
    +		 *
    +		 * art_iter holds references to the topology
    +		 * so it won't change, but not the an_node or rtentries.
    +		 */
    +		struct rtentry *rt = SMR_PTR_GET_LOCKED(&an->an_value);
    +		rtref(rt);
     
    -	error = art_walk(ar, rtable_walk_helper, &rwc);
    +		rw_exit_write(&r->r_lock);
    +		do {
    +			struct rtentry *nrt;
    +
    +			smr_read_enter();
    +			/* Get ready for the next entry. */
    +			nrt = SMR_PTR_GET(&rt->rt_next);
    +			if (nrt != NULL)
    +				rtref(nrt);
    +			smr_read_leave();
    +
    +			error = func(rt, arg, rtableid);
    +			if (error != 0) {
    +				if (prt != NULL)
    +					*prt = rt;
    +				else
    +					rtfree(rt);
    +
    +				if (nrt != NULL)
    +					rtfree(nrt);
    +
    +				rw_enter_write(&r->r_lock);
    +				art_iter_close(&ai);
    +				rw_exit_write(&r->r_lock);
    +				return (error);
    +			}
    +
    +			rtfree(rt);
    +			rt = nrt;
    +		} while (rt != NULL);
    +		rw_enter_write(&r->r_lock);
    +	}
    +	rw_exit_write(&r->r_lock);
     
     	return (error);
     }
    @@ -774,12 +795,12 @@ struct rtentry *
     rtable_iterate(struct rtentry *rt0)
     {
     	struct rtentry *rt = NULL;
    -	struct srp_ref sr;
     
    -	rt = SRPL_NEXT(&sr, rt0, rt_next);
    +	smr_read_enter();
    +	rt = SMR_PTR_GET(&rt0->rt_next);
     	if (rt != NULL)
     		rtref(rt);
    -	SRPL_LEAVE(&sr);
    +	smr_read_leave();
     	rtfree(rt0);
     	return (rt);
     }
    @@ -794,27 +815,25 @@ int
     rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
         int plen, uint8_t prio, struct rtentry *rt)
     {
    -	struct art_root			*ar;
    +	struct rtable			*r;
     	struct art_node			*an;
    -	struct srp_ref			 sr;
     	const uint8_t			*addr;
     	int				 error = 0;
     
    -	ar = rtable_get(rtableid, dst->sa_family);
    -	if (ar == NULL)
    +	r = rtable_get(rtableid, dst->sa_family);
    +	if (r == NULL)
     		return (EAFNOSUPPORT);
     
    -	addr = satoaddr(ar, dst);
    -
    -	rw_enter_write(&ar->ar_lock);
    -	an = art_lookup(ar, addr, plen, &sr);
    -	srp_leave(&sr); /* an can't go away while we have the lock */
    +	addr = satoaddr(r, dst);
     
    -	/* Make sure we've got a perfect match. */
    -	if (!an_match(an, dst, plen)) {
    +	rw_enter_write(&r->r_lock);
    +	smr_read_enter();
    +	an = art_lookup(r->r_art, addr, plen);
    +	smr_read_leave();
    +	if (an == NULL) {
     		error = ESRCH;
    -	} else if (SRPL_FIRST_LOCKED(&an->an_rtlist) == rt &&
    -		SRPL_NEXT_LOCKED(rt, rt_next) == NULL) {
    +	} else if (SMR_PTR_GET_LOCKED(&an->an_value) == rt &&
    +	    SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) {
     		/*
     		 * If there's only one entry on the list do not go
     		 * through an insert/remove cycle.  This is done to
    @@ -823,15 +842,23 @@ rtable_mpath_reprio(unsigned int rtablei
     		 */
     		rt->rt_priority = prio;
     	} else {
    -		rtref(rt); /* keep rt alive in between remove and insert */
    -		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
    -		    rt, rtentry, rt_next);
    +		struct rtentry **prt;
    +		struct rtentry *mrt;
    +
     +		prt = (struct rtentry **)&an->an_value;
    +		while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
    +			if (mrt == rt)
    +				break;
    +			prt = &mrt->rt_next;
    +		}
    +		KASSERT(mrt != NULL);
    +
    +		SMR_PTR_SET_LOCKED(prt, SMR_PTR_GET_LOCKED(&rt->rt_next));
     		rt->rt_priority = prio;
     		rtable_mpath_insert(an, rt);
    -		rtfree(rt);
     		error = EAGAIN;
     	}
    -	rw_exit_write(&ar->ar_lock);
    +	rw_exit_write(&r->r_lock);
     
     	return (error);
     }
    @@ -839,63 +866,21 @@ rtable_mpath_reprio(unsigned int rtablei
     void
     rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
     {
    -	struct rtentry			*mrt, *prt = NULL;
    +	struct rtentry			*mrt, **prt;
     	uint8_t				 prio = rt->rt_priority;
     
    -	if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) == NULL) {
    -		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
    -		return;
    -	}
    -
     	/* Iterate until we find the route to be placed after ``rt''. */
    -	while (mrt->rt_priority <= prio && SRPL_NEXT_LOCKED(mrt, rt_next)) {
    -		prt = mrt;
    -		mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
    -	}
     
    -	if (mrt->rt_priority <= prio) {
    -		SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
    -	} else if (prt != NULL) {
    -		SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt, rt_next);
    -	} else {
    -		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
    -	}
    -}
    -
    -/*
    - * Returns 1 if ``an'' perfectly matches (``dst'', ``plen''), 0 otherwise.
    - */
    -int
    -an_match(struct art_node *an, const struct sockaddr *dst, int plen)
    -{
    -	struct rtentry			*rt;
    -	struct srp_ref			 sr;
    -	int				 match;
    -
    -	if (an == NULL || an->an_plen != plen)
    -		return (0);
    -
    -	rt = SRPL_FIRST(&sr, &an->an_rtlist);
    -	match = (rt != NULL && memcmp(rt->rt_dest, dst, dst->sa_len) == 0);
    -	SRPL_LEAVE(&sr);
    -
    -	return (match);
    -}
    -
    -void
    -rtentry_ref(void *null, void *xrt)
    -{
    -	struct rtentry *rt = xrt;
    -
    -	rtref(rt);
    -}
    +	prt = (struct rtentry **)&an->an_value;
    +	while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
    +		if (mrt->rt_priority >= prio)
    +			break;
     
    -void
    -rtentry_unref(void *null, void *xrt)
    -{
    -	struct rtentry *rt = xrt;
    +		prt = &mrt->rt_next;
    +	}
     
    -	rtfree(rt);
    +	SMR_PTR_SET_LOCKED(&rt->rt_next, mrt);
    +	SMR_PTR_SET_LOCKED(prt, rt);
     }
     
     /*
    @@ -904,9 +889,9 @@ rtentry_unref(void *null, void *xrt)
      * of "struct sockaddr" used by this routing table.
      */
     static inline const uint8_t *
    -satoaddr(struct art_root *at, const struct sockaddr *sa)
    +satoaddr(struct rtable *r, const struct sockaddr *sa)
     {
    -	return (((const uint8_t *)sa) + at->ar_off);
    +	return (((const uint8_t *)sa) + r->r_off);
     }
     
     /*
    Index: usr.bin/netstat/route.c
    ===================================================================
    RCS file: /cvs/src/usr.bin/netstat/route.c,v
    diff -u -p -r1.110 route.c
    --- usr.bin/netstat/route.c	14 Nov 2023 10:31:22 -0000	1.110
    +++ usr.bin/netstat/route.c	29 Jun 2025 11:35:03 -0000
    @@ -33,13 +33,11 @@
     #include <sys/types.h>
     #include <sys/protosw.h>
     #include <sys/socket.h>
    +#include <sys/refcnt.h>
    +#include <sys/srp.h>
     
     #include <net/if.h>
     #include <net/if_types.h>
    -#define _KERNEL
    -#include <net/art.h>
    -#include <net/route.h>
    -#undef _KERNEL
     #include <netinet/ip_ipsp.h>
     #include <netinet/in.h>
     #include <arpa/inet.h>
    @@ -51,10 +49,16 @@
     #include <netdb.h>
     #include <stdio.h>
     #include <stdlib.h>
    +#include <stdint.h>
     #include <string.h>
     #include <unistd.h>
     #include <ifaddrs.h>
     
    +#define _KERNEL
    +#include <net/art.h>
    +#include <net/route.h>
    +#undef _KERNEL
    +
     #include "netstat.h"
     
     static union {
    @@ -67,8 +71,8 @@ struct	rtentry rtentry;
     
     static struct sockaddr *kgetsa(struct sockaddr *);
     static struct sockaddr *plentosa(sa_family_t, int, struct sockaddr *);
    -static struct art_node *getdefault(struct art_table *);
    -static void p_table(struct art_table *);
    +static struct art_node *getdefault(art_heap_entry *);
    +static void p_table(art_heap_entry *);
     static void p_artnode(struct art_node *);
     static void p_krtentry(struct rtentry *);
     
    @@ -78,7 +82,7 @@ static void p_krtentry(struct rtentry *)
     void
     routepr(u_long afmap, u_long af2idx, u_long af2idx_max, u_int tableid)
     {
    -	struct art_root ar;
    +	struct art art;
     	struct art_node *node;
     	struct srp *afm_head, *afm;
     	struct {
    @@ -123,21 +127,21 @@ routepr(u_long afmap, u_long af2idx, u_l
     		if (tbl[tableid] == NULL)
     			continue;
     
    -		kread((u_long)tbl[tableid], &ar, sizeof(ar));
    +		kread((u_long)tbl[tableid], &art, sizeof(art));
     
     		free(tbl);
     
    -		if (ar.ar_root.ref == NULL)
    +		if (art.art_root == NULL)
     			continue;
     
     		pr_family(i);
     		pr_rthdr(i, Aflag);
     
    -		node = getdefault(ar.ar_root.ref);
    +		node = getdefault(art.art_root);
     		if (node != NULL)
     			p_artnode(node);
     
    -		p_table(ar.ar_root.ref);
    +		p_table(art.art_root);
     	}
     
     	free(afm);
    @@ -196,64 +200,52 @@ plentosa(sa_family_t af, int plen, struc
     }
     
     static struct art_node *
    -getdefault(struct art_table *at)
    +getdefault(art_heap_entry *heap)
     {
    -	struct art_node *node;
    -	struct art_table table;
    -	union {
    -		struct srp		 node;
    -		unsigned long		 count;
    -	} *heap;
    -
    -	kread((u_long)at, &table, sizeof(table));
    -	heap = calloc(1, AT_HEAPSIZE(table.at_bits));
    -	kread((u_long)table.at_heap, heap, AT_HEAPSIZE(table.at_bits));
    -
    -	node = heap[1].node.ref;
    -
    -	free(heap);
    -
    -	return (node);
    +	art_heap_entry entry;
     +	kread((u_long)(heap + ART_HEAP_IDX_DEFAULT), &entry, sizeof(entry));
    +	return (art_heap_entry_to_node(entry));
     }
     
     static void
    -p_table(struct art_table *at)
    +p_table(art_heap_entry *kheap)
     {
    +	art_heap_entry entry, *heap, *nheap;
     	struct art_node *next, *node;
    -	struct art_table *nat, table;
    -	union {
    -		struct srp		 node;
    -		unsigned long		 count;
    -	} *heap;
    +	struct art_table table;
     	int i, j;
     
    -	kread((u_long)at, &table, sizeof(table));
    +	kread((u_long)(kheap + ART_HEAP_IDX_TABLE), &entry, sizeof(entry));
    +	kread((u_long)entry, &table, sizeof(table));
     	heap = calloc(1, AT_HEAPSIZE(table.at_bits));
    -	kread((u_long)table.at_heap, heap, AT_HEAPSIZE(table.at_bits));
    +	kread((u_long)kheap, heap, AT_HEAPSIZE(table.at_bits));
     
     	for (j = 1; j < table.at_minfringe; j += 2) {
     		for (i = (j > 2) ? j : 2; i < table.at_minfringe; i <<= 1) {
    -			next = heap[i >> 1].node.ref;
    -			node = heap[i].node.ref;
    +			next = art_heap_entry_to_node(heap[i >> 1]);
    +			node = art_heap_entry_to_node(heap[i]);
     			if (node != NULL && node != next)
     				p_artnode(node);
     		}
     	}
     
     	for (i = table.at_minfringe; i < table.at_minfringe << 1; i++) {
    -		next = heap[i >> 1].node.ref;
    -		node = heap[i].node.ref;
    -		if (!ISLEAF(node)) {
    -			nat = SUBTABLE(node);
    -			node = getdefault(nat);
    -		} else
    -			nat = NULL;
    +		next = art_heap_entry_to_node(heap[i >> 1]);
    +
    +		entry = heap[i];
    +		if (art_heap_entry_is_node(entry)) {
    +			nheap = NULL;
    +			node = art_heap_entry_to_node(entry);
    +		} else {
    +			nheap = art_heap_entry_to_heap(entry);
    +			node = getdefault(nheap);
    +		}
     
     		if (node != NULL && node != next)
     			p_artnode(node);
     
    -		if (nat != NULL)
    -			p_table(nat);
    +		if (nheap != NULL)
    +			p_table(nheap);
     	}
     
     	free(heap);
    @@ -266,14 +258,14 @@ p_artnode(struct art_node *an)
     	struct rtentry *rt;
     
     	kread((u_long)an, &node, sizeof(node));
    -	rt = node.an_rtlist.sl_head.ref;
    +	rt = node.an_value;
     
     	while (rt != NULL) {
     		kread((u_long)rt, &rtentry, sizeof(rtentry));
     		if (Aflag)
     			printf("%-16p ", rt);
     		p_krtentry(&rtentry);
    -		rt = rtentry.rt_next.se_next.ref;
    +		rt = rtentry.rt_next;
     	}
     }
     
    Index: regress/sys/net/rtable/kern_compat.h
    ===================================================================
    RCS file: /cvs/src/regress/sys/net/rtable/kern_compat.h,v
    diff -u -p -r1.16 kern_compat.h
    --- regress/sys/net/rtable/kern_compat.h	11 Nov 2023 07:34:54 -0000	1.16
    +++ regress/sys/net/rtable/kern_compat.h	29 Jun 2025 11:35:11 -0000
    @@ -23,10 +23,14 @@
     #include <stdbool.h>
     #include <stdio.h>
     #include <stdlib.h>
    +#include <stdint.h>
     #include <string.h>
     
    +#include "smr_compat.h"
     #include "srp_compat.h"
     
    +#define membar_producer() __sync_synchronize()
    +
     #define _KERNEL
     #define DIAGNOSTIC
     #define INET
    @@ -65,6 +69,8 @@ struct pool {
     #define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
     #endif
     
    +#define roundup(x, y)	((((x)+((y)-1))/(y))*(y))
    +
     #ifndef IPL_NONE
     #define IPL_NONE 0
     #endif
    @@ -89,5 +95,16 @@ extern struct domain *domains[];
     struct rtentry;
     
     int	 rt_hash(struct rtentry *, const struct sockaddr *, uint32_t *);
    +
    +#define READ_ONCE(x) ({							\
    +	typeof(x) __tmp = *(volatile typeof(x) *)&(x);			\
    +	__tmp;								\
    +})
    +
    +#define WRITE_ONCE(x, val) ({						\
    +	typeof(x) __tmp = (val);					\
    +	*(volatile typeof(x) *)&(x) = __tmp;				\
    +	__tmp;								\
    +})
     
     #endif /* _KERN_COMPAT_H_ */
    Index: regress/sys/net/rtable/smr_compat.h
    ===================================================================
    RCS file: regress/sys/net/rtable/smr_compat.h
    diff -N regress/sys/net/rtable/smr_compat.h
    --- /dev/null	1 Jan 1970 00:00:00 -0000
    +++ regress/sys/net/rtable/smr_compat.h	29 Jun 2025 11:35:11 -0000
    @@ -0,0 +1,14 @@
    +
    +#ifndef _SMR_COMPAT_H_
    +#define _SMR_COMPAT_H_
    +
    +#include <sys/smr.h>
    +
    +void smr_read_enter(void);
    +void smr_read_leave(void);
    +
    +void SMR_ASSERT_CRITICAL(void);
    +
    +#define smr_barrier() do { } while (0)
    +
    +#endif /* _SMR_COMPAT_H_ */
    Index: regress/sys/net/rtable/util.c
    ===================================================================
    RCS file: /cvs/src/regress/sys/net/rtable/util.c,v
    diff -u -p -r1.16 util.c
    --- regress/sys/net/rtable/util.c	14 Feb 2025 06:25:00 -0000	1.16
    +++ regress/sys/net/rtable/util.c	29 Jun 2025 11:35:11 -0000
    @@ -1,4 +1,3 @@
    -/*	$OpenBSD: util.c,v 1.16 2025/02/14 06:25:00 anton Exp $ */
     
     /*
      * Copyright (c) 2015 Martin Pieuchot
    @@ -52,6 +51,7 @@
     #undef _KERNEL
     
     #include "srp_compat.h"
    +#include "smr_compat.h"
     
     #include <sys/socket.h>
     #include <sys/domain.h>
    @@ -262,14 +262,16 @@ rtentry_delete(struct rtentry *rt, void 
     	struct sockaddr_in6	 sa_mask;
     	struct sockaddr		*mask = rt_plen2mask(rt, &sa_mask);
     	int			 error;
    +	unsigned int		 refs;
     
     	assert(rt_plen(rt) == rtable_satoplen(af, mask));
    -
    +	refs = refcnt_read(&rt->rt_refcnt);
    +	assert(refs > 0);
     	if ((error = rtable_delete(0, rt_key(rt), mask, rt)) != 0) {
     		inet_net_satop(af, rt_key(rt), rt_plen(rt), dest, sizeof(dest));
     		errx(1, "can't rm route: %s, %s", dest, strerror(error));
     	}
    -	assert(refcnt_read(&rt->rt_refcnt) == 0);
    +	assert(refcnt_read(&rt->rt_refcnt) == refs - 1);
     
     	return (0);
     }
    @@ -576,4 +578,28 @@ rt_hash(struct rtentry *rt, const struct
     void
     rt_timer_init(void)
     {
    +}
    +
    +/*
    + * SMR stuff
    + */
    +
    +unsigned int smr_crit;
    +
    +void
    +smr_read_enter(void)
    +{
    +	smr_crit++;
    +}
    +
    +void
    +smr_read_leave(void)
    +{
     +	smr_crit--;
    +}
    +
    +void
    +SMR_ASSERT_CRITICAL(void)
    +{
    +	assert(smr_crit > 0);
     }
    Index: regress/sys/net/rtable/delete/main.c
    ===================================================================
    RCS file: /cvs/src/regress/sys/net/rtable/delete/main.c,v
    diff -u -p -r1.7 main.c
    --- regress/sys/net/rtable/delete/main.c	13 Apr 2021 08:21:12 -0000	1.7
    +++ regress/sys/net/rtable/delete/main.c	29 Jun 2025 11:35:11 -0000
    @@ -21,6 +21,8 @@
     #include <sys/socket.h>
     #include <net/route.h>
     #include <net/rtable.h>
    +
    +#include <stdint.h>
     #include <net/art.h>
     
     #include <assert.h>
    @@ -58,10 +60,10 @@ main(int argc, char *argv[])
     
     	rtable_walk(0, AF_INET6, NULL, rtentry_dump, NULL);
     
    -	struct art_root *ar;
    +	struct art *ar;
     	ar = rtable_get(0, AF_INET6);
     	assert(ar != NULL);
    -	assert(ar->ar_root.ref == NULL);
    +	assert(art_is_empty(ar));
     
     	return (0);
     }
    
    
  • Alexander Bluhm:

    use the SMR api instead of SRPs in art/rtable code