use the SMR api instead of SRPs in art/rtable code
On Fri, Jun 13, 2025 at 12:03:37PM +1000, David Gwynne wrote:
> there's other changes too, but moving to SMR is the important one.
Is there any chance that you can split the diff into smaller reviewable
pieces?
With this diff I got a lockup during the sbin/ifconfig regress tests:
==== run-ether-host ====
/sbin/ifconfig vether99 destroy 2>/dev/null || true
/sbin/ifconfig vether99 create
/sbin/ifconfig vether99 10.188.254.1/24
/sbin/ifconfig vether99 10.188.254.1/32
Timeout, server ot29 not responding.
Breaking into ddb shows:
Stopped at db_enter+0x14: popq %rbp
ddb{0}> trace
db_enter() at db_enter+0x14
comintr(ffff8000002ff000) at comintr+0x2ee
intr_handler(ffff800049a63520,ffff8000002ea680) at intr_handler+0xa4
Xintr_ioapic_edge16_untramp() at Xintr_ioapic_edge16_untramp+0x18f
rtable_insert(0,ffff8000017623c0,ffff8000018bdd78,ffff8000018bdd58,4,fffffd8777f57428) at rtable_insert+0x300
rtrequest(1,ffff800049a63798,4,ffff800049a63840,0) at rtrequest+0x5e7
rt_ifa_add(ffff8000018bdd00,840100,ffff8000018bdd58,0) at rt_ifa_add+0x136
in_ifinit(ffff800001b31800,ffff8000018bdd00,ffff800049a63a80,1) at in_ifinit+0x228
in_ioctl_change_ifaddr(8040691a,ffff800049a63a70,ffff800001b31800) at in_ioctl_change_ifaddr+0x3db
ifioctl(ffff80000177a040,8040691a,ffff800049a63a70,ffff8000fffded00) at ifioctl+0xa31
sys_ioctl(ffff8000fffded00,ffff800049a63c20,ffff800049a63ba0) at sys_ioctl+0x30e
syscall(ffff800049a63c20) at syscall+0x5f9
Xsyscall() at Xsyscall+0x128
end of kernel
end trace frame: 0x72d424256700, count: -13
ddb{0}> ps
PID TID PPID UID S FLAGS WAIT COMMAND
*25917 367712 1715 0 7 0x2 ifconfig
1715 79543 76763 0 3 0x10008a sigsusp make
76763 298789 14805 0 3 0x10008a sigsusp sh
14805 371778 57594 0 3 0x10008a sigsusp make
909 466619 98445 0 3 0x100082 piperd gzip
98445 173328 57594 0 3 0x100082 piperd pax
57594 360710 62479 0 3 0x82 piperd perl
62479 114469 71381 0 3 0x10008a sigsusp ksh
71381 48894 90122 0 3 0x98 kqread sshd-session
90122 87012 34319 0 3 0x92 kqread sshd-session
24618 157677 1 0 2 0x100083 getty
11927 376173 1 0 3 0x100083 ttyin getty
85922 325779 1 0 3 0x100083 ttyin getty
94742 203975 1 0 3 0x100083 ttyin getty
5837 287986 1 0 3 0x100083 ttyin getty
77221 281109 1 0 3 0x100083 ttyin getty
25334 489515 1 0 7 0x100010 cron
59930 513924 1 99 3 0x1100090 kqread sndiod
34344 50376 1 110 3 0x100090 kqread sndiod
48287 477509 72847 95 3 0x1100092 kqread smtpd
21932 230335 72847 103 3 0x1100092 kqread smtpd
34183 420668 72847 95 3 0x1100092 kqread smtpd
2789 45631 72847 95 3 0x100092 kqread smtpd
1676 413417 72847 95 3 0x1100092 kqread smtpd
1921 220787 72847 95 3 0x1100092 kqread smtpd
72847 458415 1 0 3 0x100080 kqread smtpd
91399 307822 456 91 2 0xc92 snmpd_metrics
90563 458994 456 91 3 0x1100092 kqread snmpd
456 444914 1 0 3 0x100080 kqread snmpd
34319 11943 1 0 3 0x88 kqread sshd
78684 346476 0 0 7 0x14e00 acct
88192 362227 0 0 3 0x14280 nfsidl nfsio
31840 5671 0 0 3 0x14280 nfsidl nfsio
92421 334349 0 0 3 0x14280 nfsidl nfsio
81928 29509 0 0 3 0x14280 nfsidl nfsio
15758 16037 1 0 3 0x100080 kqread ntpd
20615 133478 23635 83 3 0x100092 kqread ntpd
23635 162680 1 83 2 0x1100c92 ntpd
18100 62556 23940 74 7 0x1100092 pflogd
23940 480085 1 0 3 0x80 sbwait pflogd
9043 26327 56395 73 2 0x1100c90 syslogd
56395 333846 1 0 3 0x100082 sbwait syslogd
49664 472447 1 0 3 0x100080 kqread resolvd
74070 413619 14836 77 3 0x100092 kqread dhcpleased
26143 286279 14836 77 3 0x100092 kqread dhcpleased
14836 386075 1 0 3 0x80 kqread dhcpleased
79448 440746 20151 115 3 0x100092 kqread slaacd
5533 70160 20151 115 3 0x100092 kqread slaacd
20151 416484 1 0 3 0x100080 kqread slaacd
24389 258921 0 0 2 0x40014200 smr
90810 507449 0 0 3 0x14200 pgzero zerothread
56574 424616 0 0 3 0x14200 aiodoned aiodoned
41539 424908 0 0 7 0x14e00 update
98575 373693 0 0 3 0x14200 cleaner cleaner
59215 151639 0 0 3 0x14200 reaper reaper
25589 292838 0 0 3 0x14200 pgdaemon pagedaemon
49205 116542 0 0 3 0x14200 bored wsdisplay0
17674 53309 0 0 3 0x14200 mmctsk sdmmc0
32751 68855 0 0 3 0x14200 usbtsk usbtask
50027 345810 0 0 3 0x14200 usbatsk usbatsk
24684 497676 0 0 2 0x40014200 acpi0
91005 294720 0 0 7 0x40014200 idle11
96640 257040 0 0 7 0x40014200 idle10
52284 114081 0 0 7 0x40014200 idle9
78934 296461 0 0 7 0x40014200 idle8
67165 204090 0 0 7 0x40014200 idle7
76363 192716 0 0 7 0x40014200 idle6
52739 412688 0 0 3 0x40014200 idle5
52867 386711 0 0 3 0x40014200 idle4
90405 202726 0 0 3 0x40014200 idle3
73833 101181 0 0 3 0x40014200 idle2
64244 384244 0 0 3 0x40014200 idle1
9626 136308 0 0 7 0x14200 sensors
55917 308696 0 0 3 0x14200 bored softnet3
58689 23794 0 0 3 0x14200 bored softnet2
29876 160418 0 0 3 0x14200 bored softnet1
66671 251911 0 0 2 0x14200 softnet0
91179 95269 0 0 3 0x14200 smrbar systqmp
18779 503295 0 0 2 0x14200 systq
89333 137237 0 0 3 0x14200 netlock softclockmp
92326 105568 0 0 3 0x40014200 tmoslp softclock
61939 69733 0 0 3 0x40014200 idle0
1 445651 0 0 3 0x82 wait init
0 0 -1 0 3 0x10010200 scheduler swapper
ddb{0}> show all routes
Route tree for af 2, rtableid 0
panic: rtable rwlock 0xffff800000326780: enter write deadlock
Stopped at db_enter+0x14: popq %rbp
TID PID UID PRFLAGS PFLAGS CPU COMMAND
*367712 25917 0 0x2 0 0K ifconfig
489515 25334 0 0x100010 0 4 cron
346476 78684 0 0x14000 0xe00 3 acct
62556 18100 74 0x1100012 0x80 2 pflogd
424908 41539 0 0x14000 0xe00 1 update
136308 9626 0 0x14000 0x200 5 sensors
db_enter() at db_enter+0x14
panic(ffffffff826722d3) at panic+0xd5
rw_do_enter_write(ffff800000326780,0) at rw_do_enter_write+0x1cc
rtable_walk(0,2,0,ffffffff81ed61c0,0) at rtable_walk+0xae
db_show_rtable(2,0) at db_show_rtable+0x57
db_show_all_routes(ffffffff81df0b74,0,ffffffffffffffff,ffff800049a63160) at db_show_all_routes+0x6b
db_command(ffffffff82a9d0b8,ffffffff8278fd30) at db_command+0x34a
db_command_loop() at db_command_loop+0xfa
db_trap(1,0) at db_trap+0x15a
db_ktrap(1,0,ffff800049a633a0) at db_ktrap+0x155
kerntrap(ffff800049a633a0) at kerntrap+0xcb
alltraps_kern_meltdown() at alltraps_kern_meltdown+0x7b
db_enter() at db_enter+0x14
comintr(ffff8000002ff000) at comintr+0x2ee
end trace frame: 0xffff800049a63510, count: 0
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports. Insufficient info makes it difficult to find and fix bugs.
ddb{0}> show all locks
Process 25917 (ifconfig) thread 0xffff8000fffded00 (367712)
exclusive rwlock rtable r = 0 (0xffff800000326798)
exclusive rwlock netlock r = 0 (0xffffffff82a3f438)
exclusive kernel_lock &kernel_lock r = 2 (0xffffffff82a72770)
Process 91179 (systqmp) thread 0xffff8000ffffe290 (95269)
shared rwlock systqmp r = 0 (0xffffffff82a27ad0)
Process 89333 (softclockmp) thread 0xffff8000ffffea40 (137237)
shared rwlock timeout r = 0 (0xffffffff82a00f48)
I just realized that you sent a newer diff. I will try that.
bluhm
> SRPs complicated this code massively, so moving it to SMR is almost like
> backing the SRP changes out. it becomes a lot more readable and similar
> to other ART implementations, which can't hurt.
>
> another change is strip non-ART code out of art.c, specifically the
> rwlock and route sourceaddr info. these features have been moved
> up a layer into rtable.c and are now stored as part of a new struct
> rtable. again, this makes the art code easier to read and compare.
>
> the other big change is to walking over an ART. there's now a
> reentrant and non-recursive iterator api which art_walk and rtable_walk
> are now built on top of. rtable_walk can basically do a foreach
> over the ART directly rather than having to pass a bunch of state
> into callbacks to apply with art_walk. we can still do that if we
> want, i just find this easier to deal with in rtable_walk directly.
>
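
A minimal sketch of the new iterator API in use, essentially what the
new art_walk() below is built from; handle() is a hypothetical
per-route callback, and the caller is assumed to hold whatever lock
serialises modifications:

/*
 * Sketch only: visit every node in an ART.  ART_FOREACH wraps
 * art_iter_open()/art_iter_next(); a loop that runs to completion
 * has already released its table references, but an early exit
 * must drop them with art_iter_close().
 */
int
art_apply_sketch(struct art *art, int (*handle)(struct art_node *))
{
	struct art_iter ai;
	struct art_node *an;
	int error;

	ART_FOREACH(an, art, &ai) {
		error = handle(an);
		if (error != 0) {
			art_iter_close(&ai);	/* drop table refs early */
			return (error);
		}
	}

	return (0);
}
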
> i modified how the heaps are traversed inside ART as well to try and
> minimise the pointer accesses as much as possible.
>
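
To sketch the idea (condensed from the new art_match() below, with the
default-route bookkeeping elided and assuming access to art.c's
internal helpers): the low bit of each art_heap_entry marks whether it
is a leaf node or a child heap, so the descent only ever reads the
heap arrays and never has to follow a pointer into a struct art_table:

/*
 * Sketch only: descend tagged heap entries to a leaf.  Must be
 * called within an SMR critical section.
 */
static struct art_node *
art_descend_sketch(struct art *art, const uint8_t *addr)
{
	art_heap_entry *heap = SMR_PTR_GET(&art->art_root);
	art_heap_entry ahe;
	unsigned int offset = 0, level = 0, bits, j;

	if (heap == NULL)
		return (NULL);

	for (;;) {
		bits = art->art_levels[level];

		/* Fringe index for this stride of the address. */
		j = art_bindex(offset, bits, addr, offset + bits);
		ahe = SMR_PTR_GET(&heap[j]);
		if (art_heap_entry_is_node(ahe))	/* low bit clear */
			return (art_heap_entry_to_node(ahe));

		heap = art_heap_entry_to_heap(ahe);	/* mask tag bit off */
		offset += bits;
		level++;
	}
}
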
> i'm sorry the diff is so big. i don't expect code reviews. it took
> me 4 years and at least 3 attempts to do this diff. however, tests
> would be much appreciated.
>
> it looks like the generated code is smaller too.
>
> Index: art.c
> ===================================================================
> RCS file: /cvs/src/sys/net/art.c,v
> diff -u -p -r1.31 art.c
> --- art.c 11 Nov 2023 12:17:50 -0000 1.31
> +++ art.c 13 Jun 2025 01:59:37 -0000
> @@ -22,6 +22,58 @@
> *
> * Yoichi Hariguchi paper can be found at:
> * http://www.hariguchi.org/art/art.pdf
> + *
> + * This implementation is tweaked to minimise pointer traversal
> + * during lookups: by only accessing the heaps, it avoids following
> + * pointers to or through the art_table structures.
> + *
> + * The heap is an array of art_heap_entry values which have different
> + * meanings depending on their location. The majority of entries in
> + * the heap are used as pointers to nodes (struct an_node, aka leaf
> + * entries), or the next heap to traverse. These pointers are typed/
> + * tagged by the low bit in their heap value, and must be masked
> + * appropriately before use.
> + *
> + * The first two entries in the heap are not used by the search
> + * algorithm, so they are used to store data associated with this
> + * part of the heap. The first entry (heap[0]) stores a pointer to
> + * an art_table struct associated with this heap. The second entry
> + * (heap[1]) stores the node pointer which would be in the parent
> + * heap if this table wasn't using it. This node pointer is also known
> + * as the default entry/route for the current table in the ART.
> + *
> + * Nodes contain the exact information about the prefix they represent
> + * in the ART, ie, the address and prefix length, and a pointer to
> + * data associated with that prefix (an_value).
> + *
> + * The relationships between the separate data structures look
> + * like the following:
> + *
> + * +------------------+
> + * | struct art |
> + * +------------------+ NULL
> + * | ^
> + * | art_root | at_parent
> + * v |
> + * +------------------+ heap[0] +------------------+
> + * | heap |------------>| struct art_table |
> + * +------------------+ +------------------+
> + * | ^
> + * | heap entry | at_parent
> + * v |
> + * +------------------+ heap[0] +------------------+
> + * | heap |------------>| struct art_table |
> + * +------------------+ +------------------+
> + * |
> + * | node entry
> + * v
> + * +------------------+
> + * | struct art_node |
> + * +------------------+
> + * |
> + * | an_value
> + * v
> + * "user" data
> */
>
> #ifndef _KERNEL
> @@ -33,43 +85,96 @@
> #include <sys/pool.h>
> #include <sys/task.h>
> #include <sys/socket.h>
> +#include <sys/smr.h>
> #endif
>
> #include <net/art.h>
>
> -int art_bindex(struct art_table *, const uint8_t *, int);
> -void art_allot(struct art_table *at, int, struct art_node *,
> - struct art_node *);
> -struct art_table *art_table_get(struct art_root *, struct art_table *,
> - int);
> -struct art_table *art_table_put(struct art_root *, struct art_table *);
> -struct art_node *art_table_insert(struct art_root *, struct art_table *,
> +/*
> + * Allotment Table.
> + */
> +struct art_table {
> + art_heap_entry *at_heap;
> + struct art_table *at_parent; /* Parent table */
> +
> + unsigned int at_index; /* Index in the parent table */
> + unsigned int at_minfringe; /* Index that fringe begins */
> +
> + unsigned int at_level; /* Level of the table */
> + unsigned int at_bits; /* Stride length of the table */
> + unsigned int at_offset; /* Sum of parents' stride len */
> +
> + unsigned int at_refcnt;
> +};
> +
> +static void art_allot(struct art_table *at, unsigned int,
> + art_heap_entry, art_heap_entry);
> +struct art_table *art_table_get(struct art *, struct art_table *,
> + unsigned int);
> +struct art_table *art_table_put(struct art *, struct art_table *);
> +struct art_node *art_table_insert(struct art *, struct art_table *,
> int, struct art_node *);
> -struct art_node *art_table_delete(struct art_root *, struct art_table *,
> +struct art_node *art_table_delete(struct art *, struct art_table *,
> int, struct art_node *);
> -struct art_table *art_table_ref(struct art_root *, struct art_table *);
> -int art_table_free(struct art_root *, struct art_table *);
> -int art_table_walk(struct art_root *, struct art_table *,
> - int (*f)(struct art_node *, void *), void *);
> -int art_walk_apply(struct art_root *,
> - struct art_node *, struct art_node *,
> - int (*f)(struct art_node *, void *), void *);
> +struct art_table *art_table_ref(struct art *, struct art_table *);
> +int art_table_free(struct art *, struct art_table *);
> void art_table_gc(void *);
> void art_gc(void *);
>
> -struct pool an_pool, at_pool, at_heap_4_pool, at_heap_8_pool;
> +static struct pool an_pool, at_pool, at_heap_4_pool, at_heap_8_pool;
>
> -struct art_table *art_table_gc_list = NULL;
> -struct mutex art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> -struct task art_table_gc_task =
> +static struct art_table *art_table_gc_list = NULL;
> +static struct mutex art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> +static struct task art_table_gc_task =
> TASK_INITIALIZER(art_table_gc, NULL);
>
> -struct art_node *art_node_gc_list = NULL;
> -struct mutex art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> -struct task art_node_gc_task = TASK_INITIALIZER(art_gc, NULL);
> +static struct art_node *art_node_gc_list = NULL;
> +static struct mutex art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> +static struct task art_node_gc_task = TASK_INITIALIZER(art_gc, NULL);
> +
> +#define ART_HEAP_IDX_TABLE 0
> +#define ART_HEAP_IDX_DEFAULT 1
> +
> +#define AT_HEAPSIZE(bits) ((1 << ((bits) + 1)) * sizeof(art_heap_entry))
> +
> +static inline struct art_table *
> +art_heap_to_table(art_heap_entry *heap)
> +{
> + return ((struct art_table *)heap[ART_HEAP_IDX_TABLE]);
> +}
> +
> +static inline int
> +art_heap_entry_is_node(art_heap_entry ahe)
> +{
> + return (!ISSET(ahe, 1));
> +}
> +
> +static inline struct art_node *
> +art_heap_entry_to_node(art_heap_entry ahe)
> +{
> + return ((struct art_node *)ahe);
> +}
> +
> +static inline art_heap_entry *
> +art_heap_entry_to_heap(art_heap_entry ahe)
> +{
> + return ((art_heap_entry *)(ahe & ~1ULL));
> +}
> +
> +static inline art_heap_entry
> +art_node_to_heap_entry(struct art_node *an)
> +{
> + return ((art_heap_entry)an);
> +}
> +
> +static inline art_heap_entry
> +art_heap_to_heap_entry(art_heap_entry *heap)
> +{
> + return ((art_heap_entry)heap | 1ULL);
> +}
>
> void
> -art_init(void)
> +art_boot(void)
> {
> pool_init(&an_pool, sizeof(struct art_node), 0, IPL_SOFTNET, 0,
> "art_node", NULL);
> @@ -84,56 +189,74 @@ art_init(void)
> /*
> * Per routing table initialization API function.
> */
> -struct art_root *
> -art_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
> -{
> - struct art_root *ar;
> - int i;
>
> - ar = malloc(sizeof(*ar), M_RTABLE, M_NOWAIT|M_ZERO);
> - if (ar == NULL)
> - return (NULL);
> +static const unsigned int art_plen32_levels[] = {
> + 8, 4, 4, 4, 4, 4, 4
> +};
> +
> +static const unsigned int art_plen128_levels[] = {
> + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
> + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
> +};
> +
> +static const unsigned int art_plen20_levels[] = {
> + 4, 4, 4, 4, 4
> +};
> +
> +void
> +art_init(struct art *art, unsigned int alen)
> +{
> + const unsigned int *levels;
> + unsigned int nlevels;
> +#ifdef DIAGNOSTIC
> + unsigned int i;
> + unsigned int bits = 0;
> +#endif
>
> switch (alen) {
> case 32:
> - ar->ar_alen = 32;
> - ar->ar_nlvl = 7;
> - ar->ar_bits[0] = 8;
> - for (i = 1; i < ar->ar_nlvl; i++)
> - ar->ar_bits[i] = 4;
> + levels = art_plen32_levels;
> + nlevels = nitems(art_plen32_levels);
> break;
> case 128:
> - ar->ar_alen = 128;
> - ar->ar_nlvl = 32;
> - for (i = 0; i < ar->ar_nlvl; i++)
> - ar->ar_bits[i] = 4;
> + levels = art_plen128_levels;
> + nlevels = nitems(art_plen128_levels);
> + break;
> + case 20:
> + levels = art_plen20_levels;
> + nlevels = nitems(art_plen20_levels);
> break;
> default:
> - printf("%s: incorrect address length %u\n", __func__, alen);
> - free(ar, M_RTABLE, sizeof(*ar));
> - return (NULL);
> + panic("no configuration for alen %u", alen);
> + /* NOTREACHED */
> }
>
> - ar->ar_off = off;
> - rw_init(&ar->ar_lock, "art");
> +#ifdef DIAGNOSTIC
> + for (i = 0; i < nlevels; i++)
> + bits += levels[i];
> +
> + if (alen != bits)
> + panic("sum of levels %u != address len %u", bits, alen);
> +#endif /* DIAGNOSTIC */
>
> - return (ar);
> + art->art_root = 0;
> + art->art_levels = levels;
> + art->art_nlevels = nlevels;
> + art->art_alen = alen;
> }
>
> -/*
> - * Return 1 if ``old'' and ``new`` are identical, 0 otherwise.
> - */
> -static inline int
> -art_check_duplicate(struct art_root *ar, struct art_node *old,
> - struct art_node *new)
> +struct art *
> +art_alloc(unsigned int alen)
> {
> - if (old == NULL)
> - return (0);
> + struct art *art;
>
> - if (old->an_plen == new->an_plen)
> - return (1);
> + art = malloc(sizeof(*art), M_RTABLE, M_NOWAIT|M_ZERO);
> + if (art == NULL)
> + return (NULL);
>
> - return (0);
> + art_init(art, alen);
> +
> + return (art);
> }
>
> /*
> @@ -148,30 +271,31 @@ art_check_duplicate(struct art_root *ar,
> * 8bit-long tables, there's a maximum of 4 base indexes if the
> * prefix length is > 24.
> */
> -int
> -art_bindex(struct art_table *at, const uint8_t *addr, int plen)
> +static unsigned int __pure
> +art_bindex(unsigned int offset, unsigned int bits,
> + const uint8_t *addr, unsigned int plen)
> {
> - uint8_t boff, bend;
> + unsigned int boff, bend;
> uint32_t k;
>
> - if (plen < at->at_offset || plen > (at->at_offset + at->at_bits))
> - return (-1);
> + KASSERT(plen >= offset);
> + KASSERT(plen <= (offset + bits));
>
> /*
> * We are only interested in the part of the prefix length
> * corresponding to the range of this table.
> */
> - plen -= at->at_offset;
> + plen -= offset;
>
> /*
> * Jump to the first byte of the address containing bits
> * covered by this table.
> */
> - addr += (at->at_offset / 8);
> + addr += (offset / 8);
>
> /* ``at'' covers the bit range between ``boff'' & ``bend''. */
> - boff = (at->at_offset % 8);
> - bend = (at->at_bits + boff);
> + boff = (offset % 8);
> + bend = (bits + boff);
>
> KASSERT(bend <= 32);
>
> @@ -188,25 +312,13 @@ art_bindex(struct art_table *at, const u
> k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
> k |= addr[1] >> (16 - bend);
> } else {
> - k = (addr[0] >> (8 - bend)) & ((1 << at->at_bits) - 1);
> + k = (addr[0] >> (8 - bend)) & ((1 << bits) - 1);
> }
>
> /*
> * Single level base index formula:
> */
> - return ((k >> (at->at_bits - plen)) + (1 << plen));
> -}
> -
> -/*
> - * Single level lookup function.
> - *
> - * Return the fringe index of the part of ``addr''
> - * corresponding to the range covered by the table ``at''.
> - */
> -static inline int
> -art_findex(struct art_table *at, const uint8_t *addr)
> -{
> - return art_bindex(at, addr, (at->at_offset + at->at_bits));
> + return ((k >> (bits - plen)) + (1 << plen));
> }
>
> /*
> @@ -215,60 +327,94 @@ art_findex(struct art_table *at, const u
> * Return the best existing match for a destination.
> */
> struct art_node *
> -art_match(struct art_root *ar, const void *addr, struct srp_ref *nsr)
> +art_match(struct art *art, const void *addr)
> {
> - struct srp_ref dsr, ndsr;
> - void *entry;
> - struct art_table *at;
> - struct art_node *dflt, *ndflt;
> - int j;
> -
> - entry = srp_enter(nsr, &ar->ar_root);
> - at = entry;
> + unsigned int offset = 0;
> + unsigned int level = 0;
> + art_heap_entry *heap;
> + art_heap_entry ahe, dahe = 0;
> + struct art_node *an;
> + int j;
>
> - if (at == NULL)
> - goto done;
> + SMR_ASSERT_CRITICAL();
>
> - /*
> - * Remember the default route of each table we visit in case
> - * we do not find a better matching route.
> - */
> - dflt = srp_enter(&dsr, &at->at_default);
> + heap = SMR_PTR_GET(&art->art_root);
> + if (heap == NULL)
> + return (NULL);
>
> /*
> * Iterate until we find a leaf.
> */
> - while (1) {
> + for (;;) {
> + unsigned int bits = art->art_levels[level];
> + unsigned int p = offset + bits;
> +
> + /*
> + * Remember the default route of each table we visit in case
> + * we do not find a better matching route.
> + */
> + ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]);
> + if (ahe != 0)
> + dahe = ahe;
> +
> /* Do a single level route lookup. */
> - j = art_findex(at, addr);
> - entry = srp_follow(nsr, &at->at_heap[j].node);
> + j = art_bindex(offset, bits, addr, p);
> + ahe = SMR_PTR_GET(&heap[j]);
>
> /* If this is a leaf (NULL is a leaf) we're done. */
> - if (ISLEAF(entry))
> + if (art_heap_entry_is_node(ahe))
> break;
>
> - at = SUBTABLE(entry);
> + heap = art_heap_entry_to_heap(ahe);
> + offset = p;
> + level++;
> +
> + KASSERT(level < art->art_nlevels);
> + }
> +
> + an = art_heap_entry_to_node(ahe);
> + if (an != NULL)
> + return (an);
> +
> + return art_heap_entry_to_node(dahe);
> +}
> +
> +static int
> +art_node_check(const struct art_node *an,
> + const uint8_t *addr, unsigned int plen)
> +{
> + const uint32_t *wan = (const uint32_t *)an->an_addr;
> + const uint32_t *waddr = (const uint32_t *)addr;
> + unsigned int i = 0;
> + unsigned int shift;
> +
> + if (an->an_plen != plen)
> + return (0);
> +
> + while (plen >= 32) {
> + if (wan[i] != waddr[i])
> + return (0);
> +
> + i++;
> + plen -= 32;
> + }
> + if (plen == 0)
> + return (1);
> +
> + i *= 4;
> + while (plen >= 8) {
> + if (an->an_addr[i] != addr[i])
> + return (0);
> +
> + i++;
> + plen -= 8;
> + }
> + if (plen == 0)
> + return (1);
> +
> + shift = 8 - plen;
>
> - ndflt = srp_enter(&ndsr, &at->at_default);
> - if (ndflt != NULL) {
> - srp_leave(&dsr);
> - dsr = ndsr;
> - dflt = ndflt;
> - } else
> - srp_leave(&ndsr);
> - }
> -
> - if (entry == NULL) {
> - srp_leave(nsr);
> - *nsr = dsr;
> - KASSERT(ISLEAF(dflt));
> - return (dflt);
> - }
> -
> - srp_leave(&dsr);
> -done:
> - KASSERT(ISLEAF(entry));
> - return (entry);
> + return ((an->an_addr[i] >> shift) == (addr[i] >> shift));
> }
>
> /*
> @@ -278,24 +424,28 @@ done:
> * it does not exist.
> */
> struct art_node *
> -art_lookup(struct art_root *ar, const void *addr, int plen, struct srp_ref *nsr)
> +art_lookup(struct art *art, const void *addr, unsigned int plen)
> {
> - void *entry;
> - struct art_table *at;
> - int i, j;
> + unsigned int offset = 0;
> + unsigned int bits, p;
> + unsigned int level = 0;
> + art_heap_entry *heap;
> + art_heap_entry ahe;
> + struct art_node *an;
> + unsigned int i, j;
>
> - KASSERT(plen >= 0 && plen <= ar->ar_alen);
> + KASSERT(plen <= art->art_alen);
>
> - entry = srp_enter(nsr, &ar->ar_root);
> - at = entry;
> + SMR_ASSERT_CRITICAL();
>
> - if (at == NULL)
> - goto done;
> + heap = SMR_PTR_GET(&art->art_root);
> + if (heap == NULL)
> + return (NULL);
>
> /* Default route */
> if (plen == 0) {
> - entry = srp_follow(nsr, &at->at_default);
> - goto done;
> + ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]);
> + return art_heap_entry_to_node(ahe);
> }
>
> /*
> @@ -303,31 +453,47 @@ art_lookup(struct art_root *ar, const vo
> * the stride length at this level the entry must
> * be in the current table.
> */
> - while (plen > (at->at_offset + at->at_bits)) {
> + for (;;) {
> + bits = art->art_levels[level];
> + p = offset + bits;
> + if (plen <= p)
> + break;
> +
> /* Do a single level route lookup. */
> - j = art_findex(at, addr);
> - entry = srp_follow(nsr, &at->at_heap[j].node);
> + j = art_bindex(offset, bits, addr, p);
> + ahe = SMR_PTR_GET(&heap[j]);
>
> /* A leaf is a match, but not a perfect one, or NULL */
> - if (ISLEAF(entry))
> + if (art_heap_entry_is_node(ahe))
> return (NULL);
>
> - at = SUBTABLE(entry);
> + heap = art_heap_entry_to_heap(ahe);
> + offset = p;
> + level++;
> +
> + KASSERT(level < art->art_nlevels);
> }
>
> - i = art_bindex(at, addr, plen);
> - if (i == -1)
> - return (NULL);
> + i = art_bindex(offset, bits, addr, plen);
> + ahe = SMR_PTR_GET(&heap[i]);
> + if (!art_heap_entry_is_node(ahe)) {
> + heap = art_heap_entry_to_heap(ahe);
> + ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]);
> + }
> +
> + /* Make sure we've got a perfect match */
> + an = art_heap_entry_to_node(ahe);
> + if (an != NULL && art_node_check(an, addr, plen))
> + return (an);
>
> - entry = srp_follow(nsr, &at->at_heap[i].node);
> - if (!ISLEAF(entry))
> - entry = srp_follow(nsr, &SUBTABLE(entry)->at_default);
> -
> -done:
> - KASSERT(ISLEAF(entry));
> - return (entry);
> + return (NULL);
> }
>
> +int
> +art_is_empty(struct art *art)
> +{
> + return (SMR_PTR_GET_LOCKED(&art->art_root) == NULL);
> +}
>
> /*
> * Insertion API function.
> @@ -336,32 +502,38 @@ done:
> * same destination/mask pair is already present.
> */
> struct art_node *
> -art_insert(struct art_root *ar, struct art_node *an, const void *addr, int plen)
> +art_insert(struct art *art, struct art_node *an)
> {
> - struct art_table *at, *child;
> - struct art_node *node;
> - int i, j;
> -
> - rw_assert_wrlock(&ar->ar_lock);
> - KASSERT(plen >= 0 && plen <= ar->ar_alen);
> -
> - at = srp_get_locked(&ar->ar_root);
> - if (at == NULL) {
> - at = art_table_get(ar, NULL, -1);
> + unsigned int p;
> + art_heap_entry ahe, oahe, *ahep;
> + art_heap_entry *heap;
> + struct art_node *oan;
> + struct art_table *at;
> + unsigned int i, j;
> +
> + KASSERT(an->an_plen <= art->art_alen);
> +
> + heap = SMR_PTR_GET_LOCKED(&art->art_root);
> + if (heap == NULL) {
> + at = art_table_get(art, NULL, -1);
> if (at == NULL)
> return (NULL);
>
> - srp_swap_locked(&ar->ar_root, at);
> - }
> + heap = at->at_heap;
> + SMR_PTR_SET_LOCKED(&art->art_root, heap);
> + } else
> + at = art_heap_to_table(heap);
>
> /* Default route */
> - if (plen == 0) {
> - node = srp_get_locked(&at->at_default);
> - if (node != NULL)
> - return (node);
> + if (an->an_plen == 0) {
> + ahep = &heap[ART_HEAP_IDX_DEFAULT];
> + oahe = SMR_PTR_GET_LOCKED(ahep);
> + oan = art_heap_entry_to_node(oahe);
> + if (oan != NULL)
> + return (oan);
>
> - art_table_ref(ar, at);
> - srp_swap_locked(&at->at_default, an);
> + art_table_ref(art, at);
> + SMR_PTR_SET_LOCKED(ahep, art_node_to_heap_entry(an));
> return (an);
> }
>
> @@ -370,10 +542,11 @@ art_insert(struct art_root *ar, struct a
> * the stride length at this level the entry must
> * be in the current table.
> */
> - while (plen > (at->at_offset + at->at_bits)) {
> + while ((p = at->at_offset + at->at_bits) < an->an_plen) {
> /* Do a single level route lookup. */
> - j = art_findex(at, addr);
> - node = srp_get_locked(&at->at_heap[j].node);
> + j = art_bindex(at->at_offset, at->at_bits, an->an_addr, p);
> + ahep = &heap[j];
> + ahe = SMR_PTR_GET_LOCKED(ahep);
>
> /*
> * If the node corresponding to the fringe index is
> @@ -381,84 +554,86 @@ art_insert(struct art_root *ar, struct a
> * entry of this node will then become the default
> * route of the subtable.
> */
> - if (ISLEAF(node)) {
> - child = art_table_get(ar, at, j);
> + if (art_heap_entry_is_node(ahe)) {
> + struct art_table *child = art_table_get(art, at, j);
> if (child == NULL)
> return (NULL);
>
> - art_table_ref(ar, at);
> - srp_swap_locked(&at->at_heap[j].node, ASNODE(child));
> + art_table_ref(art, at);
> +
> at = child;
> - } else
> - at = SUBTABLE(node);
> + heap = at->at_heap;
> + SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT], ahe);
> + SMR_PTR_SET_LOCKED(ahep, art_heap_to_heap_entry(heap));
> + } else {
> + heap = art_heap_entry_to_heap(ahe);
> + at = art_heap_to_table(heap);
> + }
> }
>
> - i = art_bindex(at, addr, plen);
> - if (i == -1)
> - return (NULL);
> -
> - return (art_table_insert(ar, at, i, an));
> -}
> -
> -/*
> - * Single level insertion.
> - */
> -struct art_node *
> -art_table_insert(struct art_root *ar, struct art_table *at, int i,
> - struct art_node *an)
> -{
> - struct art_node *prev, *node;
> -
> - node = srp_get_locked(&at->at_heap[i].node);
> - if (!ISLEAF(node))
> - prev = srp_get_locked(&SUBTABLE(node)->at_default);
> - else
> - prev = node;
> -
> - if (art_check_duplicate(ar, prev, an))
> - return (prev);
> + i = art_bindex(at->at_offset, at->at_bits, an->an_addr, an->an_plen);
> + ahep = &heap[i];
> + oahe = SMR_PTR_GET_LOCKED(ahep);
> + if (!art_heap_entry_is_node(oahe)) {
> + heap = art_heap_entry_to_heap(oahe);
> + ahep = &heap[ART_HEAP_IDX_DEFAULT];
> + oahe = SMR_PTR_GET_LOCKED(ahep);
> + }
>
> - art_table_ref(ar, at);
> + /* Check if there's an existing node */
> + oan = art_heap_entry_to_node(oahe);
> + if (oan != NULL && art_node_check(oan, an->an_addr, an->an_plen))
> + return (oan);
>
> /*
> * If the index `i' of the route that we are inserting is not
> * a fringe index, we need to allot this new route pointer to
> * all the corresponding fringe indices.
> */
> + art_table_ref(art, at);
> + ahe = art_node_to_heap_entry(an);
> if (i < at->at_minfringe)
> - art_allot(at, i, prev, an);
> - else if (!ISLEAF(node))
> - srp_swap_locked(&SUBTABLE(node)->at_default, an);
> + art_allot(at, i, oahe, ahe);
> else
> - srp_swap_locked(&at->at_heap[i].node, an);
> + SMR_PTR_SET_LOCKED(ahep, ahe);
>
> return (an);
> }
>
> -
> /*
> * Deletion API function.
> */
> struct art_node *
> -art_delete(struct art_root *ar, struct art_node *an, const void *addr, int plen)
> +art_delete(struct art *art, const void *addr, unsigned int plen)
> {
> + unsigned int p;
> + art_heap_entry *heap;
> struct art_table *at;
> - struct art_node *node;
> - int i, j;
> + art_heap_entry ahe, pahe, *ahep;
> + struct art_node *an;
> + unsigned int i, j;
>
> - rw_assert_wrlock(&ar->ar_lock);
> - KASSERT(plen >= 0 && plen <= ar->ar_alen);
> + KASSERT(plen <= art->art_alen);
>
> - at = srp_get_locked(&ar->ar_root);
> - if (at == NULL)
> + heap = SMR_PTR_GET_LOCKED(&art->art_root);
> + if (heap == NULL)
> return (NULL);
>
> + at = art_heap_to_table(heap);
> +
> /* Default route */
> if (plen == 0) {
> - node = srp_get_locked(&at->at_default);
> - srp_swap_locked(&at->at_default, NULL);
> - art_table_free(ar, at);
> - return (node);
> + ahep = &heap[ART_HEAP_IDX_DEFAULT];
> +
> + ahe = SMR_PTR_GET_LOCKED(ahep);
> + an = art_heap_entry_to_node(ahe);
> + if (an == NULL)
> + return (NULL);
> +
> + SMR_PTR_SET_LOCKED(ahep, 0);
> + art_table_free(art, at);
> +
> + return (an);
> }
>
> /*
> @@ -466,53 +641,37 @@ art_delete(struct art_root *ar, struct a
> * the stride length at this level the entry must
> * be in the current table.
> */
> - while (plen > (at->at_offset + at->at_bits)) {
> + while ((p = at->at_offset + at->at_bits) < plen) {
> /* Do a single level route lookup. */
> - j = art_findex(at, addr);
> - node = srp_get_locked(&at->at_heap[j].node);
> + j = art_bindex(at->at_offset, at->at_bits, addr, p);
> + ahe = SMR_PTR_GET_LOCKED(&heap[j]);
>
> /* If this is a leaf, there is no route to delete. */
> - if (ISLEAF(node))
> + if (art_heap_entry_is_node(ahe))
> return (NULL);
>
> - at = SUBTABLE(node);
> + heap = art_heap_entry_to_heap(ahe);
> + at = art_heap_to_table(heap);
> }
>
> - i = art_bindex(at, addr, plen);
> - if (i == -1)
> + i = art_bindex(at->at_offset, at->at_bits, addr, plen);
> + ahep = &heap[i];
> + ahe = SMR_PTR_GET_LOCKED(ahep);
> + if (!art_heap_entry_is_node(ahe)) {
> + art_heap_entry *nheap = art_heap_entry_to_heap(ahe);
> + ahep = &nheap[ART_HEAP_IDX_DEFAULT];
> + ahe = SMR_PTR_GET_LOCKED(ahep);
> + }
> +
> + an = art_heap_entry_to_node(ahe);
> + if (an == NULL) {
> + /* No route to delete */
> return (NULL);
> -
> - return (art_table_delete(ar, at, i, an));
> -}
> -
> -/*
> - * Single level deletion.
> - */
> -struct art_node *
> -art_table_delete(struct art_root *ar, struct art_table *at, int i,
> - struct art_node *an)
> -{
> - struct art_node *next, *node;
> -
> -#ifdef DIAGNOSTIC
> - struct art_node *prev;
> -#endif
> -
> - node = srp_get_locked(&at->at_heap[i].node);
> -#ifdef DIAGNOSTIC
> - if (!ISLEAF(node))
> - prev = srp_get_locked(&SUBTABLE(node)->at_default);
> - else
> - prev = node;
> -
> - KASSERT(prev == an);
> -#endif
> + }
>
> /* Get the next most specific route for the index `i'. */
> - if ((i >> 1) > 1)
> - next = srp_get_locked(&at->at_heap[i >> 1].node);
> - else
> - next = NULL;
> + j = i >> 1;
> + pahe = (j > 1) ? SMR_PTR_GET_LOCKED(&heap[j]) : 0;
>
> /*
> * If the index `i' of the route that we are removing is not
> @@ -520,20 +679,18 @@ art_table_delete(struct art_root *ar, st
> * route pointer to all the corresponding fringe indices.
> */
> if (i < at->at_minfringe)
> - art_allot(at, i, an, next);
> - else if (!ISLEAF(node))
> - srp_swap_locked(&SUBTABLE(node)->at_default, next);
> + art_allot(at, i, ahe, pahe);
> else
> - srp_swap_locked(&at->at_heap[i].node, next);
> + SMR_PTR_SET_LOCKED(ahep, pahe);
>
> /* We have removed an entry from this table. */
> - art_table_free(ar, at);
> + art_table_free(art, at);
>
> return (an);
> }
>
> struct art_table *
> -art_table_ref(struct art_root *ar, struct art_table *at)
> +art_table_ref(struct art *art, struct art_table *at)
> {
> at->at_refcnt++;
> return (at);
> @@ -544,12 +701,11 @@ art_table_rele(struct art_table *at)
> {
> if (at == NULL)
> return (0);
> -
> return (--at->at_refcnt == 0);
> }
>
> int
> -art_table_free(struct art_root *ar, struct art_table *at)
> +art_table_free(struct art *art, struct art_table *at)
> {
> if (art_table_rele(at)) {
> /*
> @@ -557,7 +713,7 @@ art_table_free(struct art_root *ar, stru
> * that are empty.
> */
> do {
> - at = art_table_put(ar, at);
> + at = art_table_put(art, at);
> } while (art_table_rele(at));
>
> return (1);
> @@ -569,116 +725,187 @@ art_table_free(struct art_root *ar, stru
> /*
> * Iteration API function.
> */
> -int
> -art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg)
> +
> +static struct art_node *
> +art_iter_descend(struct art_iter *ai, art_heap_entry *heap,
> + art_heap_entry pahe)
> {
> - struct srp_ref sr;
> - struct art_table *at;
> - struct art_node *node;
> - int error = 0;
> + struct art_table *at;
> + art_heap_entry ahe;
>
> - rw_enter_write(&ar->ar_lock);
> - at = srp_get_locked(&ar->ar_root);
> - if (at != NULL) {
> - art_table_ref(ar, at);
> + at = art_heap_to_table(heap);
> + ai->ai_table = art_table_ref(ai->ai_art, at);
>
> - /*
> - * The default route should be processed here because the root
> - * table does not have a parent.
> - */
> - node = srp_enter(&sr, &at->at_default);
> - error = art_walk_apply(ar, node, NULL, f, arg);
> - srp_leave(&sr);
> + /*
> + * Start looking at non-fringe nodes.
> + */
> + ai->ai_j = 1;
> +
> + /*
> + * The default route (index 1) is processed by the
> + * parent table (where it belongs) otherwise it could
> + * be processed more than once.
> + */
> + ai->ai_i = 2;
>
> - if (error == 0)
> - error = art_table_walk(ar, at, f, arg);
> + /*
> + * Process the default route now.
> + */
> + ahe = SMR_PTR_GET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT]);
> + if (ahe != 0 && ahe != pahe)
> + return (art_heap_entry_to_node(ahe));
> +
> + /*
> + * Tell the caller to proceed with art_iter_next.
> + */
> + return (NULL);
> +}
> +
> +struct art_node *
> +art_iter_open(struct art *art, struct art_iter *ai)
> +{
> + art_heap_entry *heap = SMR_PTR_GET(&art->art_root);
> + struct art_node *an;
>
> - art_table_free(ar, at);
> + ai->ai_art = art;
> +
> + if (heap == NULL) {
> + /* empty, we're already done */
> + ai->ai_table = NULL;
> + return (NULL);
> }
> - rw_exit_write(&ar->ar_lock);
>
> - return (error);
> + an = art_iter_descend(ai, heap, 0);
> + if (an != NULL)
> + return (an); /* default route */
> +
> + return (art_iter_next(ai));
> }
>
> -int
> -art_table_walk(struct art_root *ar, struct art_table *at,
> - int (*f)(struct art_node *, void *), void *arg)
> +struct art_node *
> +art_iter_next(struct art_iter *ai)
> {
> - struct srp_ref sr;
> - struct art_node *node, *next;
> - struct art_table *nat;
> - int i, j, error = 0;
> - uint32_t maxfringe = (at->at_minfringe << 1);
> + struct art_table *at = ai->ai_table;
> + art_heap_entry *heap = at->at_heap;
> + art_heap_entry ahe, pahe;
> + unsigned int i;
>
> +descend:
> /*
> * Iterate non-fringe nodes in ``natural'' order.
> */
> - for (j = 1; j < at->at_minfringe; j += 2) {
> - /*
> - * The default route (index 1) is processed by the
> - * parent table (where it belongs) otherwise it could
> - * be processed more than once.
> - */
> - for (i = max(j, 2); i < at->at_minfringe; i <<= 1) {
> - next = srp_get_locked(&at->at_heap[i >> 1].node);
> -
> - node = srp_enter(&sr, &at->at_heap[i].node);
> - error = art_walk_apply(ar, node, next, f, arg);
> - srp_leave(&sr);
> + if (ai->ai_j < at->at_minfringe) {
> + for (;;) {
> + while ((i = ai->ai_i) < at->at_minfringe) {
> + ai->ai_i = i << 1;
> +
> + pahe = SMR_PTR_GET_LOCKED(&heap[i >> 1]);
> + ahe = SMR_PTR_GET_LOCKED(&heap[i]);
> + if (ahe != 0 && ahe != pahe)
> + return (art_heap_entry_to_node(ahe));
> + }
>
> - if (error != 0)
> - return (error);
> + ai->ai_j += 2;
> + if (ai->ai_j < at->at_minfringe)
> + ai->ai_i = ai->ai_j;
> + else {
> + /* Set up the fringe loop */
> + ai->ai_i = at->at_minfringe;
> + break;
> + }
> }
> }
>
> /*
> - * Iterate fringe nodes.
> + * Descendent tables are only found on fringe nodes, and
> + * they record where they were placed in their parent. This
> + * allows the iterator to know where to resume traversal when
> + * it ascends back to the parent table. By keeping the table
> + * refs when going down the tree, the topology is preserved
> + * even if all the nodes are removed.
> */
> - for (i = at->at_minfringe; i < maxfringe; i++) {
> - next = srp_get_locked(&at->at_heap[i >> 1].node);
>
> - node = srp_enter(&sr, &at->at_heap[i].node);
> - if (!ISLEAF(node)) {
> - nat = art_table_ref(ar, SUBTABLE(node));
> - node = srp_follow(&sr, &nat->at_default);
> - } else
> - nat = NULL;
> + for (;;) {
> + unsigned int maxfringe = at->at_minfringe << 1;
> + struct art_table *parent;
>
> - error = art_walk_apply(ar, node, next, f, arg);
> - srp_leave(&sr);
> + /*
> + * Iterate fringe nodes.
> + */
> + while ((i = ai->ai_i) < maxfringe) {
> + ai->ai_i = i + 1;
>
> - if (error != 0) {
> - art_table_free(ar, nat);
> - return (error);
> + pahe = SMR_PTR_GET_LOCKED(&heap[i >> 1]);
> + ahe = SMR_PTR_GET_LOCKED(&heap[i]);
> + if (art_heap_entry_is_node(ahe)) {
> + if (ahe != 0 && ahe != pahe)
> + return (art_heap_entry_to_node(ahe));
> + } else {
> + struct art_node *an;
> +
> + heap = art_heap_entry_to_heap(ahe);
> +
> + an = art_iter_descend(ai, heap, pahe);
> + if (an != NULL) /* default route? */
> + return (an);
> +
> + /* Start looping over the child table */
> + at = art_heap_to_table(heap);
> + goto descend;
> + }
> }
>
> - if (nat != NULL) {
> - error = art_table_walk(ar, nat, f, arg);
> - art_table_free(ar, nat);
> - if (error != 0)
> - return (error);
> + /*
> + * Ascend back up to the parent
> + */
> + parent = at->at_parent;
> + ai->ai_i = at->at_index + 1;
> + art_table_free(ai->ai_art, at);
> +
> + ai->ai_table = parent;
> + if (parent == NULL) {
> + /* The root table has no parent */
> + break;
> }
> +
> + at = parent;
> + ai->ai_j = at->at_minfringe;
> + heap = at->at_heap;
> }
>
> - return (0);
> + return (NULL);
> }
>
> -int
> -art_walk_apply(struct art_root *ar,
> - struct art_node *an, struct art_node *next,
> - int (*f)(struct art_node *, void *), void *arg)
> +void
> +art_iter_close(struct art_iter *ai)
> {
> - int error = 0;
> + struct art_table *at, *parent;
>
> - if ((an != NULL) && (an != next)) {
> - rw_exit_write(&ar->ar_lock);
> - error = (*f)(an, arg);
> - rw_enter_write(&ar->ar_lock);
> + for (at = ai->ai_table; at != NULL; at = parent) {
> + parent = at->at_parent;
> + art_table_free(ai->ai_art, at);
> }
>
> - return (error);
> + ai->ai_table = NULL;
> }
>
> +int
> +art_walk(struct art *art, int (*f)(struct art_node *, void *), void *arg)
> +{
> + struct art_iter ai;
> + struct art_node *an;
> + int error = 0;
> +
> + ART_FOREACH(an, art, &ai) {
> + error = f(an, arg);
> + if (error != 0) {
> + art_iter_close(&ai);
> + return (error);
> + }
> + }
> +
> + return (0);
> +}
>
> /*
> * Create a table and use the given index to set its default route.
> @@ -686,89 +913,90 @@ art_walk_apply(struct art_root *ar,
> * Note: This function does not modify the root or the parent.
> */
> struct art_table *
> -art_table_get(struct art_root *ar, struct art_table *parent, int j)
> +art_table_get(struct art *art, struct art_table *parent, unsigned int j)
> {
> struct art_table *at;
> - struct art_node *node;
> - void *at_heap;
> - uint32_t lvl;
> + art_heap_entry *heap;
> + unsigned int level;
> + unsigned int bits;
>
> - KASSERT(j != 0 && j != 1);
> + KASSERT(j != ART_HEAP_IDX_TABLE && j != ART_HEAP_IDX_DEFAULT);
> KASSERT(parent != NULL || j == -1);
>
> - if (parent != NULL)
> - lvl = parent->at_level + 1;
> - else
> - lvl = 0;
> -
> - KASSERT(lvl < ar->ar_nlvl);
> + level = (parent != NULL) ? parent->at_level + 1 : 0;
> + KASSERT(level < art->art_nlevels);
>
> at = pool_get(&at_pool, PR_NOWAIT|PR_ZERO);
> if (at == NULL)
> return (NULL);
>
> - switch (AT_HEAPSIZE(ar->ar_bits[lvl])) {
> - case AT_HEAPSIZE(4):
> - at_heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO);
> + bits = art->art_levels[level];
> + switch (bits) {
> + case 4:
> + heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO);
> break;
> - case AT_HEAPSIZE(8):
> - at_heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO);
> + case 8:
> + heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO);
> break;
> default:
> - panic("incorrect stride length %u", ar->ar_bits[lvl]);
> + panic("incorrect stride length %u", bits);
> }
>
> - if (at_heap == NULL) {
> + if (heap == NULL) {
> pool_put(&at_pool, at);
> return (NULL);
> }
>
> + heap[ART_HEAP_IDX_TABLE] = (art_heap_entry)at;
> +
> at->at_parent = parent;
> at->at_index = j;
> - at->at_minfringe = (1 << ar->ar_bits[lvl]);
> - at->at_level = lvl;
> - at->at_bits = ar->ar_bits[lvl];
> - at->at_heap = at_heap;
> + at->at_minfringe = (1 << bits);
> + at->at_level = level;
> + at->at_bits = bits;
> + at->at_heap = heap;
> at->at_refcnt = 0;
>
> if (parent != NULL) {
> - node = srp_get_locked(&parent->at_heap[j].node);
> - /* node isn't being deleted, no srp_finalize needed */
> - srp_swap_locked(&at->at_default, node);
> + art_heap_entry ahe;
> +
> at->at_offset = (parent->at_offset + parent->at_bits);
> +
> + ahe = SMR_PTR_GET_LOCKED(&parent->at_heap[j]);
> + SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT], ahe);
> }
>
> return (at);
> }
>
> -
> /*
> * Delete a table and use its index to restore its parent's default route.
> *
> * Note: Modify its parent to unlink the table from it.
> */
> struct art_table *
> -art_table_put(struct art_root *ar, struct art_table *at)
> +art_table_put(struct art *art, struct art_table *at)
> {
> struct art_table *parent = at->at_parent;
> - struct art_node *node;
> - uint32_t j = at->at_index;
> + unsigned int j = at->at_index;
>
> KASSERT(at->at_refcnt == 0);
> KASSERT(j != 0 && j != 1);
>
> if (parent != NULL) {
> + art_heap_entry ahe;
> +
> KASSERT(j != -1);
> KASSERT(at->at_level == parent->at_level + 1);
> KASSERT(parent->at_refcnt >= 1);
>
> /* Give the route back to its parent. */
> - node = srp_get_locked(&at->at_default);
> - srp_swap_locked(&parent->at_heap[j].node, node);
> + ahe = SMR_PTR_GET_LOCKED(&at->at_heap[ART_HEAP_IDX_DEFAULT]);
> + SMR_PTR_SET_LOCKED(&parent->at_heap[j], ahe);
> } else {
> KASSERT(j == -1);
> KASSERT(at->at_level == 0);
> - srp_swap_locked(&ar->ar_root, NULL);
> + SMR_PTR_SET_LOCKED(&art->art_root, NULL);
> }
>
> mtx_enter(&art_table_gc_mtx);
> @@ -791,19 +1019,16 @@ art_table_gc(void *null)
> art_table_gc_list = NULL;
> mtx_leave(&art_table_gc_mtx);
>
> + smr_barrier();
> +
> while (at != NULL) {
> next = at->at_parent;
>
> - if (at->at_level == 0)
> - srp_finalize(at, "arttfini");
> - else
> - srp_finalize(ASNODE(at), "arttfini");
> -
> - switch (AT_HEAPSIZE(at->at_bits)) {
> - case AT_HEAPSIZE(4):
> + switch (at->at_bits) {
> + case 4:
> pool_put(&at_heap_4_pool, at->at_heap);
> break;
> - case AT_HEAPSIZE(8):
> + case 8:
> pool_put(&at_heap_8_pool, at->at_heap);
> break;
> default:
> @@ -839,40 +1064,45 @@ art_table_gc(void *null)
> * art_allot(at, k, old, new);
> * }
> */
> -void
> -art_allot(struct art_table *at, int i, struct art_node *old,
> - struct art_node *new)
> -{
> - struct art_node *node, *dflt;
> - int k = i;
> +static void
> +art_allot(struct art_table *at, unsigned int i,
> + art_heap_entry oahe, art_heap_entry nahe)
> +{
> + art_heap_entry ahe, *ahep;
> + art_heap_entry *heap;
> + unsigned int k = i;
>
> KASSERT(i < at->at_minfringe);
>
> + heap = at->at_heap;
> +
> again:
> k <<= 1;
> if (k < at->at_minfringe)
> goto nonfringe;
>
> /* Change fringe nodes. */
> - while (1) {
> - node = srp_get_locked(&at->at_heap[k].node);
> - if (!ISLEAF(node)) {
> - dflt = srp_get_locked(&SUBTABLE(node)->at_default);
> - if (dflt == old) {
> - srp_swap_locked(&SUBTABLE(node)->at_default,
> - new);
> - }
> - } else if (node == old) {
> - srp_swap_locked(&at->at_heap[k].node, new);
> + for (;;) {
> + ahep = &heap[k];
> + ahe = SMR_PTR_GET_LOCKED(ahep);
> +
> + if (!art_heap_entry_is_node(ahe)) {
> + art_heap_entry *child = art_heap_entry_to_heap(ahe);
> + ahep = &child[ART_HEAP_IDX_DEFAULT];
> + ahe = SMR_PTR_GET_LOCKED(ahep);
> }
> +
> + if (ahe == oahe)
> + SMR_PTR_SET_LOCKED(ahep, nahe);
> +
> if (k % 2)
> goto moveup;
> k++;
> }
>
> nonfringe:
> - node = srp_get_locked(&at->at_heap[k].node);
> - if (node == old)
> + ahe = SMR_PTR_GET_LOCKED(&heap[k]);
> + if (ahe == oahe)
> goto again;
> moveon:
> if (k % 2)
> @@ -881,7 +1111,7 @@ moveon:
> goto nonfringe;
> moveup:
> k >>= 1;
> - srp_swap_locked(&at->at_heap[k].node, new);
> + SMR_PTR_SET_LOCKED(&heap[k], nahe);
>
> /* Change non-fringe node. */
> if (k != i)
> @@ -889,7 +1119,7 @@ moveup:
> }
>
> struct art_node *
> -art_get(uint8_t plen)
> +art_get(const uint8_t *addr, unsigned int plen)
> {
> struct art_node *an;
>
> @@ -897,17 +1127,25 @@ art_get(uint8_t plen)
> if (an == NULL)
> return (NULL);
>
> - an->an_plen = plen;
> - SRPL_INIT(&an->an_rtlist);
> + art_node_init(an, addr, plen);
>
> return (an);
> }
>
> void
> -art_put(struct art_node *an)
> +art_node_init(struct art_node *an, const uint8_t *addr, unsigned int plen)
> {
> - KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist));
> + size_t len;
> +
> + len = roundup(plen, 8) / 8;
> + KASSERT(len <= sizeof(an->an_addr));
> + memcpy(an->an_addr, addr, len);
> + an->an_plen = plen;
> +}
>
> +void
> +art_put(struct art_node *an)
> +{
> mtx_enter(&art_node_gc_mtx);
> an->an_gc = art_node_gc_list;
> art_node_gc_list = an;
> @@ -919,17 +1157,17 @@ art_put(struct art_node *an)
> void
> art_gc(void *null)
> {
> - struct art_node *an, *next;
> + struct art_node *an;
>
> mtx_enter(&art_node_gc_mtx);
> an = art_node_gc_list;
> art_node_gc_list = NULL;
> mtx_leave(&art_node_gc_mtx);
>
> - while (an != NULL) {
> - next = an->an_gc;
> + smr_barrier();
>
> - srp_finalize(an, "artnfini");
> + while (an != NULL) {
> + struct art_node *next = an->an_gc;
>
> pool_put(&an_pool, an);
>
> Index: art.h
> ===================================================================
> RCS file: /cvs/src/sys/net/art.h,v
> diff -u -p -r1.25 art.h
> --- art.h 11 Nov 2023 12:17:50 -0000 1.25
> +++ art.h 13 Jun 2025 01:59:37 -0000
> @@ -19,94 +19,90 @@
> #ifndef _NET_ART_H_
> #define _NET_ART_H_
>
> -#include <sys/rwlock.h>
> -#include <sys/srp.h>
> -
> -#define ART_MAXLVL 32 /* We currently use 32 levels for IPv6. */
> -
> /*
> - * Root of the ART tables, equivalent to the radix head.
> + * Allotment Routing Table (ART)
> + *
> + * Yoichi Hariguchi paper can be found at:
> + * http://www.hariguchi.org/art/art.pdf
> + *
> + * Locking:
> + *
> + * Modification (ie, art_insert or art_delete) and iteration
> + * (art_iter_next, etc) over the ART must be serialised by the caller.
> + * Lookups (ie, art_match and art_lookup) run within an SMR critical
> + * section.
> *
> - * Locks used to protect struct members in this file:
> - * I immutable after creation
> - * l root's `ar_lock'
> - * K kernel lock
> - * For SRP related structures that allow lock-free reads, the write lock
> - * is indicated below.
> + * Iteration requires serialisation as it manipulates the reference
> + * counts on tables as it traverses the tree. The iterator maintains
> + * these references until it runs out of entries. This allows code
> + * iterating over the ART to release locks in between calls to
> + * art_iter_open and art_iter_next. The references may be dropped
> + * early with art_iter_close.
> + *
> + * Note, the iterator does not hold a reference to the art_node
> + * structure or the data hanging off the an_value pointer, they must
> + * be accounted for separately or their use must be serialised with
> + * art_delete.
> */
> -struct art_root {
> - struct srp ar_root; /* [l] First table */
> - struct rwlock ar_lock; /* [] Serialise modifications */
> - uint8_t ar_bits[ART_MAXLVL]; /* [I] Per level stride */
> - uint8_t ar_nlvl; /* [I] Number of levels */
> - uint8_t ar_alen; /* [I] Address length in bits */
> - uint8_t ar_off; /* [I] Offset of key in bytes */
> - struct sockaddr *ar_source; /* [N] use optional src addr */
> -};
>
> -#define ISLEAF(e) (((unsigned long)(e) & 1) == 0)
> -#define SUBTABLE(e) ((struct art_table *)((unsigned long)(e) & ~1))
> -#define ASNODE(t) ((struct art_node *)((unsigned long)(t) | 1))
> +typedef uintptr_t art_heap_entry;
>
> /*
> - * Allotment Table.
> + * Root of the ART, equivalent to the radix head.
> */
> -struct art_table {
> - struct art_table *at_parent; /* Parent table */
> - uint32_t at_index; /* Index in the parent table */
> - uint32_t at_minfringe; /* Index that fringe begins */
> - uint32_t at_level; /* Level of the table */
> - uint8_t at_bits; /* Stride length of the table */
> - uint8_t at_offset; /* Sum of parents' stride len */
> -
> - /*
> - * Items stored in the heap are pointers to nodes, in the leaf
> - * case, or tables otherwise. One exception is index 0 which
> - * is a route counter.
> - */
> - union {
> - struct srp node;
> - unsigned long count;
> - } *at_heap; /* Array of 2^(slen+1) items */
> -};
> -#define at_refcnt at_heap[0].count/* Refcounter (1 per different route) */
> -#define at_default at_heap[1].node /* Default route (was in parent heap) */
> -
> -/* Heap size for an ART table of stride length ``slen''. */
> -#define AT_HEAPSIZE(slen) ((1 << ((slen) + 1)) * sizeof(void *))
>
> -/*
> - * Forward declaration needed for the list of mpath routes
> - * attached to a single ART node.
> - */
> -struct rtentry;
> +struct art {
> + art_heap_entry *art_root;
> + const unsigned int *art_levels;
> + unsigned int art_nlevels;
> + unsigned int art_alen;
> +};
>
> /*
> * A node is the internal representation of a route entry.
> */
> struct art_node {
> + void *an_value;
> union {
> - SRPL_HEAD(, rtentry) an__rtlist; /* Route related to this node */
> - struct art_node *an__gc; /* Entry on GC list */
> - } an_pointer;
> - uint8_t an_plen; /* Prefix length */
> + struct art_node *an__gc;
> + uint8_t an__addr[16];
> + } an__u;
> +#define an_gc an__u.an__gc
> +#define an_addr an__u.an__addr
> + unsigned int an_plen;
> };
> -#define an_rtlist an_pointer.an__rtlist
> -#define an_gc an_pointer.an__gc
> -
> -void art_init(void);
> -struct art_root *art_alloc(unsigned int, unsigned int, unsigned int);
> -struct art_node *art_insert(struct art_root *, struct art_node *, const void *,
> - int);
> -struct art_node *art_delete(struct art_root *, struct art_node *, const void *,
> - int);
> -struct art_node *art_match(struct art_root *, const void *, struct srp_ref *);
> -struct art_node *art_lookup(struct art_root *, const void *, int,
> - struct srp_ref *);
> -int art_walk(struct art_root *,
> - int (*)(struct art_node *, void *), void *);
>
> -struct art_node *art_get(uint8_t);
> +void art_boot(void);
> +struct art *art_alloc(unsigned int);
> +void art_init(struct art *, unsigned int);
> +struct art_node *art_insert(struct art *, struct art_node *);
> +struct art_node *art_delete(struct art *, const void *, unsigned int);
> +struct art_node *art_match(struct art *, const void *);
> +struct art_node *art_lookup(struct art *, const void *, unsigned int);
> +int art_is_empty(struct art *);
> +
> +struct art_node *art_get(const uint8_t *, unsigned int);
> +void art_node_init(struct art_node *,
> + const uint8_t *, unsigned int);
> void art_put(struct art_node *);
> +
> +struct art_iter {
> + struct art *ai_art;
> + struct art_table *ai_table;
> + unsigned int ai_j;
> + unsigned int ai_i;
> +};
> +
> +struct art_node *art_iter_open(struct art *, struct art_iter *);
> +struct art_node *art_iter_next(struct art_iter *);
> +void art_iter_close(struct art_iter *);
> +
> +#define ART_FOREACH(_an, _art, _ai) \
> + for ((_an) = art_iter_open((_art), (_ai)); \
> + (_an) != NULL; \
> + (_an) = art_iter_next((_ai)))
> +
> +int art_walk(struct art *,
> + int (*)(struct art_node *, void *), void *);
>
> #endif /* _NET_ART_H_ */
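
The reader side of the locking scheme described in the art.h comment
above is visible in the if_wg.c chunk below: lookups wrap art_match()
in an SMR critical section instead of taking SRP references. A sketch
of the pattern, with the dereference kept inside the read section for
clarity (the diff's wg_aip_lookup() instead relies on the aip lifetime
being serialised elsewhere):

struct wg_peer *
wg_aip_lookup_sketch(struct art *root, void *addr)
{
	struct wg_peer *peer = NULL;
	struct art_node *node;

	smr_read_enter();		/* lock-free lookup under SMR */
	node = art_match(root, addr);
	if (node != NULL)
		peer = ((struct wg_aip *)node)->a_peer;
	smr_read_leave();

	return (peer);
}
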
> Index: if_tun.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_tun.c,v
> diff -u -p -r1.251 if_tun.c
> --- if_tun.c 2 Mar 2025 21:28:32 -0000 1.251
> +++ if_tun.c 13 Jun 2025 01:59:37 -0000
> @@ -318,8 +318,6 @@ tun_clone_destroy(struct ifnet *ifp)
>
> if (vfinddev(dev, VCHR, &vp))
> VOP_REVOKE(vp, REVOKEALL);
> -
> - KASSERT(sc->sc_dev == 0);
> }
>
> /* prevent userland from getting to the device again */
> Index: if_wg.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_wg.c,v
> diff -u -p -r1.42 if_wg.c
> --- if_wg.c 17 Apr 2025 12:42:50 -0000 1.42
> +++ if_wg.c 13 Jun 2025 01:59:37 -0000
> @@ -31,6 +31,7 @@
> #include <sys/ioctl.h>
> #include <sys/mbuf.h>
> #include <sys/syslog.h>
> +#include <sys/smr.h>
>
> #include <net/if.h>
> #include <net/if_var.h>
> @@ -251,10 +252,11 @@ struct wg_softc {
> struct socket *sc_so6;
> #endif
>
> + struct rwlock sc_aip_lock;
> size_t sc_aip_num;
> - struct art_root *sc_aip4;
> + struct art *sc_aip4;
> #ifdef INET6
> - struct art_root *sc_aip6;
> + struct art *sc_aip6;
> #endif
>
> struct rwlock sc_peer_lock;
> @@ -290,7 +292,7 @@ void wg_peer_counters_add(struct wg_peer
>
> int wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *);
> struct wg_peer *
> - wg_aip_lookup(struct art_root *, void *);
> + wg_aip_lookup(struct art *, void *);
> int wg_aip_remove(struct wg_softc *, struct wg_peer *,
> struct wg_aip_io *);
>
> @@ -609,7 +611,7 @@ wg_peer_counters_add(struct wg_peer *pee
> int
> wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
> {
> - struct art_root *root;
> + struct art *root;
> struct art_node *node;
> struct wg_aip *aip;
> int ret = 0;
> @@ -625,8 +627,10 @@ wg_aip_add(struct wg_softc *sc, struct w
> if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT|PR_ZERO)) == NULL)
> return ENOBUFS;
>
> - rw_enter_write(&root->ar_lock);
> - node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr);
> + art_node_init(&aip->a_node, &d->a_addr.addr_bytes, d->a_cidr);
> +
> + rw_enter_write(&sc->sc_aip_lock);
> + node = art_insert(root, &aip->a_node);
>
> if (node == &aip->a_node) {
> aip->a_peer = peer;
> @@ -642,18 +646,18 @@ wg_aip_add(struct wg_softc *sc, struct w
> aip->a_peer = peer;
> }
> }
> - rw_exit_write(&root->ar_lock);
> + rw_exit_write(&sc->sc_aip_lock);
> return ret;
> }
>
> struct wg_peer *
> -wg_aip_lookup(struct art_root *root, void *addr)
> +wg_aip_lookup(struct art *root, void *addr)
> {
> - struct srp_ref sr;
> struct art_node *node;
>
> - node = art_match(root, addr, &sr);
> - srp_leave(&sr);
> + smr_read_enter();
> + node = art_match(root, addr);
> + smr_read_leave();
>
> return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer;
> }
> @@ -661,8 +665,7 @@ wg_aip_lookup(struct art_root *root, voi
> int
> wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
> {
> - struct srp_ref sr;
> - struct art_root *root;
> + struct art *root;
> struct art_node *node;
> struct wg_aip *aip;
> int ret = 0;
> @@ -675,23 +678,21 @@ wg_aip_remove(struct wg_softc *sc, struc
> default: return EAFNOSUPPORT;
> }
>
> - rw_enter_write(&root->ar_lock);
> - if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) {
> + rw_enter_write(&sc->sc_aip_lock);
> + if ((node = art_lookup(root, &d->a_addr, d->a_cidr)) == NULL) {
> ret = ENOENT;
> } else if (((struct wg_aip *) node)->a_peer != peer) {
> ret = EXDEV;
> } else {
> aip = (struct wg_aip *)node;
> - if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL)
> + if (art_delete(root, &d->a_addr, d->a_cidr) == NULL)
> panic("art_delete failed to delete node %p", node);
>
> sc->sc_aip_num--;
> LIST_REMOVE(aip, a_entry);
> pool_put(&wg_aip_pool, aip);
> }
> -
> - srp_leave(&sr);
> - rw_exit_write(&root->ar_lock);
> + rw_exit_write(&sc->sc_aip_lock);
> return ret;
> }
>
> @@ -2726,10 +2727,11 @@ wg_clone_create(struct if_clone *ifc, in
> #endif
>
> sc->sc_aip_num = 0;
> - if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL)
> + rw_init(&sc->sc_aip_lock, "wgaip");
> + if ((sc->sc_aip4 = art_alloc(32)) == NULL)
> goto ret_02;
> #ifdef INET6
> - if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL)
> + if ((sc->sc_aip6 = art_alloc(128)) == NULL)
> goto ret_03;
> #endif
>
> Index: if_wg.h
> ===================================================================
> RCS file: /cvs/src/sys/net/if_wg.h,v
> diff -u -p -r1.6 if_wg.h
> --- if_wg.h 13 Oct 2024 00:53:21 -0000 1.6
> +++ if_wg.h 13 Jun 2025 01:59:37 -0000
> @@ -46,6 +46,7 @@ struct wg_aip_io {
> sa_family_t a_af;
> int a_cidr;
> union wg_aip_addr {
> + uint8_t addr_bytes;
> struct in_addr addr_ipv4;
> struct in6_addr addr_ipv6;
> } a_addr;
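
the new addr_bytes member only exists so that &d->a_addr.addr_bytes
yields a uint8_t * to the first byte of whichever address the union
holds, i.e. the key art_node_init() expects. illustration, with a
hypothetical variable:

	union wg_aip_addr a;
	uint8_t *key = &a.addr_bytes;	/* same address as &a.addr_ipv4 */
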
> Index: route.h
> ===================================================================
> RCS file: /cvs/src/sys/net/route.h,v
> diff -u -p -r1.216 route.h
> --- route.h 16 Mar 2025 21:58:08 -0000 1.216
> +++ route.h 13 Jun 2025 01:59:37 -0000
> @@ -40,7 +40,7 @@
> * I immutable after creation
> * N net lock
> * X exclusive net lock, or shared net lock + kernel lock
> - * R art (rtable) lock
> + * R rtable lock
> * r per route entry mutex rt_mtx
> * L arp/nd6/etc lock for updates, net lock for reads
> * T rttimer_mtx route timer lists
> @@ -117,7 +117,7 @@ struct rttimer;
>
> struct rtentry {
> struct sockaddr *rt_dest; /* [I] destination */
> - SRPL_ENTRY(rtentry) rt_next; /* [R] next mpath entry to our dst */
> + struct rtentry *rt_next; /* [R] next mpath entry to our dst */
> struct sockaddr *rt_gateway; /* [X] gateway address */
> struct ifaddr *rt_ifa; /* [N] interface addr to use */
> caddr_t rt_llinfo; /* [L] pointer to link level info or
> Index: rtable.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtable.c,v
> diff -u -p -r1.87 rtable.c
> --- rtable.c 9 Apr 2024 12:53:08 -0000 1.87
> +++ rtable.c 13 Jun 2025 01:59:37 -0000
> @@ -26,12 +26,21 @@
> #include <sys/queue.h>
> #include <sys/domain.h>
> #include <sys/srp.h>
> +#include <sys/smr.h>
> #endif
>
> #include <net/rtable.h>
> #include <net/route.h>
> #include <net/art.h>
>
> +struct rtable {
> + struct rwlock r_lock;
> + struct art *r_art; /* [I] */
> + unsigned int r_off; /* [I] Offset of key in bytes */
> +
> + struct sockaddr *r_source; /* [N] use optional src addr */
> +};
> +
> /*
> * Structures used by rtable_get() to retrieve the corresponding
> * routing table for a given pair of ``af'' and ``rtableid''.
> @@ -85,8 +94,8 @@ void rtmap_dtor(void *, void *);
> struct srp_gc rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL);
>
> void rtable_init_backend(void);
> -void *rtable_alloc(unsigned int, unsigned int, unsigned int);
> -void *rtable_get(unsigned int, sa_family_t);
> +struct rtable *rtable_alloc(unsigned int, unsigned int, unsigned int);
> +struct rtable *rtable_get(unsigned int, sa_family_t);
>
> void
> rtmap_init(void)
> @@ -193,7 +202,7 @@ int
> rtable_add(unsigned int id)
> {
> const struct domain *dp;
> - void *tbl;
> + struct rtable *tbl;
> struct rtmap *map;
> struct dommp *dmm;
> sa_family_t af;
> @@ -244,11 +253,11 @@ out:
> return (error);
> }
>
> -void *
> +struct rtable *
> rtable_get(unsigned int rtableid, sa_family_t af)
> {
> struct rtmap *map;
> - void *tbl = NULL;
> + struct rtable *tbl = NULL;
> struct srp_ref sr;
>
> if (af >= nitems(af2idx) || af2idx[af] == 0)
> @@ -286,7 +295,7 @@ rtable_empty(unsigned int rtableid)
> {
> const struct domain *dp;
> int i;
> - struct art_root *tbl;
> + struct rtable *tbl;
>
> for (i = 0; (dp = domains[i]) != NULL; i++) {
> if (dp->dom_rtoffset == 0)
> @@ -295,7 +304,7 @@ rtable_empty(unsigned int rtableid)
> tbl = rtable_get(rtableid, dp->dom_family);
> if (tbl == NULL)
> continue;
> - if (tbl->ar_root.ref != NULL)
> + if (!art_is_empty(tbl->r_art))
> return (0);
> }
>
> @@ -350,40 +359,51 @@ rtable_l2set(unsigned int rtableid, unsi
> }
>
>
> -static inline const uint8_t *satoaddr(struct art_root *,
> +static inline const uint8_t *satoaddr(struct rtable *,
> const struct sockaddr *);
>
> -int an_match(struct art_node *, const struct sockaddr *, int);
> -void rtentry_ref(void *, void *);
> -void rtentry_unref(void *, void *);
> -
> void rtable_mpath_insert(struct art_node *, struct rtentry *);
>
> -struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL);
> -
> void
> rtable_init_backend(void)
> {
> - art_init();
> + art_boot();
> }
>
> -void *
> +struct rtable *
> rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
> {
> - return (art_alloc(rtableid, alen, off));
> + struct rtable *r;
> +
> + r = malloc(sizeof(*r), M_RTABLE, M_NOWAIT|M_ZERO);
> + if (r == NULL)
> + return (r);
> +
> +
> + r->r_art = art_alloc(alen);
> + if (r->r_art == NULL) {
> + free(r, M_RTABLE, sizeof(*r));
> + return (NULL);
> + }
> +
> + rw_init(&r->r_lock, "rtable");
> + r->r_off = off;
> + r->r_source = NULL;
> +
> + return (r);
> }
>
> int
> rtable_setsource(unsigned int rtableid, int af, struct sockaddr *src)
> {
> - struct art_root *ar;
> + struct rtable *r;
>
> NET_ASSERT_LOCKED_EXCLUSIVE();
>
> - if ((ar = rtable_get(rtableid, af)) == NULL)
> + if ((r = rtable_get(rtableid, af)) == NULL)
> return (EAFNOSUPPORT);
>
> - ar->ar_source = src;
> + r->r_source = src;
>
> return (0);
> }
> @@ -391,15 +411,15 @@ rtable_setsource(unsigned int rtableid,
> struct sockaddr *
> rtable_getsource(unsigned int rtableid, int af)
> {
> - struct art_root *ar;
> + struct rtable *r;
>
> NET_ASSERT_LOCKED();
>
> - ar = rtable_get(rtableid, af);
> - if (ar == NULL)
> + r = rtable_get(rtableid, af);
> + if (r == NULL)
> return (NULL);
>
> - return (ar->ar_source);
> + return (r->r_source);
> }
>
> void
> @@ -419,37 +439,34 @@ struct rtentry *
> rtable_lookup(unsigned int rtableid, const struct sockaddr *dst,
> const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio)
> {
> - struct art_root *ar;
> + struct rtable *r;
> struct art_node *an;
> struct rtentry *rt = NULL;
> - struct srp_ref sr, nsr;
> const uint8_t *addr;
> int plen;
>
> - ar = rtable_get(rtableid, dst->sa_family);
> - if (ar == NULL)
> + r = rtable_get(rtableid, dst->sa_family);
> + if (r == NULL)
> return (NULL);
>
> - addr = satoaddr(ar, dst);
> + addr = satoaddr(r, dst);
>
> - /* No need for a perfect match. */
> + smr_read_enter();
> if (mask == NULL) {
> - an = art_match(ar, addr, &nsr);
> - if (an == NULL)
> - goto out;
> + /* No need for a perfect match. */
> + an = art_match(r->r_art, addr);
> } else {
> plen = rtable_satoplen(dst->sa_family, mask);
> if (plen == -1)
> return (NULL);
>
> - an = art_lookup(ar, addr, plen, &nsr);
> -
> - /* Make sure we've got a perfect match. */
> - if (!an_match(an, dst, plen))
> - goto out;
> + an = art_lookup(r->r_art, addr, plen);
> }
> + if (an == NULL)
> + goto out;
>
> - SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
> + for (rt = SMR_PTR_GET(&an->an_value); rt != NULL;
> + rt = SMR_PTR_GET(&rt->rt_next)) {
> if (prio != RTP_ANY &&
> (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
> continue;
> @@ -464,9 +481,8 @@ rtable_lookup(unsigned int rtableid, con
> if (rt != NULL)
> rtref(rt);
>
> - SRPL_LEAVE(&sr);
> out:
> - srp_leave(&nsr);
> + smr_read_leave();
>
> return (rt);
> }
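
the list walk and the rtref() both sit inside the SMR read section,
which is what keeps rt alive once we leave it. a userspace model of the
read side, with a C11 acquire load standing in for SMR_PTR_GET
(assumption: RTP_MASK mirrors route.h):

	#include <stdatomic.h>
	#include <stddef.h>

	#define RTP_MASK	0x7f	/* assumption: as in route.h */

	struct rtentry {
		_Atomic(struct rtentry *) rt_next;
		int rt_priority;
	};

	/* find the next entry in the same priority class */
	static struct rtentry *
	next_same_prio(struct rtentry *rt, int prio)
	{
		struct rtentry *mrt = rt;

		while ((mrt = atomic_load_explicit(&mrt->rt_next,
		    memory_order_acquire)) != NULL) {
			if ((mrt->rt_priority & RTP_MASK) ==
			    (prio & RTP_MASK))
				return (mrt);
		}
		return (NULL);
	}
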
> @@ -474,44 +490,41 @@ out:
> struct rtentry *
> rtable_match(unsigned int rtableid, const struct sockaddr *dst, uint32_t *src)
> {
> - struct art_root *ar;
> + struct rtable *r;
> struct art_node *an;
> struct rtentry *rt = NULL;
> - struct srp_ref sr, nsr;
> const uint8_t *addr;
> int hash;
> + uint8_t prio;
>
> - ar = rtable_get(rtableid, dst->sa_family);
> - if (ar == NULL)
> + r = rtable_get(rtableid, dst->sa_family);
> + if (r == NULL)
> return (NULL);
>
> - addr = satoaddr(ar, dst);
> + addr = satoaddr(r, dst);
>
> - an = art_match(ar, addr, &nsr);
> + smr_read_enter();
> + an = art_match(r->r_art, addr);
> if (an == NULL)
> goto out;
>
> - rt = SRPL_FIRST(&sr, &an->an_rtlist);
> - if (rt == NULL) {
> - SRPL_LEAVE(&sr);
> - goto out;
> - }
> - rtref(rt);
> - SRPL_LEAVE(&sr);
> + rt = SMR_PTR_GET(&an->an_value);
> + KASSERT(rt != NULL);
> + prio = rt->rt_priority;
>
> /* Gateway selection by Hash-Threshold (RFC 2992) */
> if ((hash = rt_hash(rt, dst, src)) != -1) {
> struct rtentry *mrt;
> - int threshold, npaths = 0;
> + int threshold, npaths = 1;
>
> KASSERT(hash <= 0xffff);
>
> - SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) {
> - /* Only count nexthops with the same priority. */
> - if (mrt->rt_priority == rt->rt_priority)
> + /* Only count nexthops with the same priority. */
> + mrt = rt;
> + while ((mrt = SMR_PTR_GET(&mrt->rt_next)) != NULL) {
> + if (mrt->rt_priority == prio)
> npaths++;
> }
> - SRPL_LEAVE(&sr);
>
> threshold = (0xffff / npaths) + 1;
>
> @@ -520,27 +533,22 @@ rtable_match(unsigned int rtableid, cons
> * route list attached to the node, so we won't necessarily
> * have the same number of routes. for most modifications,
> * we'll pick a route that we wouldn't have if we only saw the
> - * list before or after the change. if we were going to use
> - * the last available route, but it got removed, we'll hit
> - * the end of the list and then pick the first route.
> + * list before or after the change.
> */
> -
> - mrt = SRPL_FIRST(&sr, &an->an_rtlist);
> - while (hash > threshold && mrt != NULL) {
> - if (mrt->rt_priority == rt->rt_priority)
> + mrt = rt;
> + while (hash > threshold) {
> + if (mrt->rt_priority == prio) {
> + rt = mrt;
> hash -= threshold;
> - mrt = SRPL_FOLLOW(&sr, mrt, rt_next);
> - }
> -
> - if (mrt != NULL) {
> - rtref(mrt);
> - rtfree(rt);
> - rt = mrt;
> + }
> + mrt = SMR_PTR_GET(&mrt->rt_next);
> + if (mrt == NULL)
> + break;
> }
> - SRPL_LEAVE(&sr);
> }
> + rtref(rt);
> out:
> - srp_leave(&nsr);
> + smr_read_leave();
> return (rt);
> }
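
for reference, the hash-threshold arithmetic: with npaths
equal-priority nexthops, threshold = (0xffff / npaths) + 1 and the
subtract loop picks roughly path hash / threshold, clamped at the end
of the list. a standalone model (not kernel code; all paths assumed to
share one priority):

	#include <stdio.h>

	int
	main(void)
	{
		int npaths = 3;
		int threshold = (0xffff / npaths) + 1;	/* 21846 */
		int hash;

		for (hash = 0; hash <= 0xffff; hash += 0x3fff) {
			int h = hash, path = 0;

			/* mirrors the loop above: one step per threshold */
			while (h > threshold && path < npaths - 1) {
				h -= threshold;
				path++;
			}
			printf("hash %5d -> path %d\n", hash, path);
		}
		return (0);
	}
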
>
> @@ -549,35 +557,59 @@ rtable_insert(unsigned int rtableid, str
> const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio,
> struct rtentry *rt)
> {
> - struct rtentry *mrt;
> - struct srp_ref sr;
> - struct art_root *ar;
> + struct rtable *r;
> struct art_node *an, *prev;
> const uint8_t *addr;
> int plen;
> unsigned int rt_flags;
> int error = 0;
>
> - ar = rtable_get(rtableid, dst->sa_family);
> - if (ar == NULL)
> + r = rtable_get(rtableid, dst->sa_family);
> + if (r == NULL)
> return (EAFNOSUPPORT);
>
> - addr = satoaddr(ar, dst);
> + addr = satoaddr(r, dst);
> plen = rtable_satoplen(dst->sa_family, mask);
> if (plen == -1)
> return (EINVAL);
>
> - rtref(rt); /* guarantee rtfree won't do anything during insert */
> - rw_enter_write(&ar->ar_lock);
> + an = art_get(addr, plen);
> + if (an == NULL)
> + return (ENOMEM);
>
> - /* Do not permit exactly the same dst/mask/gw pair. */
> - an = art_lookup(ar, addr, plen, &sr);
> - srp_leave(&sr); /* an can't go away while we have the lock */
> - if (an_match(an, dst, plen)) {
> - struct rtentry *mrt;
> - int mpathok = ISSET(rt->rt_flags, RTF_MPATH);
> + /* prepare for immediate operation if insert succeeds */
> + rt_flags = rt->rt_flags;
> + rt->rt_flags &= ~RTF_MPATH;
> + rt->rt_dest = dst;
> + rt->rt_plen = plen;
> + rt->rt_next = NULL;
>
> - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
> + rtref(rt); /* take a ref for the table */
> + an->an_value = rt;
> +
> + rw_enter_write(&r->r_lock);
> + prev = art_insert(r->r_art, an);
> + if (prev == NULL) {
> + error = ENOMEM;
> + goto put;
> + }
> +
> + if (prev != an) {
> + struct rtentry *mrt;
> + int mpathok = ISSET(rt_flags, RTF_MPATH);
> + int mpath = 0;
> +
> + /*
> + * An ART node with the same destination/netmask already
> + * exists.
> + */
> + art_put(an);
> + an = prev;
> +
> + /* Do not permit exactly the same dst/mask/gw pair. */
> + for (mrt = SMR_PTR_GET_LOCKED(&an->an_value);
> + mrt != NULL;
> + mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) {
> if (prio != RTP_ANY &&
> (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
> continue;
> @@ -589,63 +621,34 @@ rtable_insert(unsigned int rtableid, str
> error = EEXIST;
> goto leave;
> }
> - }
> - }
> -
> - an = art_get(plen);
> - if (an == NULL) {
> - error = ENOBUFS;
> - goto leave;
> - }
> -
> - /* prepare for immediate operation if insert succeeds */
> - rt_flags = rt->rt_flags;
> - rt->rt_flags &= ~RTF_MPATH;
> - rt->rt_dest = dst;
> - rt->rt_plen = plen;
> - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
> -
> - prev = art_insert(ar, an, addr, plen);
> - if (prev != an) {
> - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
> - rt_next);
> - rt->rt_flags = rt_flags;
> - art_put(an);
> -
> - if (prev == NULL) {
> - error = ESRCH;
> - goto leave;
> + mpath = RTF_MPATH;
> }
>
> - an = prev;
> -
> - mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
> - KASSERT(mrt != NULL);
> - KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio);
> + /* The new route can be added to the list. */
> + if (mpath) {
> + SET(rt->rt_flags, RTF_MPATH);
> +
> + for (mrt = SMR_PTR_GET_LOCKED(&an->an_value);
> + mrt != NULL;
> + mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) {
> + if ((mrt->rt_priority & RTP_MASK) !=
> + (prio & RTP_MASK))
> + continue;
>
> - /*
> - * An ART node with the same destination/netmask already
> - * exists, MPATH conflict must have been already checked.
> - */
> - if (rt->rt_flags & RTF_MPATH) {
> - /*
> - * Only keep the RTF_MPATH flag if two routes have
> - * the same gateway.
> - */
> - rt->rt_flags &= ~RTF_MPATH;
> - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
> - if (mrt->rt_priority == prio) {
> - mrt->rt_flags |= RTF_MPATH;
> - rt->rt_flags |= RTF_MPATH;
> - }
> + SET(mrt->rt_flags, RTF_MPATH);
> }
> }
>
> /* Put newly inserted entry at the right place. */
> rtable_mpath_insert(an, rt);
> }
> + rw_exit_write(&r->r_lock);
> + return (error);
> +
> +put:
> + art_put(an);
> leave:
> - rw_exit_write(&ar->ar_lock);
> + rw_exit_write(&r->r_lock);
> rtfree(rt);
> return (error);
> }
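
one behaviour change worth noting: rt_flags is saved and RTF_MPATH
stripped up front, but the EEXIST path no longer restores the saved
flags before rtfree() the way the removed code did. if callers look at
rt_flags after a failed insert, the restore should come back, e.g.:

	leave:
		rt->rt_flags = rt_flags;	/* undo the early clear */
		rw_exit_write(&r->r_lock);
		rtfree(rt);
		return (error);
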
> @@ -654,118 +657,134 @@ int
> rtable_delete(unsigned int rtableid, const struct sockaddr *dst,
> const struct sockaddr *mask, struct rtentry *rt)
> {
> - struct art_root *ar;
> + struct rtable *r;
> struct art_node *an;
> - struct srp_ref sr;
> const uint8_t *addr;
> int plen;
> struct rtentry *mrt;
> - int npaths = 0;
> - int error = 0;
>
> - ar = rtable_get(rtableid, dst->sa_family);
> - if (ar == NULL)
> + r = rtable_get(rtableid, dst->sa_family);
> + if (r == NULL)
> return (EAFNOSUPPORT);
>
> - addr = satoaddr(ar, dst);
> + addr = satoaddr(r, dst);
> plen = rtable_satoplen(dst->sa_family, mask);
> if (plen == -1)
> return (EINVAL);
>
> - rtref(rt); /* guarantee rtfree won't do anything under ar_lock */
> - rw_enter_write(&ar->ar_lock);
> - an = art_lookup(ar, addr, plen, &sr);
> - srp_leave(&sr); /* an can't go away while we have the lock */
> -
> - /* Make sure we've got a perfect match. */
> - if (!an_match(an, dst, plen)) {
> - error = ESRCH;
> - goto leave;
> + rw_enter_write(&r->r_lock);
> + smr_read_enter();
> + an = art_lookup(r->r_art, addr, plen);
> + smr_read_leave();
> + if (an == NULL) {
> + rw_exit_write(&r->r_lock);
> + return (ESRCH);
> }
>
> - /*
> - * If other multipath route entries are still attached to
> - * this ART node we only have to unlink it.
> - */
> - SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next)
> - npaths++;
> -
> - if (npaths > 1) {
> - KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
> - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
> - rt_next);
> -
> - mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
> - if (npaths == 2)
> - mrt->rt_flags &= ~RTF_MPATH;
> + /* If this is the only route in the list then we can delete the node */
> + if (SMR_PTR_GET_LOCKED(&an->an_value) == rt &&
> + SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) {
> + struct art_node *oan;
> + oan = art_delete(r->r_art, addr, plen);
> + if (oan != an)
> + panic("art %p changed shape during delete", r->r_art);
> + art_put(an);
> + /*
> + * XXX an and the rt ref could still be alive on other cpus.
> + * this currently works because of the NET_LOCK/KERNEL_LOCK
> + * but should be fixed if we want to do route lookups outside
> + * these locks. - dlg@
> + */
> + } else {
> + struct rtentry **prt = (struct rtentry **)&an->an_value;
> + struct rtentry *nrt;
> + unsigned int found = 0;
> + unsigned int npaths = 0;
>
> - goto leave;
> + /*
> + * If other multipath route entries are still attached to
> + * this ART node we only have to unlink it.
> + */
> + while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
> + if (mrt == rt) {
> + found = 1;
> + SMR_PTR_SET_LOCKED(prt,
> + SMR_PTR_GET_LOCKED(&mrt->rt_next));
> + } else if ((mrt->rt_priority & RTP_MASK) ==
> + (rt->rt_priority & RTP_MASK)) {
> + npaths++;
> + nrt = mrt;
> + }
> + }
> + if (!found)
> + panic("removing non-existent route");
> + if (npaths == 1)
> + CLR(nrt->rt_flags, RTF_MPATH);
> }
> -
> - if (art_delete(ar, an, addr, plen) == NULL)
> - panic("art_delete failed to find node %p", an);
> -
> KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
> - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
> - art_put(an);
> -
> -leave:
> - rw_exit_write(&ar->ar_lock);
> + rw_exit_write(&r->r_lock);
> rtfree(rt);
>
> - return (error);
> -}
> -
> -struct rtable_walk_cookie {
> - int (*rwc_func)(struct rtentry *, void *, unsigned int);
> - void *rwc_arg;
> - struct rtentry **rwc_prt;
> - unsigned int rwc_rid;
> -};
> -
> -/*
> - * Helper for rtable_walk to keep the ART code free from any "struct rtentry".
> - */
> -int
> -rtable_walk_helper(struct art_node *an, void *xrwc)
> -{
> - struct srp_ref sr;
> - struct rtable_walk_cookie *rwc = xrwc;
> - struct rtentry *rt;
> - int error = 0;
> -
> - SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
> - error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid);
> - if (error != 0)
> - break;
> - }
> - if (rwc->rwc_prt != NULL && rt != NULL) {
> - rtref(rt);
> - *rwc->rwc_prt = rt;
> - }
> - SRPL_LEAVE(&sr);
> -
> - return (error);
> + return (0);
> }
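
the unlink loop above never advances prt when mrt != rt, so it spins
forever (holding r_lock) as soon as the list contains any entry other
than rt. i think it needs to step to &mrt->rt_next in the no-match case
only; after the unlink, *prt already names the successor. sketch:

	while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
		if (mrt == rt) {
			found = 1;
			SMR_PTR_SET_LOCKED(prt,
			    SMR_PTR_GET_LOCKED(&mrt->rt_next));
			continue;	/* *prt is now the successor */
		}
		if ((mrt->rt_priority & RTP_MASK) ==
		    (rt->rt_priority & RTP_MASK)) {
			npaths++;
			nrt = mrt;
		}
		prt = &mrt->rt_next;
	}
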
>
> int
> rtable_walk(unsigned int rtableid, sa_family_t af, struct rtentry **prt,
> int (*func)(struct rtentry *, void *, unsigned int), void *arg)
> {
> - struct art_root *ar;
> - struct rtable_walk_cookie rwc;
> + struct rtable *r;
> + struct art_iter ai;
> + struct art_node *an;
> int error;
>
> - ar = rtable_get(rtableid, af);
> - if (ar == NULL)
> + r = rtable_get(rtableid, af);
> + if (r == NULL)
> return (EAFNOSUPPORT);
>
> - rwc.rwc_func = func;
> - rwc.rwc_arg = arg;
> - rwc.rwc_prt = prt;
> - rwc.rwc_rid = rtableid;
> + rw_enter_write(&r->r_lock);
> + ART_FOREACH(an, r->r_art, &ai) {
> + /*
> + * ART nodes have a list of rtentries.
> + *
> + * art_iter holds references to the topology so it won't
> + * change, but it does not pin the art_node or its rtentries.
> + */
> + struct rtentry *rt = SMR_PTR_GET_LOCKED(&an->an_value);
> + rtref(rt);
> +
> + rw_exit_write(&r->r_lock);
> + do {
> + struct rtentry *nrt;
> +
> + smr_read_enter();
> + /* Get ready for the next entry. */
> + nrt = SMR_PTR_GET(&rt->rt_next);
> + if (nrt != NULL)
> + rtref(nrt);
> + smr_read_leave();
> +
> + error = func(rt, arg, rtableid);
> + if (error != 0) {
> + if (prt != NULL)
> + *prt = rt;
> + else
> + rtfree(rt);
> +
> + if (nrt != NULL)
> + rtfree(nrt);
> +
> + rw_enter_write(&r->r_lock);
> + art_iter_close(&ai);
> + rw_exit_write(&r->r_lock);
> + return (error);
> + }
>
> - error = art_walk(ar, rtable_walk_helper, &rwc);
> + rtfree(rt);
> + rt = nrt;
> + } while (rt != NULL);
> + rw_enter_write(&r->r_lock);
> + }
> + rw_exit_write(&r->r_lock);
>
> return (error);
> }
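
if the table is empty, ART_FOREACH never runs its body and the final
return (error) reads an uninitialized variable; now that the art_walk()
call that used to set it is gone, error wants to start at 0:

	struct art_node *an;
	int error = 0;		/* visiting nothing is success */
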
> @@ -774,12 +793,12 @@ struct rtentry *
> rtable_iterate(struct rtentry *rt0)
> {
> struct rtentry *rt = NULL;
> - struct srp_ref sr;
>
> - rt = SRPL_NEXT(&sr, rt0, rt_next);
> + smr_read_enter();
> + rt = SMR_PTR_GET(&rt0->rt_next);
> if (rt != NULL)
> rtref(rt);
> - SRPL_LEAVE(&sr);
> + smr_read_leave();
> rtfree(rt0);
> return (rt);
> }
> @@ -794,27 +813,28 @@ int
> rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
> int plen, uint8_t prio, struct rtentry *rt)
> {
> - struct art_root *ar;
> + struct rtable *r;
> struct art_node *an;
> - struct srp_ref sr;
> const uint8_t *addr;
> int error = 0;
>
> - ar = rtable_get(rtableid, dst->sa_family);
> - if (ar == NULL)
> + r = rtable_get(rtableid, dst->sa_family);
> + if (r == NULL)
> return (EAFNOSUPPORT);
>
> - addr = satoaddr(ar, dst);
> -
> - rw_enter_write(&ar->ar_lock);
> - an = art_lookup(ar, addr, plen, &sr);
> - srp_leave(&sr); /* an can't go away while we have the lock */
> + addr = satoaddr(r, dst);
>
> - /* Make sure we've got a perfect match. */
> - if (!an_match(an, dst, plen)) {
> + rw_enter_write(&r->r_lock);
> + smr_read_enter();
> + an = art_lookup(r->r_art, addr, plen);
> + smr_read_leave();
> + if (an == NULL) {
> error = ESRCH;
> - } else if (SRPL_FIRST_LOCKED(&an->an_rtlist) == rt &&
> - SRPL_NEXT_LOCKED(rt, rt_next) == NULL) {
> + goto leave;
> + }
> +
> + if (SMR_PTR_GET_LOCKED(&an->an_value) == rt &&
> + SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) {
> /*
> * If there's only one entry on the list do not go
> * through an insert/remove cycle. This is done to
> @@ -823,15 +843,22 @@ rtable_mpath_reprio(unsigned int rtablei
> */
> rt->rt_priority = prio;
> } else {
> - rtref(rt); /* keep rt alive in between remove and insert */
> - SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
> - rt, rtentry, rt_next);
> + struct rtentry **prt = (struct rtentry **)&an->an_value;
> + struct rtentry *mrt;
> +
> + while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
> + if (mrt == rt)
> + break;
> + }
> + KASSERT(mrt != NULL);
> +
> + SMR_PTR_SET_LOCKED(prt, SMR_PTR_GET_LOCKED(&rt->rt_next));
> rt->rt_priority = prio;
> rtable_mpath_insert(an, rt);
> - rtfree(rt);
> error = EAGAIN;
> }
> - rw_exit_write(&ar->ar_lock);
> +leave:
> + rw_exit_write(&r->r_lock);
>
> return (error);
> }
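
same pattern here: the search loop never advances prt, so unless rt is
the first entry on the list it never terminates. with the advance added
the unlink below stays correct, because prt then points at rt's slot:

	while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
		if (mrt == rt)
			break;
		prt = &mrt->rt_next;
	}
	KASSERT(mrt == rt);
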
> @@ -839,63 +866,19 @@ rtable_mpath_reprio(unsigned int rtablei
> void
> rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
> {
> - struct rtentry *mrt, *prt = NULL;
> + struct rtentry *mrt, **prt;
> uint8_t prio = rt->rt_priority;
>
> - if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) == NULL) {
> - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
> - return;
> - }
> + prt = (struct rtentry **)&an->an_value;
>
> /* Iterate until we find the route to be placed after ``rt''. */
> - while (mrt->rt_priority <= prio && SRPL_NEXT_LOCKED(mrt, rt_next)) {
> - prt = mrt;
> - mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
> - }
> -
> - if (mrt->rt_priority <= prio) {
> - SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
> - } else if (prt != NULL) {
> - SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt, rt_next);
> - } else {
> - SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
> + while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
> + if (mrt->rt_priority >= prio)
> + break;
> }
> -}
>
> -/*
> - * Returns 1 if ``an'' perfectly matches (``dst'', ``plen''), 0 otherwise.
> - */
> -int
> -an_match(struct art_node *an, const struct sockaddr *dst, int plen)
> -{
> - struct rtentry *rt;
> - struct srp_ref sr;
> - int match;
> -
> - if (an == NULL || an->an_plen != plen)
> - return (0);
> -
> - rt = SRPL_FIRST(&sr, &an->an_rtlist);
> - match = (rt != NULL && memcmp(rt->rt_dest, dst, dst->sa_len) == 0);
> - SRPL_LEAVE(&sr);
> -
> - return (match);
> -}
> -
> -void
> -rtentry_ref(void *null, void *xrt)
> -{
> - struct rtentry *rt = xrt;
> -
> - rtref(rt);
> -}
> -
> -void
> -rtentry_unref(void *null, void *xrt)
> -{
> - struct rtentry *rt = xrt;
> -
> - rtfree(rt);
> + SMR_PTR_SET_LOCKED(&rt->rt_next, mrt);
> + SMR_PTR_SET_LOCKED(prt, rt);
> }
>
> /*
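
rtable_mpath_insert() has the missing advance too: the while loop
re-reads *prt forever unless the first entry already has rt_priority >=
prio. this runs under r_lock from rtable_insert(), so inserting a
second route with a higher priority value wedges the insert path, which
looks a lot like the hang this regress run tripped over. with the
advance added, the two stores below are already in the right order:
rt->rt_next is published before rt itself becomes reachable. sketch:

	prt = (struct rtentry **)&an->an_value;
	while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
		if (mrt->rt_priority >= prio)
			break;
		prt = &mrt->rt_next;
	}

	SMR_PTR_SET_LOCKED(&rt->rt_next, mrt);
	SMR_PTR_SET_LOCKED(prt, rt);
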
> @@ -904,9 +887,9 @@ rtentry_unref(void *null, void *xrt)
> * of "struct sockaddr" used by this routing table.
> */
> static inline const uint8_t *
> -satoaddr(struct art_root *at, const struct sockaddr *sa)
> +satoaddr(struct rtable *r, const struct sockaddr *sa)
> {
> - return (((const uint8_t *)sa) + at->ar_off);
> + return (((const uint8_t *)sa) + r->r_off);
> }
>
> /*
> Index: rtable.h
> ===================================================================
> RCS file: /cvs/src/sys/net/rtable.h,v
> diff -u -p -r1.30 rtable.h
> --- rtable.h 13 May 2024 01:15:53 -0000 1.30
> +++ rtable.h 13 Jun 2025 01:59:37 -0000
> @@ -28,6 +28,8 @@
> #define rt_plen(rt) ((rt)->rt_plen)
> #define RT_ROOT(rt) (0)
>
> +struct rtable;
> +
> int rtable_satoplen(sa_family_t, const struct sockaddr *);
>
> void rtable_init(void);