Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: use the SMR api instead of SRPs in art/rtable code
To:
David Gwynne <david@gwynne.id.au>
Cc:
tech@openbsd.org
Date:
Thu, 26 Jun 2025 22:33:50 +0200

Download raw body.

Thread
On Fri, Jun 13, 2025 at 04:44:56PM +1000, David Gwynne wrote:
> On Fri, Jun 13, 2025 at 12:03:37PM +1000, David Gwynne wrote:
> > there's other changes too, but moving to SMR is the important one.
> > 
> > SRPs complicated this code massively, so moving it to SMR is almost like
> > backing the SRP changes out. it becomes a lot more readable and similar
> > to other ART implementations, which can't hurt.
> > 
> > another change is strip non-ART code out of art.c, specifically the
> > rwlock and route sourceaddr info. these features have been moved
> > up a layer into rtable.c and are now stored as part of a new struct
> > rtable. again, this makes the art code easier to read and compare.
> > 
> > the other big change is to walking over an ART. there's now a
> > reentract and non-recursive iterator api which art_walk and rtable_walk
> > are now built on top of. rtable_walk can basically do a foreach
> > over the ART directly rather than having to pass a bunch of state
> > into callbacks to apply with art_walk. we can still do that if we
> > want, i just find this easier to deal with in rtable_walk directly.
> > 
> > i modified how the heaps are traversed inside ART as well to try and
> > minimise the pointer accesses as much as possible.
> > 
> > i'm sorry the diff is so big. i don't expect code reviews. it took
> > me 4 years and at least 3 attempts to do this diff. however, tests
> > would be much appreciated.
> > 
> > it looks like the generated code is smaller too.
> 
> apologies, i sent a slightly old version that missed some fixes around
> reprio and mpath insertion. i didn't use scp enough before sending it
> out.

This time it crashes in regress/sbin/ifconfig

==== run-ether-inet6-contiguous-netmask ====
/sbin/ifconfig vether99 destroy 2>/dev/null || true
/sbin/ifconfig vether99 create
/sbin/ifconfig vether99 inet6 fdd7:e83e:66bc:254::74 netmask ffff:ffff:ffff:ffff:ffff::
/sbin/ifconfig vether99 inet6 fdd7:e83e:66bc:254::74 delete
!   /sbin/ifconfig vether99 inet6 fdd7:e83e:66bc:254::74 netmask ffff:ffff:ffff:ffff:ffff:4000::
ifconfig: SIOCAIFADDR: Invalid argument
Timeout, server ot29 not responding.

panic: kernel diagnostic assertion "curcpu()->ci_schedstate.spc_smrdepth == 0" failed: file "/usr/src/sys/kern/subr_xxx.c", line 157
Stopped at      db_enter+0x14:  popq    %rbp
    TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
  57177  76970      0     0x14000      0x200    4  sensors
db_enter() at db_enter+0x14
panic(ffffffff825ee372) at panic+0xd5
__assert(ffffffff8262ac13,ffffffff8256b2e4,9d,ffffffff825e06cd) at __assert+0x29
assertwaitok() at assertwaitok+0xdc
mi_switch() at mi_switch+0x218
sched_idle(ffff8000491d2ff0) at sched_idle+0x13a
end trace frame: 0x0, count: 9
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.

ddb{1}> trace
db_enter() at db_enter+0x14
panic(ffffffff825ee372) at panic+0xd5
__assert(ffffffff8262ac13,ffffffff8256b2e4,9d,ffffffff825e06cd) at __assert+0x29
assertwaitok() at assertwaitok+0xdc
mi_switch() at mi_switch+0x218
sched_idle(ffff8000491d2ff0) at sched_idle+0x13a
end trace frame: 0x0, count: -6

ddb{1}> show register
rdi                                0
rsi                             0x10
rbp               0xffff800049772d50
rbx                                0
rdx               0xfe00000000000000
rcx                            0x282
rax                             0x85
r8                 0x101010101010101
r9                0x8080808080808080
r10               0xf324a6b3a496092d
r11               0x22dfcd0f06da1750
r12                                0
r13               0xffff8000491d32a0
r14                                0
r15               0xffff8000491d3bd8
rip               0xffffffff8113a474    db_enter+0x14
cs                               0x8
rflags                         0x206
rsp               0xffff800049772d50
ss                              0x10
db_enter+0x14:  popq    %rbp

ddb{1}> ps
   PID     TID   PPID    UID  S       FLAGS  WAIT          COMMAND
 65031  167185  95186      0  3    0x10008a  sigsusp       sh
 95186  413483  21722      0  3    0x10008a  sigsusp       make
 21722  181825  51668      0  3    0x10008a  sigsusp       sh
 51668  167105  35628      0  3    0x10008a  sigsusp       make
 79005  301972  51355      0  3    0x100082  piperd        gzip
 51355  408100  35628      0  3    0x100082  piperd        pax
 35628  214919   1303      0  3        0x82  piperd        perl
  1303  309262  77038      0  3    0x10008a  sigsusp       ksh
 77038  162015  15622      0  3        0x98  kqread        sshd-session
 15622  389973  71872      0  3        0x92  kqread        sshd-session
 29455  107815      1      0  3    0x100083  ttyin         getty
 46146   24277      1      0  3    0x100083  ttyin         getty
 38343  389764      1      0  3    0x100083  ttyin         getty
 70066   86976      1      0  3    0x100083  ttyin         getty
 55197  306882      1      0  3    0x100083  ttyin         getty
 31968   35364      1      0  3    0x100083  ttyin         getty
 52630  262572      1      0  3    0x100098  kqread        cron
 10440  346796      1     99  3   0x1100090  kqread        sndiod
 30983  382603      1    110  3    0x100090  kqread        sndiod
 75328  424278   7765     95  3   0x1100092  kqread        smtpd
 81455   23880   7765    103  3   0x1100092  kqread        smtpd
 60857  393451   7765     95  3   0x1100092  kqread        smtpd
 45873  224221   7765     95  3    0x100092  kqread        smtpd
 52702   99794   7765     95  3   0x1100092  kqread        smtpd
 58994  265172   7765     95  3   0x1100092  kqread        smtpd
  7765  434673      1      0  3    0x100080  kqread        smtpd
 48310  318854  73410     91  3        0x92  kqread        snmpd_metrics
 53609  308665  73410     91  3   0x1100092  kqread        snmpd
 73410  483655      1      0  3    0x100080  kqread        snmpd
 71872  506089      1      0  3        0x88  kqread        sshd
 64050   11918      0      0  3     0x14200  acct          acct
 95501  445237      0      0  3     0x14280  nfsidl        nfsio
 88388   89678      0      0  3     0x14280  nfsidl        nfsio
 98128  330474      0      0  3     0x14280  nfsidl        nfsio
 35300  299285      0      0  3     0x14280  nfsidl        nfsio
 43217  499466      1      0  3    0x100080  kqread        ntpd
 35901  227030  29522     83  3    0x100092  kqread        ntpd
 29522   54779      1     83  3   0x1100092  kqread        ntpd
 98047   59907  98236     74  3   0x1100092  bpf           pflogd
 98236  483990      1      0  3        0x80  sbwait        pflogd
 34550   98595  74398     73  3   0x1100090  kqread        syslogd
 74398  170351      1      0  3    0x100082  sbwait        syslogd
  9287  276295      1      0  3    0x100080  kqread        resolvd
 42289   49490  84025     77  3    0x100092  kqread        dhcpleased
 89025  311440  84025     77  3    0x100092  kqread        dhcpleased
 84025  198976      1      0  3        0x80  kqread        dhcpleased
  7560   51967  28967    115  3    0x100092  kqread        slaacd
 47554  502560  28967    115  3    0x100092  kqread        slaacd
 28967  140135      1      0  3    0x100080  kqread        slaacd
 80546  329744      0      0  3     0x14200  bored         smr
 47993   87991      0      0  3     0x14200  pgzero        zerothread
 94383   90344      0      0  3     0x14200  aiodoned      aiodoned
  9809  358278      0      0  3     0x14200  syncer        update
 59089   11376      0      0  3     0x14200  cleaner       cleaner
  5959   66152      0      0  3     0x14200  reaper        reaper
 31905  499218      0      0  3     0x14200  pgdaemon      pagedaemon
 34584     291      0      0  3     0x14200  bored         wsdisplay0
 73685  407600      0      0  3     0x14200  mmctsk        sdmmc0
 13891  413451      0      0  3     0x14200  usbtsk        usbtask
 30279  444504      0      0  3     0x14200  usbatsk       usbatsk
 68270  152571      0      0  3  0x40014200  acpi0         acpi0
 10764  101012      0      0  7  0x40014200                idle11
 57461  101227      0      0  7  0x40014200                idle10
 55800  147769      0      0  7  0x40014200                idle9
 93447  192884      0      0  7  0x40014200                idle8
  7223  454164      0      0  7  0x40014200                idle7
 72300  126457      0      0  7  0x40014200                idle6
 74256  216536      0      0  7  0x40014200                idle5
 32561  107546      0      0  3  0x40014200                idle4
 64675  408456      0      0  7  0x40014200                idle3
 52624  274420      0      0  7  0x40014200                idle2
*47718   72204      0      0  7  0x40014200                idle1
 76970   57177      0      0  7     0x14200                sensors
 78440  368691      0      0  3     0x14200  bored         softnet3
 79641   80710      0      0  3     0x14200  bored         softnet2
 71296  297826      0      0  3     0x14200  bored         softnet1
 36143    4573      0      0  3     0x14200  bored         softnet0
 23881  430264      0      0  3     0x14200  smrbar        systqmp
 27179  278673      0      0  3     0x14200  bored         systq
 83106  261401      0      0  3     0x14200  tmoslp        softclockmp
 76184  188269      0      0  3  0x40014200  tmoslp        softclock
 87575  477579      0      0  7  0x40014200                idle0
     1  301649      0      0  3        0x82  wait          init
     0       0     -1      0  3  0x10010200  scheduler     swapper

ddb{1}> show all locks
Process 23881 (systqmp) thread 0xffff8000ffffe7b0 (430264)
shared rwlock systqmp r = 0 (0xffffffff82a2e5b0)

ddb{1}> show all routes
Route tree for af 2, rtableid 0
rtentry=0xfffffd8777f17bb8 flags=0x803 refcnt=2 use=71 expire=0
 key=[16,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=0 gw=[16,2,0,0,10,0,1,1,0,0,0,0,0,0,0,0]
 ifidx=1  ifa=0xffff8000012e2c00
  ifa_addr=[16,2,0,0,10,0,1,49,0,0,0,0,0,0,0,0]
  ifa_dsta=[16,2,0,0,10,0,1,255,0,0,0,0,0,0,0,0]
  ifa_mask=[7,0,0,0,255,255,255]
  flags=0x0, refcnt=7, metric=0
 gwroute=0xfffffd8777f17dc8 llinfo=0x0 priority=8
rtentry=0xfffffd8777f17a58 flags=0x809 refcnt=2 use=1 expire=0
 key=[16,2,0,0,224,0,0,0,0,0,0,0,0,0,0,0]
 plen=4 gw=[16,2,0,0,127,0,0,1,0,0,0,0,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2500
  ifa_addr=[16,2,0,0,127,0,0,1,0,0,0,0,0,0,0,0]
  ifa_dsta=[]
  ifa_mask=[5,0,0,0,255]
  flags=0x0, refcnt=4, metric=0
 gwroute=0x0 llinfo=0x0 priority=8
rtentry=0xfffffd87784b79a0 flags=0x800101 refcnt=4 use=1581 expire=0
 key=[16,2,0,0,10,0,1,0,0,0,0,0,0,0,0,0]
 plen=24 gw=[16,2,0,0,10,0,1,49,0,0,0,0,0,0,0,0]
 ifidx=1  ifa=0xffff8000012e2c00
  ifa_addr=[16,2,0,0,10,0,1,49,0,0,0,0,0,0,0,0]
  ifa_dsta=[16,2,0,0,10,0,1,255,0,0,0,0,0,0,0,0]
  ifa_mask=[7,0,0,0,255,255,255]
  flags=0x0, refcnt=7, metric=0
 gwroute=0x0 llinfo=0x0 priority=4
rtentry=0xfffffd8777f17dc8 flags=0x30405 refcnt=5 use=834 expire=6746
 key=[16,2,0,0,10,0,1,1,0,0,0,0,0,0,0,0]
 plen=32 gw=[32,18,1,0,6,0,6,0,144,226,186,157,240,63,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ifidx=1  ifa=0xffff8000012e2c00
  ifa_addr=[16,2,0,0,10,0,1,49,0,0,0,0,0,0,0,0]
  ifa_dsta=[16,2,0,0,10,0,1,255,0,0,0,0,0,0,0,0]
  ifa_mask=[7,0,0,0,255,255,255]
  flags=0x0, refcnt=7, metric=0
 gwroute=0x0 llinfo=0xfffffd8778198e00 priority=3
rtentry=0xfffffd8777f17798 flags=0x10405 refcnt=7 use=254 expire=6707
 key=[16,2,0,0,10,0,1,2,0,0,0,0,0,0,0,0]
 plen=32 gw=[32,18,1,0,6,0,6,0,0,3,186,12,11,82,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ifidx=1  ifa=0xffff8000012e2c00
  ifa_addr=[16,2,0,0,10,0,1,49,0,0,0,0,0,0,0,0]
  ifa_dsta=[16,2,0,0,10,0,1,255,0,0,0,0,0,0,0,0]
  ifa_mask=[7,0,0,0,255,255,255]
  flags=0x0, refcnt=7, metric=0
 gwroute=0x0 llinfo=0xfffffd8778198d80 priority=3
rtentry=0xfffffd87784b78f0 flags=0x200405 refcnt=3 use=1834 expire=0
 key=[16,2,0,0,10,0,1,49,0,0,0,0,0,0,0,0]
 plen=32 gw=[32,18,1,0,6,3,6,0,101,109,48,208,80,153,249,215,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ifidx=1  ifa=0xffff8000012e2c00
  ifa_addr=[16,2,0,0,10,0,1,49,0,0,0,0,0,0,0,0]
  ifa_dsta=[16,2,0,0,10,0,1,255,0,0,0,0,0,0,0,0]
  ifa_mask=[7,0,0,0,255,255,255]
  flags=0x0, refcnt=7, metric=0
 gwroute=0x0 llinfo=0xfffffd8778198f00 priority=1
rtentry=0xfffffd87784b7b00 flags=0x400005 refcnt=2 use=0 expire=0
 key=[16,2,0,0,10,0,1,255,0,0,0,0,0,0,0,0]
 plen=32 gw=[16,2,0,0,10,0,1,49,0,0,0,0,0,0,0,0]
 ifidx=1  ifa=0xffff8000012e2c00
  ifa_addr=[16,2,0,0,10,0,1,49,0,0,0,0,0,0,0,0]
  ifa_dsta=[16,2,0,0,10,0,1,255,0,0,0,0,0,0,0,0]
  ifa_mask=[7,0,0,0,255,255,255]
  flags=0x0, refcnt=7, metric=0
 gwroute=0x0 llinfo=0x0 priority=1
rtentry=0xfffffd8777f17008 flags=0x200005 refcnt=2 use=0 expire=0
 key=[16,2,0,0,10,188,253,1,0,0,0,0,0,0,0,0]
 plen=32 gw=[16,2,0,0,10,188,253,1,0,0,0,0,0,0,0,0]
 ifidx=46  ifa=0xffff8000018b4400
  ifa_addr=[16,2,0,0,10,188,253,1,0,0,0,0,0,0,0,0]
  ifa_dsta=[]
  ifa_mask=[5,0,0,0,255]
  flags=0x0, refcnt=2, metric=0
 gwroute=0x0 llinfo=0x0 priority=1
rtentry=0xfffffd8777f178f8 flags=0x80b refcnt=2 use=0 expire=0
 key=[16,2,0,0,127,0,0,0,0,0,0,0,0,0,0,0]
 plen=8 gw=[16,2,0,0,127,0,0,1,0,0,0,0,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2500
  ifa_addr=[16,2,0,0,127,0,0,1,0,0,0,0,0,0,0,0]
  ifa_dsta=[]
  ifa_mask=[5,0,0,0,255]
  flags=0x0, refcnt=4, metric=0
 gwroute=0xfffffd87784b7160 llinfo=0x0 priority=8
rtentry=0xfffffd87784b7160 flags=0x220005 refcnt=6 use=15806 expire=0
 key=[16,2,0,0,127,0,0,1,0,0,0,0,0,0,0,0]
 plen=32 gw=[16,2,0,0,127,0,0,1,0,0,0,0,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2500
  ifa_addr=[16,2,0,0,127,0,0,1,0,0,0,0,0,0,0,0]
  ifa_dsta=[]
  ifa_mask=[5,0,0,0,255]
  flags=0x0, refcnt=4, metric=0
 gwroute=0x0 llinfo=0x0 priority=1
Route tree for af 24, rtableid 0
rtentry=0xfffffd87784b7840 flags=0x80b refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=96 gw=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2700
  ifa_addr=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0]
  flags=0x0, refcnt=12, metric=0
 gwroute=0xfffffd87784b72c0 llinfo=0x0 priority=8
rtentry=0xfffffd87784b72c0 flags=0x220005 refcnt=12 use=50 expire=0
 key=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 plen=128 gw=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2700
  ifa_addr=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0]
  flags=0x0, refcnt=12, metric=0
 gwroute=0x0 llinfo=0x0 priority=1
rtentry=0xfffffd87784b7210 flags=0x80b refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0]
 plen=96 gw=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2700
  ifa_addr=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0]
  flags=0x0, refcnt=12, metric=0
 gwroute=0xfffffd87784b72c0 llinfo=0x0 priority=8
rtentry=0xfffffd87784b74d0 flags=0x80b refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,32,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=24 gw=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2700
  ifa_addr=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0]
  flags=0x0, refcnt=12, metric=0
 gwroute=0xfffffd87784b72c0 llinfo=0x0 priority=8
rtentry=0xfffffd87784b7420 flags=0x80b refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,32,2,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=24 gw=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2700
  ifa_addr=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0]
  flags=0x0, refcnt=12, metric=0
 gwroute=0xfffffd87784b72c0 llinfo=0x0 priority=8
rtentry=0xfffffd87784b7370 flags=0x80b refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,32,2,224,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=20 gw=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2700
  ifa_addr=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0]
  flags=0x0, refcnt=12, metric=0
 gwroute=0xfffffd87784b72c0 llinfo=0x0 priority=8
rtentry=0xfffffd87784b7580 flags=0x80b refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,32,2,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=24 gw=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2700
  ifa_addr=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0]
  flags=0x0, refcnt=12, metric=0
 gwroute=0xfffffd87784b72c0 llinfo=0x0 priority=8
rtentry=0xfffffd87784b70b0 flags=0x80b refcnt=2 use=5 expire=0
 key=[28,24,0,0,0,0,0,0,254,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=10 gw=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2700
  ifa_addr=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0]
  flags=0x0, refcnt=12, metric=0
 gwroute=0xfffffd87784b72c0 llinfo=0x0 priority=8
rtentry=0xfffffd87784b7000 flags=0x80b refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,254,192,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=10 gw=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2700
  ifa_addr=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0]
  flags=0x0, refcnt=12, metric=0
 gwroute=0xfffffd87784b72c0 llinfo=0x0 priority=8
rtentry=0xfffffd87784b7e70 flags=0x800100 refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,254,128,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=64 gw=[28,24,0,0,0,0,0,0,254,128,0,2,0,0,0,0,210,80,153,255,254,249,215,12,0,0,0,0]
 ifidx=2  ifa=0xffff80000056eb00
  ifa_addr=[28,24,0,0,0,0,0,0,254,128,0,2,0,0,0,0,210,80,153,255,254,249,215,12,0,0,0,0]
  ifa_dsta=[NULL]  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0]
  flags=0x0, refcnt=5, metric=0
 gwroute=0x0 llinfo=0x0 priority=132
rtentry=0xfffffd87784b7c60 flags=0x200405 refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,254,128,0,2,0,0,0,0,210,80,153,255,254,249,215,12,0,0,0,0]
 plen=128 gw=[32,18,2,0,6,3,6,0,101,109,49,208,80,153,249,215,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 ifidx=2  ifa=0xffff80000056eb00
  ifa_addr=[28,24,0,0,0,0,0,0,254,128,0,2,0,0,0,0,210,80,153,255,254,249,215,12,0,0,0,0]
  ifa_dsta=[NULL]  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0]
  flags=0x0, refcnt=5, metric=0
 gwroute=0x0 llinfo=0xfffffd8777f6cea0 priority=1
rtentry=0xfffffd87784b7bb0 flags=0x200005 refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,254,128,0,6,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 plen=128 gw=[28,24,0,0,0,0,0,0,254,128,0,6,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2100
  ifa_addr=[28,24,0,0,0,0,0,0,254,128,0,6,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0]
  flags=0x0, refcnt=4, metric=0
 gwroute=0x0 llinfo=0x0 priority=1
rtentry=0xfffffd8777f17168 flags=0x800101 refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,254,128,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=64 gw=[28,24,0,0,0,0,0,0,254,128,0,52,0,0,0,0,252,225,186,255,254,210,86,226,0,0,0,0]
 ifidx=52  ifa=0xffff8000018c2c00
  ifa_addr=[28,24,0,0,0,0,0,0,254,128,0,52,0,0,0,0,252,225,186,255,254,210,86,226,0,0,0,0]
  ifa_dsta=[NULL]  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0]
  flags=0x0, refcnt=5, metric=0
 gwroute=0x0 llinfo=0x0 priority=4
rtentry=0xfffffd8777f170b8 flags=0x200405 refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,254,128,0,52,0,0,0,0,252,225,186,255,254,210,86,226,0,0,0,0]
 plen=128 gw=[32,18,52,0,6,8,6,0,118,101,116,104,101,114,57,57,254,225,186,210,86,226,0,0,0,0,0,0,0,0,0,0]
 ifidx=52  ifa=0xffff8000018c2c00
  ifa_addr=[28,24,0,0,0,0,0,0,254,128,0,52,0,0,0,0,252,225,186,255,254,210,86,226,0,0,0,0]
  ifa_dsta=[NULL]  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0]
  flags=0x0, refcnt=5, metric=0
 gwroute=0x0 llinfo=0xfffffd8777f6ce10 priority=1
rtentry=0xfffffd87784b7630 flags=0x80b refcnt=2 use=48 expire=0
 key=[28,24,0,0,0,0,0,0,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=16 gw=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2700
  ifa_addr=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0]
  flags=0x0, refcnt=12, metric=0
 gwroute=0xfffffd87784b72c0 llinfo=0x0 priority=8
rtentry=0xfffffd87784b7dc0 flags=0x200 refcnt=2 use=0 expire=0
 key=[28,24,0,0,0,0,0,0,255,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=32 gw=[28,24,0,0,0,0,0,0,254,128,0,2,0,0,0,0,210,80,153,255,254,249,215,12,0,0,0,0]
 ifidx=2  ifa=0xffff80000056eb00
  ifa_addr=[28,24,0,0,0,0,0,0,254,128,0,2,0,0,0,0,210,80,153,255,254,249,215,12,0,0,0,0]
  ifa_dsta=[NULL]  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0]
  flags=0x0, refcnt=5, metric=0
 gwroute=0x0 llinfo=0x0 priority=132
rtentry=0xfffffd87784b7790 flags=0x201 refcnt=2 use=1 expire=0
 key=[28,24,0,0,0,0,0,0,255,1,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=32 gw=[28,24,0,0,0,0,0,0,254,128,0,6,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2100
  ifa_addr=[28,24,0,0,0,0,0,0,254,128,0,6,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0]
  flags=0x0, refcnt=4, metric=0
 gwroute=0x0 llinfo=0x0 priority=4
rtentry=0xfffffd8777f17638 flags=0x201 refcnt=2 use=2 expire=0
 key=[28,24,0,0,0,0,0,0,255,1,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=32 gw=[28,24,0,0,0,0,0,0,254,128,0,52,0,0,0,0,252,225,186,255,254,210,86,226,0,0,0,0]
 ifidx=52  ifa=0xffff8000018c2c00
  ifa_addr=[28,24,0,0,0,0,0,0,254,128,0,52,0,0,0,0,252,225,186,255,254,210,86,226,0,0,0,0]
  ifa_dsta=[NULL]  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0]
  flags=0x0, refcnt=5, metric=0
 gwroute=0x0 llinfo=0x0 priority=4
rtentry=0xfffffd87784b76e0 flags=0x80b refcnt=2 use=48 expire=0
 key=[28,24,0,0,0,0,0,0,255,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=16 gw=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2700
  ifa_addr=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[28,24,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0]
  flags=0x0, refcnt=12, metric=0
 gwroute=0xfffffd87784b72c0 llinfo=0x0 priority=8
rtentry=0xfffffd87784b7d10 flags=0x200 refcnt=2 use=1 expire=0
 key=[28,24,0,0,0,0,0,0,255,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=32 gw=[28,24,0,0,0,0,0,0,254,128,0,2,0,0,0,0,210,80,153,255,254,249,215,12,0,0,0,0]
 ifidx=2  ifa=0xffff80000056eb00
  ifa_addr=[28,24,0,0,0,0,0,0,254,128,0,2,0,0,0,0,210,80,153,255,254,249,215,12,0,0,0,0]
  ifa_dsta=[NULL]  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0]
  flags=0x0, refcnt=5, metric=0
 gwroute=0x0 llinfo=0x0 priority=132
rtentry=0xfffffd87784b7a50 flags=0x201 refcnt=2 use=1 expire=0
 key=[28,24,0,0,0,0,0,0,255,2,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=32 gw=[28,24,0,0,0,0,0,0,254,128,0,6,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
 ifidx=6  ifa=0xffff8000012e2100
  ifa_addr=[28,24,0,0,0,0,0,0,254,128,0,6,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0]
  ifa_dsta=[]
  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0]
  flags=0x0, refcnt=4, metric=0
 gwroute=0x0 llinfo=0x0 priority=4
rtentry=0xfffffd8777f172c8 flags=0x201 refcnt=2 use=2 expire=0
 key=[28,24,0,0,0,0,0,0,255,2,0,52,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
 plen=32 gw=[28,24,0,0,0,0,0,0,254,128,0,52,0,0,0,0,252,225,186,255,254,210,86,226,0,0,0,0]
 ifidx=52  ifa=0xffff8000018c2c00
  ifa_addr=[28,24,0,0,0,0,0,0,254,128,0,52,0,0,0,0,252,225,186,255,254,210,86,226,0,0,0,0]
  ifa_dsta=[NULL]  ifa_mask=[28,24,0,0,0,0,0,0,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0,0,0]
  flags=0x0, refcnt=5, metric=0
 gwroute=0x0 llinfo=0x0 priority=4

Can you reporduce it?  Anything else I should collect from ddb?

bluhm

> Index: art.c
> ===================================================================
> RCS file: /cvs/src/sys/net/art.c,v
> diff -u -p -r1.31 art.c
> --- art.c	11 Nov 2023 12:17:50 -0000	1.31
> +++ art.c	13 Jun 2025 06:40:59 -0000
> @@ -22,6 +22,58 @@
>   *
>   * Yoichi Hariguchi paper can be found at:
>   *	http://www.hariguchi.org/art/art.pdf
> + *
> + * This implementation is tweaked to minimise pointer traversal
> + * during lookups by only accessing the heaps it avoids following
> + * pointers to or through the art_table structures.
> + *
> + * The heap is an array of art_heap_entry values which have different
> + * meanings depending on their location. The majority of entries in
> + * the heap are used as pointers to nodes (struct an_node, aka leaf
> + * entries), or the next heap to traverse. These pointers are typed/
> + * tagged by the low bit in their heap value, and must be masked
> + * appropriately before use.
> + *
> + * The first two entries in the heap are not used by the search
> + * algorithm, so they are used to store data associated with this
> + * part of the heap. The first entry (heap[0]) stores a pointer to
> + * an art_table struct associated with this heap. The second entry
> + * (heap[1]) stores the node pointer which would be in the parent
> + * heap if this table wasn't using it. This node pointer is also known
> + * as the default entry/route for the current table in the ART.
> + *
> + * Nodes contain the exact information about the prefix they represent
> + * in the ART, ie, the address and prefix length, and a pointer to
> + * data associated with that prefix (an_value).
> + *
> + * The relationships between the separate data structures looks
> + * like the following:
> + *
> + * +------------------+
> + * |    struct art    |
> + * +------------------+              NULL
> + *   |                                ^
> + *   | art_root                       | at_parent
> + *   v                                |
> + * +------------------+   heap[0]   +------------------+
> + * |       heap       |------------>| struct art_table |
> + * +------------------+             +------------------+
> + *   |                                ^
> + *   | heap entry                     | at_parent
> + *   v                                |
> + * +------------------+   heap[0]   +------------------+
> + * |       heap       |------------>| struct art_table |
> + * +------------------+             +------------------+
> + *   |
> + *   | node entry
> + *   v
> + * +------------------+
> + * | struct art_node  |
> + * +------------------+
> + *   |
> + *   | an_value
> + *   v
> + * "user" data
>   */
>  
>  #ifndef _KERNEL
> @@ -33,43 +85,96 @@
>  #include <sys/pool.h>
>  #include <sys/task.h>
>  #include <sys/socket.h>
> +#include <sys/smr.h>
>  #endif
>  
>  #include <net/art.h>
>  
> -int			 art_bindex(struct art_table *, const uint8_t *, int);
> -void			 art_allot(struct art_table *at, int, struct art_node *,
> -			     struct art_node *);
> -struct art_table	*art_table_get(struct art_root *, struct art_table *,
> -			     int);
> -struct art_table	*art_table_put(struct art_root *, struct art_table *);
> -struct art_node		*art_table_insert(struct art_root *, struct art_table *,
> +/*
> + * Allotment Table.
> + */
> +struct art_table {
> +	art_heap_entry		*at_heap;
> +	struct art_table	*at_parent;	/* Parent table */
> +
> +	unsigned int		 at_index;	/* Index in the parent table */
> +	unsigned int		 at_minfringe;	/* Index that fringe begins */
> +
> +	unsigned int		 at_level;	/* Level of the table */
> +	unsigned int		 at_bits;	/* Stride length of the table */
> +	unsigned int		 at_offset;	/* Sum of parents' stride len */
> +
> +	unsigned int		 at_refcnt;
> +};
> +
> +static void		 art_allot(struct art_table *at, unsigned int,
> +			     art_heap_entry, art_heap_entry);
> +struct art_table	*art_table_get(struct art *, struct art_table *,
> +			     unsigned int);
> +struct art_table	*art_table_put(struct art  *, struct art_table *);
> +struct art_node		*art_table_insert(struct art *, struct art_table *,
>  			     int, struct art_node *);
> -struct art_node		*art_table_delete(struct art_root *, struct art_table *,
> +struct art_node		*art_table_delete(struct art *, struct art_table *,
>  			     int, struct art_node *);
> -struct art_table	*art_table_ref(struct art_root *, struct art_table *);
> -int			 art_table_free(struct art_root *, struct art_table *);
> -int			 art_table_walk(struct art_root *, struct art_table *,
> -			     int (*f)(struct art_node *, void *), void *);
> -int			 art_walk_apply(struct art_root *,
> -			     struct art_node *, struct art_node *,
> -			     int (*f)(struct art_node *, void *), void *);
> +struct art_table	*art_table_ref(struct art *, struct art_table *);
> +int			 art_table_free(struct art *, struct art_table *);
>  void			 art_table_gc(void *);
>  void			 art_gc(void *);
>  
> -struct pool		an_pool, at_pool, at_heap_4_pool, at_heap_8_pool;
> +static struct pool	 an_pool, at_pool, at_heap_4_pool, at_heap_8_pool;
>  
> -struct art_table	*art_table_gc_list = NULL;
> -struct mutex		 art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> -struct task		 art_table_gc_task =
> +static struct art_table	*art_table_gc_list = NULL;
> +static struct mutex	 art_table_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> +static struct task	 art_table_gc_task =
>  			     TASK_INITIALIZER(art_table_gc, NULL);
>  
> -struct art_node		*art_node_gc_list = NULL;
> -struct mutex		 art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> -struct task		 art_node_gc_task = TASK_INITIALIZER(art_gc, NULL);
> +static struct art_node	*art_node_gc_list = NULL;
> +static struct mutex	 art_node_gc_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> +static struct task	 art_node_gc_task = TASK_INITIALIZER(art_gc, NULL);
> +
> +#define ART_HEAP_IDX_TABLE	0
> +#define ART_HEAP_IDX_DEFAULT	1
> +
> +#define	AT_HEAPSIZE(bits)	((1 << ((bits) + 1)) * sizeof(art_heap_entry))
> +
> +static inline struct art_table *
> +art_heap_to_table(art_heap_entry *heap)
> +{
> +	return ((struct art_table *)heap[ART_HEAP_IDX_TABLE]);
> +}
> +
> +static inline int
> +art_heap_entry_is_node(art_heap_entry ahe)
> +{
> +	return (!ISSET(ahe, 1));
> +}
> +
> +static inline struct art_node *
> +art_heap_entry_to_node(art_heap_entry ahe)
> +{
> +	return ((struct art_node *)ahe);
> +}
> +
> +static inline art_heap_entry *
> +art_heap_entry_to_heap(art_heap_entry ahe)
> +{
> +	return ((art_heap_entry *)(ahe & ~1ULL));
> +}
> +
> +static inline art_heap_entry
> +art_node_to_heap_entry(struct art_node *an)
> +{
> +	return ((art_heap_entry)an);
> +}
> +
> +static inline art_heap_entry
> +art_heap_to_heap_entry(art_heap_entry *heap)
> +{
> +	return ((art_heap_entry)heap | 1ULL);
> +}
>  
>  void
> -art_init(void)
> +art_boot(void)
>  {
>  	pool_init(&an_pool, sizeof(struct art_node), 0, IPL_SOFTNET, 0,
>  	    "art_node", NULL);
> @@ -84,56 +189,74 @@ art_init(void)
>  /*
>   * Per routing table initialization API function.
>   */
> -struct art_root *
> -art_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
> -{
> -	struct art_root		*ar;
> -	int			 i;
>  
> -	ar = malloc(sizeof(*ar), M_RTABLE, M_NOWAIT|M_ZERO);
> -	if (ar == NULL)
> -		return (NULL);
> +static const unsigned int art_plen32_levels[] = {
> +	8,  4, 4,  4, 4,  4, 4
> +};
> +
> +static const unsigned int art_plen128_levels[] = {
> +	4, 4,  4, 4,  4, 4,  4, 4,  4, 4,  4, 4,  4, 4,  4, 4,
> +	4, 4,  4, 4,  4, 4,  4, 4,  4, 4,  4, 4,  4, 4,  4, 4
> +};
> +
> +static const unsigned int art_plen20_levels[] = {
> +	4, 4, 4, 4, 4
> +};
> +
> +void
> +art_init(struct art *art, unsigned int alen)
> +{
> +	const unsigned int *levels;
> +	unsigned int nlevels;
> +#ifdef DIAGNOSTIC
> +	unsigned int		 i;
> +	unsigned int		 bits = 0;
> +#endif
>  
>  	switch (alen) {
>  	case 32:
> -		ar->ar_alen = 32;
> -		ar->ar_nlvl = 7;
> -		ar->ar_bits[0] = 8;
> -		for (i = 1; i < ar->ar_nlvl; i++)
> -			ar->ar_bits[i] = 4;
> +		levels = art_plen32_levels;
> +		nlevels = nitems(art_plen32_levels);
>  		break;
>  	case 128:
> -		ar->ar_alen = 128;
> -		ar->ar_nlvl = 32;
> -		for (i = 0; i < ar->ar_nlvl; i++)
> -			ar->ar_bits[i] = 4;
> +		levels = art_plen128_levels;
> +		nlevels = nitems(art_plen128_levels);
> +		break;
> +	case 20:
> +		levels = art_plen20_levels;
> +		nlevels = nitems(art_plen20_levels);
>  		break;
>  	default:
> -		printf("%s: incorrect address length %u\n", __func__, alen);
> -		free(ar, M_RTABLE, sizeof(*ar));
> -		return (NULL);
> +		panic("no configuration for alen %u", alen);
> +		/* NOTREACHED */
>  	}
>  
> -	ar->ar_off = off;
> -	rw_init(&ar->ar_lock, "art");
> +#ifdef DIAGNOSTIC
> +	for (i = 0; i < nlevels; i++)
> +		bits += levels[i];
> +
> +	if (alen != bits)
> +		panic("sum of levels %u != address len %u", bits, alen);
> +#endif /* DIAGNOSTIC */
>  
> -	return (ar);
> +	art->art_root = 0;
> +	art->art_levels = levels;
> +	art->art_nlevels = nlevels;
> +	art->art_alen = alen;
>  }
>  
> -/*
> - * Return 1 if ``old'' and ``new`` are identical, 0 otherwise.
> - */
> -static inline int
> -art_check_duplicate(struct art_root *ar, struct art_node *old,
> -    struct art_node *new)
> +struct art *
> +art_alloc(unsigned int alen)
>  {
> -	if (old == NULL)
> -		return (0);
> +	struct art		*art;
>  
> -	if (old->an_plen == new->an_plen)
> -		return (1);
> +	art = malloc(sizeof(*art), M_RTABLE, M_NOWAIT|M_ZERO);
> +	if (art == NULL)
> +		return (NULL);
>  
> -	return (0);
> +	art_init(art, alen);
> +
> +	return (art);
>  }
>  
>  /*
> @@ -148,30 +271,31 @@ art_check_duplicate(struct art_root *ar,
>   * 8bit-long tables, there's a maximum of 4 base indexes if the
>   * prefix length is > 24.
>   */
> -int
> -art_bindex(struct art_table *at, const uint8_t *addr, int plen)
> +static unsigned int __pure
> +art_bindex(unsigned int offset, unsigned int bits,
> +    const uint8_t *addr, unsigned int plen)
>  {
> -	uint8_t			boff, bend;
> +	unsigned int		boff, bend;
>  	uint32_t		k;
>  
> -	if (plen < at->at_offset || plen > (at->at_offset + at->at_bits))
> -		return (-1);
> +	KASSERT(plen >= offset);
> +	KASSERT(plen <= (offset + bits));
>  
>  	/*
>  	 * We are only interested in the part of the prefix length
>  	 * corresponding to the range of this table.
>  	 */
> -	plen -= at->at_offset;
> +	plen -= offset;
>  
>  	/*
>  	 * Jump to the first byte of the address containing bits
>  	 * covered by this table.
>  	 */
> -	addr += (at->at_offset / 8);
> +	addr += (offset / 8);
>  
>  	/* ``at'' covers the bit range between ``boff'' & ``bend''. */
> -	boff = (at->at_offset % 8);
> -	bend = (at->at_bits + boff);
> +	boff = (offset % 8);
> +	bend = (bits + boff);
>  
>  	KASSERT(bend <= 32);
>  
> @@ -188,25 +312,13 @@ art_bindex(struct art_table *at, const u
>  		k = (addr[0] & ((1 << (8 - boff)) - 1)) << (bend - 8);
>  		k |= addr[1] >> (16 - bend);
>  	} else {
> -		k = (addr[0] >> (8 - bend)) & ((1 << at->at_bits) - 1);
> +		k = (addr[0] >> (8 - bend)) & ((1 << bits) - 1);
>  	}
>  
>  	/*
>  	 * Single level base index formula:
>  	 */
> -	return ((k >> (at->at_bits - plen)) + (1 << plen));
> -}
> -
> -/*
> - * Single level lookup function.
> - *
> - * Return the fringe index of the part of ``addr''
> - * corresponding to the range covered by the table ``at''.
> - */
> -static inline int
> -art_findex(struct art_table *at, const uint8_t *addr)
> -{
> -	return art_bindex(at, addr, (at->at_offset + at->at_bits));
> +	return ((k >> (bits - plen)) + (1 << plen));
>  }
>  
>  /*
> @@ -215,60 +327,94 @@ art_findex(struct art_table *at, const u
>   * Return the best existing match for a destination.
>   */
>  struct art_node *
> -art_match(struct art_root *ar, const void *addr, struct srp_ref *nsr)
> +art_match(struct art *art, const void *addr)
>  {
> -	struct srp_ref		dsr, ndsr;
> -	void			*entry;
> -	struct art_table	*at;
> -	struct art_node		*dflt, *ndflt;
> -	int			j;
> -
> -	entry = srp_enter(nsr, &ar->ar_root);
> -	at = entry;
> +	unsigned int		 offset = 0;
> +	unsigned int		 level = 0;
> +	art_heap_entry		*heap;
> +	art_heap_entry		 ahe, dahe = 0;
> +	struct art_node		*an;
> +	int			 j;
>  
> -	if (at == NULL)
> -		goto done;
> +	SMR_ASSERT_CRITICAL();
>  
> -	/*
> -	 * Remember the default route of each table we visit in case
> -	 * we do not find a better matching route.
> -	 */
> -	dflt = srp_enter(&dsr, &at->at_default);
> +	heap = SMR_PTR_GET(&art->art_root);
> +	if (heap == NULL)
> +		return (NULL);
>  
>  	/*
>  	 * Iterate until we find a leaf.
>  	 */
> -	while (1) {
> +	for (;;) {
> +		unsigned int bits = art->art_levels[level];
> +		unsigned int p = offset + bits;
> +
> +		/*
> +		 * Remember the default route of each table we visit in case
> +		 * we do not find a better matching route.
> +		 */
> +		ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]);
> +		if (ahe != 0)
> +			dahe = ahe;
> +		
>  		/* Do a single level route lookup. */
> -		j = art_findex(at, addr);
> -		entry = srp_follow(nsr, &at->at_heap[j].node);
> +		j = art_bindex(offset, bits, addr, p);
> +		ahe = SMR_PTR_GET(&heap[j]);
>  
>  		/* If this is a leaf (NULL is a leaf) we're done. */
> -		if (ISLEAF(entry))
> +		if (art_heap_entry_is_node(ahe))
>  			break;
>  
> -		at = SUBTABLE(entry);
> +		heap = art_heap_entry_to_heap(ahe);
> +		offset = p;
> +		level++;
> +
> +		KASSERT(level < art->art_nlevels);
> +	}
> +
> +	an = art_heap_entry_to_node(ahe);
> +	if (an != NULL)
> +		return (an);
> +
> +	return art_heap_entry_to_node(dahe);
> +}
> +
> +static int
> +art_node_check(const struct art_node *an,
> +    const uint8_t *addr, unsigned int plen)
> +{
> +	const uint32_t *wan = (const uint32_t *)an->an_addr;
> +	const uint32_t *waddr = (const uint32_t *)addr;
> +	unsigned int i = 0;
> +	unsigned int shift;
> +
> +	if (an->an_plen != plen)
> +		return (0);
> +
> +	while (plen >= 32) {
> +		if (wan[i] != waddr[i])
> +			return (0);
> +
> +		i++;
> +		plen -= 32;
> +	}
> +	if (plen == 0)
> +		return (1);
> +
> +	i *= 4;
> +	while (plen >= 8) {
> +		if (an->an_addr[i] != addr[i])
> +			return (0);
> +		
> +		i++;
> +		plen -= 8;
> +	}
> +	if (plen == 0)
> +		return (1);
> +
> +	shift = 8 - plen;
>  
> -		ndflt = srp_enter(&ndsr, &at->at_default);
> -		if (ndflt != NULL) {
> -			srp_leave(&dsr);
> -			dsr = ndsr;
> -			dflt = ndflt;
> -		} else
> -			srp_leave(&ndsr);
> -	}
> -
> -	if (entry == NULL) {
> -		srp_leave(nsr);
> -		*nsr = dsr;
> -		KASSERT(ISLEAF(dflt));
> -		return (dflt);
> -	}
> -
> -	srp_leave(&dsr);
> -done:
> -	KASSERT(ISLEAF(entry));
> -	return (entry);
> +	return ((an->an_addr[i] >> shift) == (addr[i] >> shift));
>  }
>  
>  /*
> @@ -278,24 +424,28 @@ done:
>   * it does not exist.
>   */
>  struct art_node *
> -art_lookup(struct art_root *ar, const void *addr, int plen, struct srp_ref *nsr)
> +art_lookup(struct art *art, const void *addr, unsigned int plen)
>  {
> -	void			*entry;
> -	struct art_table	*at;
> -	int			 i, j;
> +	unsigned int		 offset = 0;
> +	unsigned int		 bits, p;
> +	unsigned int		 level = 0;
> +	art_heap_entry		*heap;
> +	art_heap_entry		 ahe;
> +	struct art_node		*an;
> +	unsigned int		 i, j;
>  
> -	KASSERT(plen >= 0 && plen <= ar->ar_alen);
> +	KASSERT(plen <= art->art_alen);
>  
> -	entry = srp_enter(nsr, &ar->ar_root);
> -	at = entry;
> +	SMR_ASSERT_CRITICAL();
>  
> -	if (at == NULL)
> -		goto done;
> +	heap = SMR_PTR_GET(&art->art_root);
> +	if (heap == NULL)
> +		return (NULL);
>  
>  	/* Default route */
>  	if (plen == 0) {
> -		entry = srp_follow(nsr, &at->at_default);
> -		goto done;
> +		ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]);
> +		return art_heap_entry_to_node(ahe);
>  	}
>  
>  	/*
> @@ -303,31 +453,47 @@ art_lookup(struct art_root *ar, const vo
>  	 * the stride length at this level the entry must
>  	 * be in the current table.
>  	 */
> -	while (plen > (at->at_offset + at->at_bits)) {
> +	for (;;) {
> +		bits = art->art_levels[level];
> +		p = offset + bits;
> +		if (plen <= p)
> +			break;
> +
>  		/* Do a single level route lookup. */
> -		j = art_findex(at, addr);
> -		entry = srp_follow(nsr, &at->at_heap[j].node);
> +		j = art_bindex(offset, bits, addr, p);
> +		ahe = SMR_PTR_GET(&heap[j]);
>  
>  		/* A leaf is a match, but not a perfect one, or NULL */
> -		if (ISLEAF(entry))
> +		if (art_heap_entry_is_node(ahe))
>  			return (NULL);
>  
> -		at = SUBTABLE(entry);
> +		heap = art_heap_entry_to_heap(ahe);
> +		offset = p;
> +		level++;
> +
> +		KASSERT(level < art->art_nlevels);
>  	}
>  
> -	i = art_bindex(at, addr, plen);
> -	if (i == -1)
> -		return (NULL);
> +	i = art_bindex(offset, bits, addr, plen);
> +	ahe = SMR_PTR_GET(&heap[i]);
> +	if (!art_heap_entry_is_node(ahe)) {
> +		heap = art_heap_entry_to_heap(ahe);
> +		ahe = SMR_PTR_GET(&heap[ART_HEAP_IDX_DEFAULT]);
> +	}
> +
> +	/* Make sure we've got a perfect match */
> +	an = art_heap_entry_to_node(ahe);
> +	if (an != NULL && art_node_check(an, addr, plen))
> +		return (an);
>  
> -	entry = srp_follow(nsr, &at->at_heap[i].node);
> -	if (!ISLEAF(entry))
> -		entry = srp_follow(nsr, &SUBTABLE(entry)->at_default);
> -
> -done:
> -	KASSERT(ISLEAF(entry));
> -	return (entry);
> +	return (NULL);
>  }
>  
> +int
> +art_is_empty(struct art *art)
> +{
> +	return (SMR_PTR_GET_LOCKED(&art->art_root) == NULL);
> +}
>  
>  /*
>   * Insertion API function.
> @@ -336,32 +502,38 @@ done:
>   * same destination/mask pair is already present.
>   */
>  struct art_node *
> -art_insert(struct art_root *ar, struct art_node *an, const void *addr, int plen)
> +art_insert(struct art *art, struct art_node *an)
>  {
> -	struct art_table	*at, *child;
> -	struct art_node		*node;
> -	int			 i, j;
> -
> -	rw_assert_wrlock(&ar->ar_lock);
> -	KASSERT(plen >= 0 && plen <= ar->ar_alen);
> -
> -	at = srp_get_locked(&ar->ar_root);
> -	if (at == NULL) {
> -		at = art_table_get(ar, NULL, -1);
> +	unsigned int		 p;
> +	art_heap_entry		 ahe, oahe, *ahep;
> +	art_heap_entry		*heap;
> +	struct art_node		*oan;
> +	struct art_table	*at;
> +	unsigned int		 i, j;
> +
> +	KASSERT(an->an_plen <= art->art_alen);
> +
> +	heap = SMR_PTR_GET_LOCKED(&art->art_root);
> +	if (heap == NULL) {
> +		at = art_table_get(art, NULL, -1);
>  		if (at == NULL)
>  			return (NULL);
>  
> -		srp_swap_locked(&ar->ar_root, at);
> -	}
> +		heap = at->at_heap;
> +		SMR_PTR_SET_LOCKED(&art->art_root, heap);
> +	} else
> +		at = art_heap_to_table(heap);
>  
>  	/* Default route */
> -	if (plen == 0) {
> -		node = srp_get_locked(&at->at_default);
> -		if (node != NULL)
> -			return (node);
> +	if (an->an_plen == 0) {
> +		ahep = &heap[ART_HEAP_IDX_DEFAULT];
> +		oahe = SMR_PTR_GET_LOCKED(ahep);
> +		oan = art_heap_entry_to_node(oahe);
> +		if (oan != NULL)
> +			return (oan);
>  
> -		art_table_ref(ar, at);
> -		srp_swap_locked(&at->at_default, an);
> +		art_table_ref(art, at);
> +		SMR_PTR_SET_LOCKED(ahep, art_node_to_heap_entry(an));
>  		return (an);
>  	}
>  
> @@ -370,10 +542,11 @@ art_insert(struct art_root *ar, struct a
>  	 * the stride length at this level the entry must
>  	 * be in the current table.
>  	 */
> -	while (plen > (at->at_offset + at->at_bits)) {
> +	while ((p = at->at_offset + at->at_bits) < an->an_plen) {
>  		/* Do a single level route lookup. */
> -		j = art_findex(at, addr);
> -		node = srp_get_locked(&at->at_heap[j].node);
> +		j = art_bindex(at->at_offset, at->at_bits, an->an_addr, p);
> +		ahep = &heap[j];
> +		ahe = SMR_PTR_GET_LOCKED(ahep);
>  
>  		/*
>  		 * If the node corresponding to the fringe index is
> @@ -381,84 +554,86 @@ art_insert(struct art_root *ar, struct a
>  		 * entry of this node will then become the default
>  		 * route of the subtable.
>  		 */
> -		if (ISLEAF(node)) {
> -			child = art_table_get(ar, at, j);
> +		if (art_heap_entry_is_node(ahe)) {
> +			struct art_table *child = art_table_get(art, at, j);
>  			if (child == NULL)
>  				return (NULL);
>  
> -			art_table_ref(ar, at);
> -			srp_swap_locked(&at->at_heap[j].node, ASNODE(child));
> +			art_table_ref(art, at);
> +
>  			at = child;
> -		} else
> -			at = SUBTABLE(node);
> +			heap = at->at_heap;
> +			SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT], ahe);
> +			SMR_PTR_SET_LOCKED(ahep, art_heap_to_heap_entry(heap));
> +		} else {
> +			heap = art_heap_entry_to_heap(ahe);
> +			at = art_heap_to_table(heap);
> +		}
>  	}
>  
> -	i = art_bindex(at, addr, plen);
> -	if (i == -1)
> -		return (NULL);
> -
> -	return (art_table_insert(ar, at, i, an));
> -}
> -
> -/*
> - * Single level insertion.
> - */
> -struct art_node *
> -art_table_insert(struct art_root *ar, struct art_table *at, int i,
> -    struct art_node *an)
> -{
> -	struct art_node	*prev, *node;
> -
> -	node = srp_get_locked(&at->at_heap[i].node);
> -	if (!ISLEAF(node))
> -		prev = srp_get_locked(&SUBTABLE(node)->at_default);
> -	else
> -		prev = node;
> -
> -	if (art_check_duplicate(ar, prev, an))
> -		return (prev);
> +	i = art_bindex(at->at_offset, at->at_bits, an->an_addr, an->an_plen);
> +	ahep = &heap[i];
> +	oahe = SMR_PTR_GET_LOCKED(ahep);
> +	if (!art_heap_entry_is_node(oahe)) {
> +		heap = art_heap_entry_to_heap(oahe);
> +		ahep = &heap[ART_HEAP_IDX_DEFAULT];
> +		oahe = SMR_PTR_GET_LOCKED(ahep);
> +	}
>  
> -	art_table_ref(ar, at);
> +	/* Check if there's an existing node */
> +	oan = art_heap_entry_to_node(oahe);
> +	if (oan != NULL && art_node_check(oan, an->an_addr, an->an_plen))
> +		return (oan);
>  
>  	/*
>  	 * If the index `i' of the route that we are inserting is not
>  	 * a fringe index, we need to allot this new route pointer to
>  	 * all the corresponding fringe indices.
>  	 */
> +	art_table_ref(art, at);
> +	ahe = art_node_to_heap_entry(an);
>  	if (i < at->at_minfringe)
> -		art_allot(at, i, prev, an);
> -	else if (!ISLEAF(node))
> -		srp_swap_locked(&SUBTABLE(node)->at_default, an);
> +		art_allot(at, i, oahe, ahe);
>  	else
> -		srp_swap_locked(&at->at_heap[i].node, an);
> +		SMR_PTR_SET_LOCKED(ahep, ahe);
>  
>  	return (an);
>  }
>  
> -
>  /*
>   * Deletion API function.
>   */
>  struct art_node *
> -art_delete(struct art_root *ar, struct art_node *an, const void *addr, int plen)
> +art_delete(struct art *art, const void *addr, unsigned int plen)
>  {
> +	unsigned int		 p;
> +	art_heap_entry		*heap;
>  	struct art_table	*at;
> -	struct art_node		*node;
> -	int			 i, j;
> +	art_heap_entry		 ahe, pahe, *ahep;
> +	struct art_node		*an;
> +	unsigned int		 i, j;
>  
> -	rw_assert_wrlock(&ar->ar_lock);
> -	KASSERT(plen >= 0 && plen <= ar->ar_alen);
> +	KASSERT(plen <= art->art_alen);
>  
> -	at = srp_get_locked(&ar->ar_root);
> -	if (at == NULL)
> +	heap = SMR_PTR_GET_LOCKED(&art->art_root);
> +	if (heap == NULL)
>  		return (NULL);
>  
> +	at = art_heap_to_table(heap);
> +
>  	/* Default route */
>  	if (plen == 0) {
> -		node = srp_get_locked(&at->at_default);
> -		srp_swap_locked(&at->at_default, NULL);
> -		art_table_free(ar, at);
> -		return (node);
> +		ahep = &heap[ART_HEAP_IDX_DEFAULT];
> +
> +		ahe = SMR_PTR_GET_LOCKED(ahep);
> +		an = art_heap_entry_to_node(ahe);
> +		if (an == NULL)
> +			return (NULL);
> +
> +		SMR_PTR_SET_LOCKED(ahep, 0);
> +		art_table_free(art, at);
> +
> +		return (an);
>  	}
>  
>  	/*
> @@ -466,53 +641,37 @@ art_delete(struct art_root *ar, struct a
>  	 * the stride length at this level the entry must
>  	 * be in the current table.
>  	 */
> -	while (plen > (at->at_offset + at->at_bits)) {
> +	while ((p = at->at_offset + at->at_bits) < plen) {
>  		/* Do a single level route lookup. */
> -		j = art_findex(at, addr);
> -		node = srp_get_locked(&at->at_heap[j].node);
> +		j = art_bindex(at->at_offset, at->at_bits, addr, p);
> +		ahe = SMR_PTR_GET_LOCKED(&heap[j]);
>  
>  		/* If this is a leaf, there is no route to delete. */
> -		if (ISLEAF(node))
> +		if (art_heap_entry_is_node(ahe))
>  			return (NULL);
>  
> -		at = SUBTABLE(node);
> +		heap = art_heap_entry_to_heap(ahe);
> +		at = art_heap_to_table(heap);
>  	}
>  
> -	i = art_bindex(at, addr, plen);
> -	if (i == -1)
> +	i = art_bindex(at->at_offset, at->at_bits, addr, plen);
> +	ahep = &heap[i];
> +	ahe = SMR_PTR_GET_LOCKED(ahep);
> +	if (!art_heap_entry_is_node(ahe)) {
> +		art_heap_entry *nheap = art_heap_entry_to_heap(ahe);
> +		ahep = &nheap[ART_HEAP_IDX_DEFAULT];
> +		ahe = SMR_PTR_GET_LOCKED(ahep);
> +	}
> +
> +	an = art_heap_entry_to_node(ahe);
> +	if (an == NULL) {
> +		/* No route to delete */
>  		return (NULL);
> -
> -	return (art_table_delete(ar, at, i, an));
> -}
> -
> -/*
> - * Single level deletion.
> - */
> -struct art_node *
> -art_table_delete(struct art_root *ar, struct art_table *at, int i,
> -    struct art_node *an)
> -{
> -	struct art_node		*next, *node;
> -
> -#ifdef DIAGNOSTIC
> -	struct art_node		*prev;
> -#endif
> -
> -	node = srp_get_locked(&at->at_heap[i].node);
> -#ifdef DIAGNOSTIC
> -	if (!ISLEAF(node))
> -		prev = srp_get_locked(&SUBTABLE(node)->at_default);
> -	else
> -		prev = node;
> -
> -	KASSERT(prev == an);
> -#endif
> +	}
>  
>  	/* Get the next most specific route for the index `i'. */
> -	if ((i >> 1) > 1)
> -		next = srp_get_locked(&at->at_heap[i >> 1].node);
> -	else
> -		next = NULL;
> +	j = i >> 1;
> +	pahe = (j > 1) ? SMR_PTR_GET_LOCKED(&heap[j]) : 0;
>  
>  	/*
>  	 * If the index `i' of the route that we are removing is not
> @@ -520,20 +679,18 @@ art_table_delete(struct art_root *ar, st
>  	 * route pointer to all the corresponding fringe indices.
>  	 */
>  	if (i < at->at_minfringe)
> -		art_allot(at, i, an, next);
> -	else if (!ISLEAF(node))
> -		srp_swap_locked(&SUBTABLE(node)->at_default, next);
> +		art_allot(at, i, ahe, pahe);
>  	else
> -		srp_swap_locked(&at->at_heap[i].node, next);
> +		SMR_PTR_SET_LOCKED(ahep, pahe);
>  
>  	/* We have removed an entry from this table. */
> -	art_table_free(ar, at);
> +	art_table_free(art, at);
>  
>  	return (an);
>  }
>  
>  struct art_table *
> -art_table_ref(struct art_root *ar, struct art_table *at)
> +art_table_ref(struct art *art, struct art_table *at)
>  {
>  	at->at_refcnt++;
>  	return (at);
> @@ -544,12 +701,11 @@ art_table_rele(struct art_table *at)
>  {
>  	if (at == NULL)
>  		return (0);
> -
>  	return (--at->at_refcnt == 0);
>  }
>  
>  int
> -art_table_free(struct art_root *ar, struct art_table *at)
> +art_table_free(struct art *art, struct art_table *at)
>  {
>  	if (art_table_rele(at)) {
>  		/*
> @@ -557,7 +713,7 @@ art_table_free(struct art_root *ar, stru
>  		 * that are empty.
>  		 */
>  		do {
> -			at = art_table_put(ar, at);
> +			at = art_table_put(art, at);
>  		} while (art_table_rele(at));
>  
>  		return (1);
> @@ -569,116 +725,187 @@ art_table_free(struct art_root *ar, stru
>  /*
>   * Iteration API function.
>   */
> -int
> -art_walk(struct art_root *ar, int (*f)(struct art_node *, void *), void *arg)
> +
> +static struct art_node *
> +art_iter_descend(struct art_iter *ai, art_heap_entry *heap,
> +    art_heap_entry pahe)
>  {
> -	struct srp_ref		 sr;
> -	struct art_table	*at;
> -	struct art_node		*node;
> -	int			 error = 0;
> +	struct art_table *at;
> +	art_heap_entry ahe;
>  
> -	rw_enter_write(&ar->ar_lock);
> -	at = srp_get_locked(&ar->ar_root);
> -	if (at != NULL) {
> -		art_table_ref(ar, at);
> +	at = art_heap_to_table(heap);
> +	ai->ai_table = art_table_ref(ai->ai_art, at);
>  
> -		/*
> -		 * The default route should be processed here because the root
> -		 * table does not have a parent.
> -		 */
> -		node = srp_enter(&sr, &at->at_default);
> -		error = art_walk_apply(ar, node, NULL, f, arg);
> -		srp_leave(&sr);
> +	/*
> +	 * Start looking at non-fringe nodes.
> +	 */
> +	ai->ai_j = 1;
> +
> +	/*
> +	 * The default route (index 1) is processed by the
> +	 * parent table (where it belongs) otherwise it could
> +	 * be processed more than once.
> +	 */
> +	ai->ai_i = 2;
>  
> -		if (error == 0)
> -			error = art_table_walk(ar, at, f, arg);
> +	/*
> +	 * Process the default route now.
> +	 */
> +	ahe = SMR_PTR_GET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT]);
> +	if (ahe != 0 && ahe != pahe)
> +		return (art_heap_entry_to_node(ahe));
> +
> +	/*
> +	 * Tell the caller to proceed with art_iter_next.
> +	 */
> +	return (NULL);
> +}
> +
> +struct art_node *
> +art_iter_open(struct art *art, struct art_iter *ai)
> +{
> +	art_heap_entry *heap = SMR_PTR_GET(&art->art_root);
> +	struct art_node *an;
>  
> -		art_table_free(ar, at);
> +	ai->ai_art = art;
> +
> +	if (heap == NULL) {
> +		/* empty, we're already done */
> +		ai->ai_table = NULL;
> +		return (NULL);
>  	}
> -	rw_exit_write(&ar->ar_lock);
>  
> -	return (error);
> +	an = art_iter_descend(ai, heap, 0);
> +	if (an != NULL)
> +		return (an); /* default route */
> +
> +	return (art_iter_next(ai));
>  }
>  
> -int
> -art_table_walk(struct art_root *ar, struct art_table *at,
> -    int (*f)(struct art_node *, void *), void *arg)
> +struct art_node *
> +art_iter_next(struct art_iter *ai)
>  {
> -	struct srp_ref		 sr;
> -	struct art_node		*node, *next;
> -	struct art_table	*nat;
> -	int			 i, j, error = 0;
> -	uint32_t		 maxfringe = (at->at_minfringe << 1);
> +	struct art_table *at = ai->ai_table;
> +	art_heap_entry *heap = at->at_heap;
> +	art_heap_entry ahe, pahe;
> +	unsigned int i;
>  
> +descend:
>  	/*
>  	 * Iterate non-fringe nodes in ``natural'' order.
>  	 */
> -	for (j = 1; j < at->at_minfringe; j += 2) {
> -		/*
> -		 * The default route (index 1) is processed by the
> -		 * parent table (where it belongs) otherwise it could
> -		 * be processed more than once.
> -		 */
> -		for (i = max(j, 2); i < at->at_minfringe; i <<= 1) {
> -			next = srp_get_locked(&at->at_heap[i >> 1].node);
> -
> -			node = srp_enter(&sr, &at->at_heap[i].node);
> -			error = art_walk_apply(ar, node, next, f, arg);
> -			srp_leave(&sr);
> +	if  (ai->ai_j < at->at_minfringe) {
> +		for (;;) {
> +			while ((i = ai->ai_i) < at->at_minfringe) {
> +				ai->ai_i = i << 1;
> +
> +				pahe = SMR_PTR_GET_LOCKED(&heap[i >> 1]);
> +				ahe = SMR_PTR_GET_LOCKED(&heap[i]);
> +				if (ahe != 0 && ahe != pahe)
> +					return (art_heap_entry_to_node(ahe));
> +			}
>  
> -			if (error != 0)
> -				return (error);
> +			ai->ai_j += 2;
> +			if (ai->ai_j < at->at_minfringe)
> +				ai->ai_i = ai->ai_j;
> +			else {
> +				/* Set up the fringe loop */
> +				ai->ai_i = at->at_minfringe;
> +				break;
> +			}
>  		}
>  	}
>  
>  	/*
> -	 * Iterate fringe nodes.
> +	 * Descendent tables are only found on fringe nodes, and
> +	 * they record where they were placed in their parent. This
> +	 * allows the iterator to know where to resume traversal when
> +	 * it ascends back to the parent table. By keeping the table
> +	 * refs when going down the tree, the topolgy is preserved
> +	 * even if all the nodes are removed.
>  	 */
> -	for (i = at->at_minfringe; i < maxfringe; i++) {
> -		next = srp_get_locked(&at->at_heap[i >> 1].node);
>  
> -		node = srp_enter(&sr, &at->at_heap[i].node);
> -		if (!ISLEAF(node)) {
> -			nat = art_table_ref(ar, SUBTABLE(node));
> -			node = srp_follow(&sr, &nat->at_default);
> -		} else
> -			nat = NULL;
> +	for (;;) {
> +		unsigned int maxfringe = at->at_minfringe << 1;
> +		struct art_table *parent;
>  
> -		error = art_walk_apply(ar, node, next, f, arg);
> -		srp_leave(&sr);
> +		/*
> +		 * Iterate fringe nodes.
> +		 */
> +		while ((i = ai->ai_i) < maxfringe) {
> +			ai->ai_i = i + 1;
>  
> -		if (error != 0) {
> -			art_table_free(ar, nat);
> -			return (error);
> +			pahe = SMR_PTR_GET_LOCKED(&heap[i >> 1]);
> +			ahe = SMR_PTR_GET_LOCKED(&heap[i]);
> +			if (art_heap_entry_is_node(ahe)) {
> +				if (ahe != 0 && ahe != pahe)
> +					return (art_heap_entry_to_node(ahe));
> +			} else {
> +				struct art_node *an;
> + 
> +				heap = art_heap_entry_to_heap(ahe);
> +
> +				an = art_iter_descend(ai, heap, pahe);
> +				if (an != NULL) /* default route? */
> +					return (an);
> +
> +				/* Start looping over the child table */
> +				at = art_heap_to_table(heap);
> +				goto descend;
> +			}
>  		}
>  
> -		if (nat != NULL) {
> -			error = art_table_walk(ar, nat, f, arg);
> -			art_table_free(ar, nat);
> -			if (error != 0)
> -				return (error);
> +		/*
> +		 * Ascend back up to the parent
> +		 */
> +		parent = at->at_parent;
> +		ai->ai_i = at->at_index + 1;
> +		art_table_free(ai->ai_art, at);
> +
> +		ai->ai_table = parent;
> +		if (parent == NULL) {
> +			/* The root table has no parent */
> +			break;
>  		}
> +
> +		at = parent;
> +		ai->ai_j = at->at_minfringe;
> +		heap = at->at_heap;
>  	}
>  
> -	return (0);
> +	return (NULL);
>  }
>  
> -int
> -art_walk_apply(struct art_root *ar,
> -    struct art_node *an, struct art_node *next,
> -    int (*f)(struct art_node *, void *), void *arg)
> +void
> +art_iter_close(struct art_iter *ai)
>  {
> -	int error = 0;
> +	struct art_table *at, *parent;
>  
> -	if ((an != NULL) && (an != next)) {
> -		rw_exit_write(&ar->ar_lock);
> -		error = (*f)(an, arg);
> -		rw_enter_write(&ar->ar_lock);
> +	for (at = ai->ai_table; at != NULL; at = parent) {
> +		parent = at->at_parent;
> +		art_table_free(ai->ai_art, at);
>  	}
>  
> -	return (error);
> +	ai->ai_table = NULL;
>  }
>  
> +int
> +art_walk(struct art *art, int (*f)(struct art_node *, void *), void *arg)
> +{
> +	struct art_iter		 ai;
> +	struct art_node		*an;
> +	int			 error = 0;
> +
> +	ART_FOREACH(an, art, &ai) {
> +		error = f(an, arg);
> +		if (error != 0) {
> +			art_iter_close(&ai);
> +			return (error);
> +		}
> +	}
> +
> +	return (0);
> +}
>  
>  /*
>   * Create a table and use the given index to set its default route.
> @@ -686,89 +913,90 @@ art_walk_apply(struct art_root *ar,
>   * Note:  This function does not modify the root or the parent.
>   */
>  struct art_table *
> -art_table_get(struct art_root *ar, struct art_table *parent, int j)
> +art_table_get(struct art *art, struct art_table *parent, unsigned int j)
>  {
>  	struct art_table	*at;
> -	struct art_node		*node;
> -	void			*at_heap;
> -	uint32_t		 lvl;
> +	art_heap_entry		*heap;
> +	unsigned int		 level;
> +	unsigned int		 bits;
>  
> -	KASSERT(j != 0 && j != 1);
> +	KASSERT(j != ART_HEAP_IDX_TABLE && j != ART_HEAP_IDX_DEFAULT);
>  	KASSERT(parent != NULL || j == -1);
>  
> -	if (parent != NULL)
> -		lvl = parent->at_level + 1;
> -	else
> -		lvl = 0;
> -
> -	KASSERT(lvl < ar->ar_nlvl);
> +	level = (parent != NULL) ? parent->at_level + 1 : 0;
> +	KASSERT(level < art->art_nlevels);
>  
>  	at = pool_get(&at_pool, PR_NOWAIT|PR_ZERO);
>  	if (at == NULL)
>  		return (NULL);
>  
> -	switch (AT_HEAPSIZE(ar->ar_bits[lvl])) {
> -	case AT_HEAPSIZE(4):
> -		at_heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO);
> +	bits = art->art_levels[level];
> +	switch (bits) {
> +	case 4:
> +		heap = pool_get(&at_heap_4_pool, PR_NOWAIT|PR_ZERO);
>  		break;
> -	case AT_HEAPSIZE(8):
> -		at_heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO);
> +	case 8:
> +		heap = pool_get(&at_heap_8_pool, PR_NOWAIT|PR_ZERO);
>  		break;
>  	default:
> -		panic("incorrect stride length %u", ar->ar_bits[lvl]);
> +		panic("incorrect stride length %u", bits);
>  	}
>  
> -	if (at_heap == NULL) {
> +	if (heap == NULL) {
>  		pool_put(&at_pool, at);
>  		return (NULL);
>  	}
>  
> +	heap[ART_HEAP_IDX_TABLE] = (art_heap_entry)at;
> +
>  	at->at_parent = parent;
>  	at->at_index = j;
> -	at->at_minfringe = (1 << ar->ar_bits[lvl]);
> -	at->at_level = lvl;
> -	at->at_bits = ar->ar_bits[lvl];
> -	at->at_heap = at_heap;
> +	at->at_minfringe = (1 << bits);
> +	at->at_level = level;
> +	at->at_bits = bits;
> +	at->at_heap = heap;
>  	at->at_refcnt = 0;
>  
>  	if (parent != NULL) {
> -		node = srp_get_locked(&parent->at_heap[j].node);
> -		/* node isn't being deleted, no srp_finalize needed */
> -		srp_swap_locked(&at->at_default, node);
> +		art_heap_entry ahe;
> +
>  		at->at_offset = (parent->at_offset + parent->at_bits);
> +
> +		ahe = SMR_PTR_GET_LOCKED(&parent->at_heap[j]);
> +		SMR_PTR_SET_LOCKED(&heap[ART_HEAP_IDX_DEFAULT], ahe);
>  	}
>  
>  	return (at);
>  }
>  
> -
>  /*
>   * Delete a table and use its index to restore its parent's default route.
>   *
>   * Note:  Modify its parent to unlink the table from it.
>   */
>  struct art_table *
> -art_table_put(struct art_root *ar, struct art_table *at)
> +art_table_put(struct art *art, struct art_table *at)
>  {
>  	struct art_table	*parent = at->at_parent;
> -	struct art_node		*node;
> -	uint32_t		 j = at->at_index;
> +	unsigned int		 j = at->at_index;
>  
>  	KASSERT(at->at_refcnt == 0);
>  	KASSERT(j != 0 && j != 1);
>  
>  	if (parent != NULL) {
> +		art_heap_entry ahe;
> +
>  		KASSERT(j != -1);
>  		KASSERT(at->at_level == parent->at_level + 1);
>  		KASSERT(parent->at_refcnt >= 1);
>  
>  		/* Give the route back to its parent. */
> -		node = srp_get_locked(&at->at_default);
> -		srp_swap_locked(&parent->at_heap[j].node, node);
> +		ahe = SMR_PTR_GET_LOCKED(&at->at_heap[ART_HEAP_IDX_DEFAULT]);
> +		SMR_PTR_SET_LOCKED(&parent->at_heap[j], ahe);
>  	} else {
>  		KASSERT(j == -1);
>  		KASSERT(at->at_level == 0);
> -		srp_swap_locked(&ar->ar_root, NULL);
> +		SMR_PTR_SET_LOCKED(&art->art_root, NULL);
>  	}
>  
>  	mtx_enter(&art_table_gc_mtx);
> @@ -791,19 +1019,16 @@ art_table_gc(void *null)
>  	art_table_gc_list = NULL;
>  	mtx_leave(&art_table_gc_mtx);
>  
> +	smr_barrier();
> +
>  	while (at != NULL) {
>  		next = at->at_parent;
>  
> -		if (at->at_level == 0)
> -			srp_finalize(at, "arttfini");
> -		else
> -			srp_finalize(ASNODE(at), "arttfini");
> -
> -		switch (AT_HEAPSIZE(at->at_bits)) {
> -		case AT_HEAPSIZE(4):
> +		switch (at->at_bits) {
> +		case 4:
>  			pool_put(&at_heap_4_pool, at->at_heap);
>  			break;
> -		case AT_HEAPSIZE(8):
> +		case 8:
>  			pool_put(&at_heap_8_pool, at->at_heap);
>  			break;
>  		default:
> @@ -839,40 +1064,45 @@ art_table_gc(void *null)
>   *		art_allot(at, k, old, new);
>   *	}
>   */
> -void
> -art_allot(struct art_table *at, int i, struct art_node *old,
> -    struct art_node *new)
> -{
> -	struct art_node		*node, *dflt;
> -	int			 k = i;
> +static void
> +art_allot(struct art_table *at, unsigned int i,
> +    art_heap_entry oahe, art_heap_entry nahe)
> +{
> +	art_heap_entry		 ahe, *ahep;
> +	art_heap_entry		*heap;
> +	unsigned int		 k = i;
>  
>  	KASSERT(i < at->at_minfringe);
>  
> +	heap = at->at_heap;
> +
>  again:
>  	k <<= 1;
>  	if (k < at->at_minfringe)
>  		goto nonfringe;
>  
>  	/* Change fringe nodes. */
> -	while (1) {
> -		node = srp_get_locked(&at->at_heap[k].node);
> -		if (!ISLEAF(node)) {
> -			dflt = srp_get_locked(&SUBTABLE(node)->at_default);
> -			if (dflt == old) {
> -				srp_swap_locked(&SUBTABLE(node)->at_default,
> -				    new);
> -			}
> -		} else if (node == old) {
> -			srp_swap_locked(&at->at_heap[k].node, new);
> +	for (;;) {
> +		ahep = &heap[k];
> +		ahe = SMR_PTR_GET_LOCKED(ahep);
> +
> +		if (!art_heap_entry_is_node(ahe)) {
> +			art_heap_entry *child = art_heap_entry_to_heap(ahe);
> +			ahep = &child[ART_HEAP_IDX_DEFAULT];
> +			ahe = SMR_PTR_GET_LOCKED(ahep);
>  		}
> +
> +		if (ahe == oahe)
> +			SMR_PTR_SET_LOCKED(ahep, nahe);
> +
>  		if (k % 2)
>  			goto moveup;
>  		k++;
>  	}
>  
>  nonfringe:
> -	node = srp_get_locked(&at->at_heap[k].node);
> -	if (node == old)
> +	ahe = SMR_PTR_GET_LOCKED(&heap[k]);
> +	if (ahe == oahe)
>  		goto again;
>  moveon:
>  	if (k % 2)
> @@ -881,7 +1111,7 @@ moveon:
>  	goto nonfringe;
>  moveup:
>  	k >>= 1;
> -	srp_swap_locked(&at->at_heap[k].node, new);
> +	SMR_PTR_SET_LOCKED(&heap[k], nahe);
>  
>  	/* Change non-fringe node. */
>  	if (k != i)
> @@ -889,7 +1119,7 @@ moveup:
>  }
>  
>  struct art_node *
> -art_get(uint8_t plen)
> +art_get(const uint8_t *addr, unsigned int plen)
>  {
>  	struct art_node		*an;
>  
> @@ -897,17 +1127,25 @@ art_get(uint8_t plen)
>  	if (an == NULL)
>  		return (NULL);
>  
> -	an->an_plen = plen;
> -	SRPL_INIT(&an->an_rtlist);
> +	art_node_init(an, addr, plen);
>  
>  	return (an);
>  }
>  
>  void
> -art_put(struct art_node *an)
> +art_node_init(struct art_node *an, const uint8_t *addr, unsigned int plen)
>  {
> -	KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist));
> +	size_t len;
> +
> +	len = roundup(plen, 8) / 8;
> +	KASSERT(len <= sizeof(an->an_addr));
> +	memcpy(an->an_addr, addr, len);
> +	an->an_plen = plen;
> +}
>  
> +void
> +art_put(struct art_node *an)
> +{
>  	mtx_enter(&art_node_gc_mtx);
>  	an->an_gc = art_node_gc_list;
>  	art_node_gc_list = an;
> @@ -919,17 +1157,17 @@ art_put(struct art_node *an)
>  void
>  art_gc(void *null)
>  {
> -	struct art_node		*an, *next;
> +	struct art_node		*an;
>  
>  	mtx_enter(&art_node_gc_mtx);
>  	an = art_node_gc_list;
>  	art_node_gc_list = NULL;
>  	mtx_leave(&art_node_gc_mtx);
>  
> -	while (an != NULL) {
> -		next = an->an_gc;
> +	smr_barrier();
>  
> -		srp_finalize(an, "artnfini");
> +	while (an != NULL) {
> +		struct art_node *next = an->an_gc;
>  
>  		pool_put(&an_pool, an);
>  
> Index: art.h
> ===================================================================
> RCS file: /cvs/src/sys/net/art.h,v
> diff -u -p -r1.25 art.h
> --- art.h	11 Nov 2023 12:17:50 -0000	1.25
> +++ art.h	13 Jun 2025 06:40:59 -0000
> @@ -19,94 +19,90 @@
>  #ifndef _NET_ART_H_
>  #define _NET_ART_H_
>  
> -#include <sys/rwlock.h>
> -#include <sys/srp.h>
> -
> -#define ART_MAXLVL	32	/* We currently use 32 levels for IPv6. */
> -
>  /*
> - * Root of the ART tables, equivalent to the radix head.
> + * Allotment Routing Table (ART)
> + *
> + * Yoichi Hariguchi paper can be found at:
> + *      http://www.hariguchi.org/art/art.pdf
> + *
> + * Locking:
> + *
> + * Modification (ie, art_insert or art_delete) and iteration
> + * (art_iter_next, etc) over the ART must be serialised by the caller.
> + * Lookups (ie, art_match and art_lookup) run within an SMR critical
> + * section.
>   *
> - *  Locks used to protect struct members in this file:
> - *	I	immutable after creation
> - *	l	root's `ar_lock'
> - *	K	kernel lock
> - *  For SRP related structures that allow lock-free reads, the write lock
> - *  is indicated below.
> + * Iteration requires serialisation as it manipulates the reference
> + * counts on tables as it traverses the tree. The iterator maintains
> + * these references until it runs out of entries. This allows code
> + * iterating over the ART to release locks in between calls to
> + * art_iter_open and art_iter_next. The references may be dropped
> + * early with art_iter_close.
> + *
> + * Note, the iterator does not hold a reference to the art_node
> + * structure or the data hanging off the an_value pointer, they must
> + * be accounted for separately or their use must be serialised with
> + * art_delete.
>   */
> -struct art_root {
> -	struct srp		 ar_root;	/* [l] First table */
> -	struct rwlock		 ar_lock;	/* [] Serialise modifications */
> -	uint8_t			 ar_bits[ART_MAXLVL]; /* [I] Per level stride */
> -	uint8_t			 ar_nlvl;	/* [I] Number of levels */
> -	uint8_t			 ar_alen;	/* [I] Address length in bits */
> -	uint8_t			 ar_off;	/* [I] Offset of key in bytes */
> -	struct sockaddr		*ar_source;	/* [N] use optional src addr */
> -};
>  
> -#define ISLEAF(e)	(((unsigned long)(e) & 1) == 0)
> -#define SUBTABLE(e)	((struct art_table *)((unsigned long)(e) & ~1))
> -#define ASNODE(t)	((struct art_node *)((unsigned long)(t) | 1))
> +typedef uintptr_t		 art_heap_entry;
>  
>  /*
> - * Allotment Table.
> + * Root of the ART, equivalent to the radix head.
>   */
> -struct art_table {
> -	struct art_table	*at_parent;	/* Parent table */
> -	uint32_t		 at_index;	/* Index in the parent table */
> -	uint32_t		 at_minfringe;	/* Index that fringe begins */
> -	uint32_t		 at_level;	/* Level of the table */
> -	uint8_t			 at_bits;	/* Stride length of the table */
> -	uint8_t			 at_offset;	/* Sum of parents' stride len */
> -
> -	/*
> -	 * Items stored in the heap are pointers to nodes, in the leaf
> -	 * case, or tables otherwise.  One exception is index 0 which
> -	 * is a route counter.
> -	 */
> -	union {
> -		struct srp		 node;
> -		unsigned long		 count;
> -	} *at_heap;				/* Array of 2^(slen+1) items */
> -};
> -#define	at_refcnt	at_heap[0].count/* Refcounter (1 per different route) */
> -#define	at_default	at_heap[1].node	/* Default route (was in parent heap) */
> -
> -/* Heap size for an ART table of stride length ``slen''. */
> -#define AT_HEAPSIZE(slen)	((1 << ((slen) + 1)) * sizeof(void *))
>  
> -/*
> - * Forward declaration needed for the list of mpath routes
> - * attached to a single ART node.
> - */
> -struct rtentry;
> +struct art {
> +	art_heap_entry		*art_root;
> +	const unsigned int	*art_levels;
> +	unsigned int		 art_nlevels;
> +	unsigned int		 art_alen;
> +};
>  
>  /*
>   * A node is the internal representation of a route entry.
>   */
>  struct art_node {
> +	void			*an_value;
>  	union {
> -	    SRPL_HEAD(, rtentry) an__rtlist;	/* Route related to this node */
> -	    struct art_node	*an__gc;	/* Entry on GC list */
> -	}			 an_pointer;
> -	uint8_t			 an_plen;	/* Prefix length */
> +		struct art_node		*an__gc;
> +		uint8_t			 an__addr[16];
> +	}			 an__u;
> +#define an_gc			 an__u.an__gc
> +#define an_addr			 an__u.an__addr
> +	unsigned int		 an_plen;
>  };
> -#define an_rtlist	an_pointer.an__rtlist
> -#define an_gc		an_pointer.an__gc
> -
> -void		 art_init(void);
> -struct art_root *art_alloc(unsigned int, unsigned int, unsigned int);
> -struct art_node *art_insert(struct art_root *, struct art_node *, const void *,
> -		     int);
> -struct art_node *art_delete(struct art_root *, struct art_node *, const void *,
> -		     int);
> -struct art_node *art_match(struct art_root *, const void *, struct srp_ref *);
> -struct art_node *art_lookup(struct art_root *, const void *, int,
> -		     struct srp_ref *);
> -int		 art_walk(struct art_root *,
> -		     int (*)(struct art_node *, void *), void *);
>  
> -struct art_node *art_get(uint8_t);
> +void		 art_boot(void);
> +struct art	*art_alloc(unsigned int);
> +void		 art_init(struct art *, unsigned int);
> +struct art_node *art_insert(struct art *, struct art_node *);
> +struct art_node *art_delete(struct art *, const void *, unsigned int);
> +struct art_node *art_match(struct art *, const void *);
> +struct art_node *art_lookup(struct art *, const void *, unsigned int);
> +int		 art_is_empty(struct art *);
> +
> +struct art_node	*art_get(const uint8_t *, unsigned int);
> +void		 art_node_init(struct art_node *,
> +		     const uint8_t *, unsigned int);
>  void		 art_put(struct art_node *);
> +
> +struct art_iter {
> +	struct art		*ai_art;
> +	struct art_table	*ai_table;
> +	unsigned int		 ai_j;
> +	unsigned int		 ai_i;
> +};
> +
> +struct art_node	*art_iter_open(struct art *, struct art_iter *);
> +struct art_node	*art_iter_next(struct art_iter *);
> +void		 art_iter_close(struct art_iter *);
> +
> +#define ART_FOREACH(_an, _art, _ai)					\
> +	for ((_an) = art_iter_open((_art), (_ai));			\
> +	     (_an) != NULL;						\
> +	     (_an) = art_iter_next((_ai)))
> +
> +int		 art_walk(struct art *,
> +		     int (*)(struct art_node *, void *), void *);
>  
>  #endif /* _NET_ART_H_ */
> Index: if_tun.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_tun.c,v
> diff -u -p -r1.251 if_tun.c
> --- if_tun.c	2 Mar 2025 21:28:32 -0000	1.251
> +++ if_tun.c	13 Jun 2025 06:40:59 -0000
> @@ -318,8 +318,6 @@ tun_clone_destroy(struct ifnet *ifp)
>  
>  		if (vfinddev(dev, VCHR, &vp))
>  			VOP_REVOKE(vp, REVOKEALL);
> -
> -		KASSERT(sc->sc_dev == 0);
>  	}
>  
>  	/* prevent userland from getting to the device again */
> Index: if_wg.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_wg.c,v
> diff -u -p -r1.42 if_wg.c
> --- if_wg.c	17 Apr 2025 12:42:50 -0000	1.42
> +++ if_wg.c	13 Jun 2025 06:40:59 -0000
> @@ -31,6 +31,7 @@
>  #include <sys/ioctl.h>
>  #include <sys/mbuf.h>
>  #include <sys/syslog.h>
> +#include <sys/smr.h>
>  
>  #include <net/if.h>
>  #include <net/if_var.h>
> @@ -251,10 +252,11 @@ struct wg_softc {
>  	struct socket		*sc_so6;
>  #endif
>  
> +	struct rwlock		 sc_aip_lock;
>  	size_t			 sc_aip_num;
> -	struct art_root		*sc_aip4;
> +	struct art		*sc_aip4;
>  #ifdef INET6
> -	struct art_root		*sc_aip6;
> +	struct art		*sc_aip6;
>  #endif
>  
>  	struct rwlock		 sc_peer_lock;
> @@ -290,7 +292,7 @@ void	wg_peer_counters_add(struct wg_peer
>  
>  int	wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *);
>  struct wg_peer *
> -	wg_aip_lookup(struct art_root *, void *);
> +	wg_aip_lookup(struct art *, void *);
>  int	wg_aip_remove(struct wg_softc *, struct wg_peer *,
>  	    struct wg_aip_io *);
>  
> @@ -609,7 +611,7 @@ wg_peer_counters_add(struct wg_peer *pee
>  int
>  wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
>  {
> -	struct art_root	*root;
> +	struct art	*root;
>  	struct art_node	*node;
>  	struct wg_aip	*aip;
>  	int		 ret = 0;
> @@ -625,8 +627,10 @@ wg_aip_add(struct wg_softc *sc, struct w
>  	if ((aip = pool_get(&wg_aip_pool, PR_NOWAIT|PR_ZERO)) == NULL)
>  		return ENOBUFS;
>  
> -	rw_enter_write(&root->ar_lock);
> -	node = art_insert(root, &aip->a_node, &d->a_addr, d->a_cidr);
> +	art_node_init(&aip->a_node, &d->a_addr.addr_bytes, d->a_cidr);
> +
> +	rw_enter_write(&sc->sc_aip_lock);
> +	node = art_insert(root, &aip->a_node);
>  
>  	if (node == &aip->a_node) {
>  		aip->a_peer = peer;
> @@ -642,18 +646,18 @@ wg_aip_add(struct wg_softc *sc, struct w
>  			aip->a_peer = peer;
>  		}
>  	}
> -	rw_exit_write(&root->ar_lock);
> +	rw_exit_write(&sc->sc_aip_lock);
>  	return ret;
>  }
>  
>  struct wg_peer *
> -wg_aip_lookup(struct art_root *root, void *addr)
> +wg_aip_lookup(struct art *root, void *addr)
>  {
> -	struct srp_ref	 sr;
>  	struct art_node	*node;
>  
> -	node = art_match(root, addr, &sr);
> -	srp_leave(&sr);
> +	smr_read_enter();
> +	node = art_match(root, addr);
> +	smr_read_leave();
>  
>  	return node == NULL ? NULL : ((struct wg_aip *) node)->a_peer;
>  }
> @@ -661,8 +665,7 @@ wg_aip_lookup(struct art_root *root, voi
>  int
>  wg_aip_remove(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
>  {
> -	struct srp_ref	 sr;
> -	struct art_root	*root;
> +	struct art	*root;
>  	struct art_node	*node;
>  	struct wg_aip	*aip;
>  	int		 ret = 0;
> @@ -675,23 +678,21 @@ wg_aip_remove(struct wg_softc *sc, struc
>  	default: return EAFNOSUPPORT;
>  	}
>  
> -	rw_enter_write(&root->ar_lock);
> -	if ((node = art_lookup(root, &d->a_addr, d->a_cidr, &sr)) == NULL) {
> +	rw_enter_write(&sc->sc_aip_lock);
> +	if ((node = art_lookup(root, &d->a_addr, d->a_cidr)) == NULL) {
>  		ret = ENOENT;
>  	} else if (((struct wg_aip *) node)->a_peer != peer) {
>  		ret = EXDEV;
>  	} else {
>  		aip = (struct wg_aip *)node;
> -		if (art_delete(root, node, &d->a_addr, d->a_cidr) == NULL)
> +		if (art_delete(root, &d->a_addr, d->a_cidr) == NULL)
>  			panic("art_delete failed to delete node %p", node);
>  
>  		sc->sc_aip_num--;
>  		LIST_REMOVE(aip, a_entry);
>  		pool_put(&wg_aip_pool, aip);
>  	}
> -
> -	srp_leave(&sr);
> -	rw_exit_write(&root->ar_lock);
> +	rw_exit_write(&sc->sc_aip_lock);
>  	return ret;
>  }
>  
> @@ -2726,10 +2727,11 @@ wg_clone_create(struct if_clone *ifc, in
>  #endif
>  
>  	sc->sc_aip_num = 0;
> -	if ((sc->sc_aip4 = art_alloc(0, 32, 0)) == NULL)
> +	rw_init(&sc->sc_aip_lock, "wgaip");
> +	if ((sc->sc_aip4 = art_alloc(32)) == NULL)
>  		goto ret_02;
>  #ifdef INET6
> -	if ((sc->sc_aip6 = art_alloc(0, 128, 0)) == NULL)
> +	if ((sc->sc_aip6 = art_alloc(128)) == NULL)
>  		goto ret_03;
>  #endif
>  
> Index: if_wg.h
> ===================================================================
> RCS file: /cvs/src/sys/net/if_wg.h,v
> diff -u -p -r1.6 if_wg.h
> --- if_wg.h	13 Oct 2024 00:53:21 -0000	1.6
> +++ if_wg.h	13 Jun 2025 06:40:59 -0000
> @@ -46,6 +46,7 @@ struct wg_aip_io {
>  	sa_family_t	 a_af;
>  	int		 a_cidr;
>  	union wg_aip_addr {
> +		uint8_t			addr_bytes;
>  		struct in_addr		addr_ipv4;
>  		struct in6_addr		addr_ipv6;
>  	}		 a_addr;
> Index: route.h
> ===================================================================
> RCS file: /cvs/src/sys/net/route.h,v
> diff -u -p -r1.216 route.h
> --- route.h	16 Mar 2025 21:58:08 -0000	1.216
> +++ route.h	13 Jun 2025 06:40:59 -0000
> @@ -40,7 +40,7 @@
>   *	I	immutable after creation
>   *	N	net lock
>   *	X	exclusive net lock, or shared net lock + kernel lock
> - *	R	art (rtable) lock
> + *	R	rtable lock
>   *	r	per route entry mutex	rt_mtx
>   *	L	arp/nd6/etc lock for updates, net lock for reads
>   *	T	rttimer_mtx		route timer lists
> @@ -117,7 +117,7 @@ struct rttimer;
>  
>  struct rtentry {
>  	struct sockaddr	*rt_dest;	/* [I] destination */
> -	SRPL_ENTRY(rtentry) rt_next;	/* [R] next mpath entry to our dst */
> +	struct rtentry	*rt_next;	/* [R] next mpath entry to our dst */
>  	struct sockaddr	*rt_gateway;	/* [X] gateway address */
>  	struct ifaddr	*rt_ifa;	/* [N] interface addr to use */
>  	caddr_t		 rt_llinfo;	/* [L] pointer to link level info or
> Index: rtable.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtable.c,v
> diff -u -p -r1.87 rtable.c
> --- rtable.c	9 Apr 2024 12:53:08 -0000	1.87
> +++ rtable.c	13 Jun 2025 06:40:59 -0000
> @@ -26,12 +26,21 @@
>  #include <sys/queue.h>
>  #include <sys/domain.h>
>  #include <sys/srp.h>
> +#include <sys/smr.h>
>  #endif
>  
>  #include <net/rtable.h>
>  #include <net/route.h>
>  #include <net/art.h>
>  
> +struct rtable {
> +	struct rwlock		 r_lock;
> +	struct art		*r_art;		/* [I] */
> +	unsigned int		 r_off;		/* [I] Offset of key in bytes */
> +
> +	struct sockaddr		*r_source;	/* [N] use optional src addr */
> +};
> +
>  /*
>   * Structures used by rtable_get() to retrieve the corresponding
>   * routing table for a given pair of ``af'' and ``rtableid''.
> @@ -85,8 +94,8 @@ void		   rtmap_dtor(void *, void *);
>  struct srp_gc	   rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL);
>  
>  void		   rtable_init_backend(void);
> -void		  *rtable_alloc(unsigned int, unsigned int, unsigned int);
> -void		  *rtable_get(unsigned int, sa_family_t);
> +struct rtable	  *rtable_alloc(unsigned int, unsigned int, unsigned int);
> +struct rtable	  *rtable_get(unsigned int, sa_family_t);
>  
>  void
>  rtmap_init(void)
> @@ -193,7 +202,7 @@ int
>  rtable_add(unsigned int id)
>  {
>  	const struct domain	*dp;
> -	void			*tbl;
> +	struct rtable		*tbl;
>  	struct rtmap		*map;
>  	struct dommp		*dmm;
>  	sa_family_t		 af;
> @@ -244,11 +253,11 @@ out:
>  	return (error);
>  }
>  
> -void *
> +struct rtable *
>  rtable_get(unsigned int rtableid, sa_family_t af)
>  {
>  	struct rtmap	*map;
> -	void		*tbl = NULL;
> +	struct rtable	*tbl = NULL;
>  	struct srp_ref	 sr;
>  
>  	if (af >= nitems(af2idx) || af2idx[af] == 0)
> @@ -286,7 +295,7 @@ rtable_empty(unsigned int rtableid)
>  {
>  	const struct domain	*dp;
>  	int			 i;
> -	struct art_root		*tbl;
> +	struct rtable		*tbl;
>  
>  	for (i = 0; (dp = domains[i]) != NULL; i++) {
>  		if (dp->dom_rtoffset == 0)
> @@ -295,7 +304,7 @@ rtable_empty(unsigned int rtableid)
>  		tbl = rtable_get(rtableid, dp->dom_family);
>  		if (tbl == NULL)
>  			continue;
> -		if (tbl->ar_root.ref != NULL)
> +		if (!art_is_empty(tbl->r_art))
>  			return (0);
>  	}
>  
> @@ -350,40 +359,51 @@ rtable_l2set(unsigned int rtableid, unsi
>  }
>  
>  
> -static inline const uint8_t *satoaddr(struct art_root *,
> +static inline const uint8_t *satoaddr(struct rtable *,
>      const struct sockaddr *);
>  
> -int	an_match(struct art_node *, const struct sockaddr *, int);
> -void	rtentry_ref(void *, void *);
> -void	rtentry_unref(void *, void *);
> -
>  void	rtable_mpath_insert(struct art_node *, struct rtentry *);
>  
> -struct srpl_rc rt_rc = SRPL_RC_INITIALIZER(rtentry_ref, rtentry_unref, NULL);
> -
>  void
>  rtable_init_backend(void)
>  {
> -	art_init();
> +	art_boot();
>  }
>  
> -void *
> +struct rtable *
>  rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
>  {
> -	return (art_alloc(rtableid, alen, off));
> +	struct rtable *r;
> +
> +	r = malloc(sizeof(*r), M_RTABLE, M_NOWAIT|M_ZERO);
> +	if (r == NULL)
> +		return (r);
> +
> +
> +	r->r_art = art_alloc(alen);
> +	if (r->r_art == NULL) {
> +		free(r, M_RTABLE, sizeof(*r));
> +		return (NULL);
> +	}
> +
> +	rw_init(&r->r_lock, "rtable");
> +	r->r_off = off;
> +	r->r_source = NULL;
> +
> +	return (r);
>  }
>  
>  int
>  rtable_setsource(unsigned int rtableid, int af, struct sockaddr *src)
>  {
> -	struct art_root		*ar;
> +	struct rtable		*r;
>  
>  	NET_ASSERT_LOCKED_EXCLUSIVE();
>  
> -	if ((ar = rtable_get(rtableid, af)) == NULL)
> +	if ((r = rtable_get(rtableid, af)) == NULL)
>  		return (EAFNOSUPPORT);
>  
> -	ar->ar_source = src;
> +	r->r_source = src;
>  
>  	return (0);
>  }
> @@ -391,15 +411,15 @@ rtable_setsource(unsigned int rtableid, 
>  struct sockaddr *
>  rtable_getsource(unsigned int rtableid, int af)
>  {
> -	struct art_root		*ar;
> +	struct rtable		*r;
>  
>  	NET_ASSERT_LOCKED();
>  
> -	ar = rtable_get(rtableid, af);
> -	if (ar == NULL)
> +	r = rtable_get(rtableid, af);
> +	if (r == NULL)
>  		return (NULL);
>  
> -	return (ar->ar_source);
> +	return (r->r_source);
>  }
>  
>  void
> @@ -419,37 +439,34 @@ struct rtentry *
>  rtable_lookup(unsigned int rtableid, const struct sockaddr *dst,
>      const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio)
>  {
> -	struct art_root			*ar;
> +	struct rtable			*r;
>  	struct art_node			*an;
>  	struct rtentry			*rt = NULL;
> -	struct srp_ref			 sr, nsr;
>  	const uint8_t			*addr;
>  	int				 plen;
>  
> -	ar = rtable_get(rtableid, dst->sa_family);
> -	if (ar == NULL)
> +	r = rtable_get(rtableid, dst->sa_family);
> +	if (r == NULL)
>  		return (NULL);
>  
> -	addr = satoaddr(ar, dst);
> +	addr = satoaddr(r, dst);
>  
> -	/* No need for a perfect match. */
> +	smr_read_enter();
>  	if (mask == NULL) {
> -		an = art_match(ar, addr, &nsr);
> -		if (an == NULL)
> -			goto out;
> +		/* No need for a perfect match. */
> +		an = art_match(r->r_art, addr);
>  	} else {
>  		plen = rtable_satoplen(dst->sa_family, mask);
>  		if (plen == -1)
>  			return (NULL);
>  
> -		an = art_lookup(ar, addr, plen, &nsr);
> -
> -		/* Make sure we've got a perfect match. */
> -		if (!an_match(an, dst, plen))
> -			goto out;
> +		an = art_lookup(r->r_art, addr, plen);
>  	}
> +	if (an == NULL)
> +		goto out;
>  
> -	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
> +	for (rt = SMR_PTR_GET(&an->an_value); rt != NULL;
> +	    rt = SMR_PTR_GET(&rt->rt_next)) {
>  		if (prio != RTP_ANY &&
>  		    (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
>  			continue;
> @@ -464,9 +481,8 @@ rtable_lookup(unsigned int rtableid, con
>  	if (rt != NULL)
>  		rtref(rt);
>  
> -	SRPL_LEAVE(&sr);
>  out:
> -	srp_leave(&nsr);
> +	smr_read_leave();
>  
>  	return (rt);
>  }
> @@ -474,44 +490,41 @@ out:
>  struct rtentry *
>  rtable_match(unsigned int rtableid, const struct sockaddr *dst, uint32_t *src)
>  {
> -	struct art_root			*ar;
> +	struct rtable			*r;
>  	struct art_node			*an;
>  	struct rtentry			*rt = NULL;
> -	struct srp_ref			 sr, nsr;
>  	const uint8_t			*addr;
>  	int				 hash;
> +	uint8_t				 prio;
>  
> -	ar = rtable_get(rtableid, dst->sa_family);
> -	if (ar == NULL)
> +	r = rtable_get(rtableid, dst->sa_family);
> +	if (r == NULL)
>  		return (NULL);
>  
> -	addr = satoaddr(ar, dst);
> +	addr = satoaddr(r, dst);
>  
> -	an = art_match(ar, addr, &nsr);
> +	smr_read_enter();
> +	an = art_match(r->r_art, addr);
>  	if (an == NULL)
>  		goto out;
>  
> -	rt = SRPL_FIRST(&sr, &an->an_rtlist);
> -	if (rt == NULL) {
> -		SRPL_LEAVE(&sr);
> -		goto out;
> -	}
> -	rtref(rt);
> -	SRPL_LEAVE(&sr);
> +	rt = SMR_PTR_GET(&an->an_value);
> +	KASSERT(rt != NULL);
> +	prio = rt->rt_priority;
>  
>  	/* Gateway selection by Hash-Threshold (RFC 2992) */
>  	if ((hash = rt_hash(rt, dst, src)) != -1) {
>  		struct rtentry		*mrt;
> -		int			 threshold, npaths = 0;
> +		int			 threshold, npaths = 1;
>  
>  		KASSERT(hash <= 0xffff);
>  
> -		SRPL_FOREACH(mrt, &sr, &an->an_rtlist, rt_next) {
> -			/* Only count nexthops with the same priority. */
> -			if (mrt->rt_priority == rt->rt_priority)
> +		/* Only count nexthops with the same priority. */
> +		mrt = rt;
> +		while ((mrt = SMR_PTR_GET(&mrt->rt_next)) != NULL) {
> +			if (mrt->rt_priority == prio)
>  				npaths++;
>  		}
> -		SRPL_LEAVE(&sr);
>  
>  		threshold = (0xffff / npaths) + 1;
>  
> @@ -520,27 +533,22 @@ rtable_match(unsigned int rtableid, cons
>  		 * route list attached to the node, so we won't necessarily
>  		 * have the same number of routes.  for most modifications,
>  		 * we'll pick a route that we wouldn't have if we only saw the
> -		 * list before or after the change.  if we were going to use
> -		 * the last available route, but it got removed, we'll hit
> -		 * the end of the list and then pick the first route.
> +		 * list before or after the change.
>  		 */
> -
> -		mrt = SRPL_FIRST(&sr, &an->an_rtlist);
> -		while (hash > threshold && mrt != NULL) {
> -			if (mrt->rt_priority == rt->rt_priority)
> +		mrt = rt;
> +		while (hash > threshold) {
> +			if (mrt->rt_priority == prio) {
> +				rt = mrt;
>  				hash -= threshold;
> -			mrt = SRPL_FOLLOW(&sr, mrt, rt_next);
> -		}
> -
> -		if (mrt != NULL) {
> -			rtref(mrt);
> -			rtfree(rt);
> -			rt = mrt;
> +			}
> +			mrt = SMR_PTR_GET(&mrt->rt_next);
> +			if (mrt == NULL)
> +				break;
>  		}
> -		SRPL_LEAVE(&sr);
>  	}
> +	rtref(rt);
>  out:
> -	srp_leave(&nsr);
> +	smr_read_leave();
>  	return (rt);
>  }
>  
> @@ -549,35 +557,59 @@ rtable_insert(unsigned int rtableid, str
>      const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio,
>      struct rtentry *rt)
>  {
> -	struct rtentry			*mrt;
> -	struct srp_ref			 sr;
> -	struct art_root			*ar;
> +	struct rtable			*r;
>  	struct art_node			*an, *prev;
>  	const uint8_t			*addr;
>  	int				 plen;
>  	unsigned int			 rt_flags;
>  	int				 error = 0;
>  
> -	ar = rtable_get(rtableid, dst->sa_family);
> -	if (ar == NULL)
> +	r = rtable_get(rtableid, dst->sa_family);
> +	if (r == NULL)
>  		return (EAFNOSUPPORT);
>  
> -	addr = satoaddr(ar, dst);
> +	addr = satoaddr(r, dst);
>  	plen = rtable_satoplen(dst->sa_family, mask);
>  	if (plen == -1)
>  		return (EINVAL);
>  
> -	rtref(rt); /* guarantee rtfree won't do anything during insert */
> -	rw_enter_write(&ar->ar_lock);
> +	an = art_get(addr, plen);
> +	if (an == NULL)
> +		return (ENOMEM);
>  
> -	/* Do not permit exactly the same dst/mask/gw pair. */
> -	an = art_lookup(ar, addr, plen, &sr);
> -	srp_leave(&sr); /* an can't go away while we have the lock */
> -	if (an_match(an, dst, plen)) {
> -		struct rtentry  *mrt;
> -		int		 mpathok = ISSET(rt->rt_flags, RTF_MPATH);
> +	/* prepare for immediate operation if insert succeeds */
> +	rt_flags = rt->rt_flags;
> +	rt->rt_flags &= ~RTF_MPATH;
> +	rt->rt_dest = dst;
> +	rt->rt_plen = plen;
> +	rt->rt_next = NULL;
>  
> -		SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
> +	rtref(rt); /* take a ref for the table */
> +	an->an_value = rt;
> +
> +	rw_enter_write(&r->r_lock);
> +	prev = art_insert(r->r_art, an);
> +	if (prev == NULL) {
> +		error = ENOMEM;
> +		goto put;
> +	}
> +
> +	if (prev != an) {
> +		struct rtentry *mrt;
> +		int mpathok = ISSET(rt_flags, RTF_MPATH);
> +		int mpath = 0;
> +
> +		/*
> +		 * An ART node with the same destination/netmask already
> +		 * exists.
> +		 */
> +		art_put(an);
> +		an = prev;
> +
> +		/* Do not permit exactly the same dst/mask/gw pair. */
> +		for (mrt = SMR_PTR_GET_LOCKED(&an->an_value);
> +		     mrt != NULL;
> +		     mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) {
>  			if (prio != RTP_ANY &&
>  			    (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
>  				continue;
> @@ -589,63 +621,34 @@ rtable_insert(unsigned int rtableid, str
>  				error = EEXIST;
>  				goto leave;
>  			}
> +			mpath = RTF_MPATH;
>  		}
> -	}
>  
> -	an = art_get(plen);
> -	if (an == NULL) {
> -		error = ENOBUFS;
> -		goto leave;
> -	}
> -
> -	/* prepare for immediate operation if insert succeeds */
> -	rt_flags = rt->rt_flags;
> -	rt->rt_flags &= ~RTF_MPATH;
> -	rt->rt_dest = dst;
> -	rt->rt_plen = plen;
> -	SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
> -
> -	prev = art_insert(ar, an, addr, plen);
> -	if (prev != an) {
> -		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
> -		    rt_next);
> -		rt->rt_flags = rt_flags;
> -		art_put(an);
> +		/* The new route can be added to the list. */
> +		if (mpath) {
> +			SET(rt->rt_flags, RTF_MPATH);
> +
> +			for (mrt = SMR_PTR_GET_LOCKED(&an->an_value);
> +			     mrt != NULL;
> +			     mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) {
> +				if ((mrt->rt_priority & RTP_MASK) !=
> +				    (prio & RTP_MASK))
> +					continue;
>  
> -		if (prev == NULL) {
> -			error = ESRCH;
> -			goto leave;
> -		}
> -
> -		an = prev;
> -
> -		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
> -		KASSERT(mrt != NULL);
> -		KASSERT((rt->rt_flags & RTF_MPATH) || mrt->rt_priority != prio);
> -
> -		/*
> -		 * An ART node with the same destination/netmask already
> -		 * exists, MPATH conflict must have been already checked.
> -		 */
> -		if (rt->rt_flags & RTF_MPATH) {
> -			/*
> -			 * Only keep the RTF_MPATH flag if two routes have
> -			 * the same gateway.
> -			 */
> -			rt->rt_flags &= ~RTF_MPATH;
> -			SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next) {
> -				if (mrt->rt_priority == prio) {
> -					mrt->rt_flags |= RTF_MPATH;
> -					rt->rt_flags |= RTF_MPATH;
> -				}
> +				SET(mrt->rt_flags, RTF_MPATH);
>  			}
>  		}
>  
>  		/* Put newly inserted entry at the right place. */
>  		rtable_mpath_insert(an, rt);
>  	}
> +	rw_exit_write(&r->r_lock);
> +	return (error);
> +
> +put:
> +	art_put(an);
>  leave:
> -	rw_exit_write(&ar->ar_lock);
> +	rw_exit_write(&r->r_lock);
>  	rtfree(rt);
>  	return (error);
>  }
> @@ -654,118 +657,136 @@ int
>  rtable_delete(unsigned int rtableid, const struct sockaddr *dst,
>      const struct sockaddr *mask, struct rtentry *rt)
>  {
> -	struct art_root			*ar;
> +	struct rtable			*r;
>  	struct art_node			*an;
> -	struct srp_ref			 sr;
>  	const uint8_t			*addr;
>  	int				 plen;
>  	struct rtentry			*mrt;
> -	int				 npaths = 0;
> -	int				 error = 0;
>  
> -	ar = rtable_get(rtableid, dst->sa_family);
> -	if (ar == NULL)
> +	r = rtable_get(rtableid, dst->sa_family);
> +	if (r == NULL)
>  		return (EAFNOSUPPORT);
>  
> -	addr = satoaddr(ar, dst);
> +	addr = satoaddr(r, dst);
>  	plen = rtable_satoplen(dst->sa_family, mask);
>  	if (plen == -1)
>  		return (EINVAL);
>  
> -	rtref(rt); /* guarantee rtfree won't do anything under ar_lock */
> -	rw_enter_write(&ar->ar_lock);
> -	an = art_lookup(ar, addr, plen, &sr);
> -	srp_leave(&sr); /* an can't go away while we have the lock */
> -
> -	/* Make sure we've got a perfect match. */
> -	if (!an_match(an, dst, plen)) {
> -		error = ESRCH;
> -		goto leave;
> +	rw_enter_write(&r->r_lock);
> +	smr_read_enter();
> +	an = art_lookup(r->r_art, addr, plen);
> +	smr_read_leave();
> +	if (an == NULL) {
> +		rw_exit_write(&r->r_lock);
> +		return (ESRCH);
>  	}
>  
> -	/*
> -	 * If other multipath route entries are still attached to
> -	 * this ART node we only have to unlink it.
> -	 */
> -	SRPL_FOREACH_LOCKED(mrt, &an->an_rtlist, rt_next)
> -		npaths++;
> -
> -	if (npaths > 1) {
> -		KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
> -		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry,
> -		    rt_next);
> -
> -		mrt = SRPL_FIRST_LOCKED(&an->an_rtlist);
> -		if (npaths == 2)
> -			mrt->rt_flags &= ~RTF_MPATH;
> +	/* If this is the only route in the list then we can delete the node */
> +	if (SMR_PTR_GET_LOCKED(&an->an_value) == rt &&
> +	    SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) {
> +		struct art_node *oan;
> +		oan = art_delete(r->r_art, addr, plen);
> +		if (oan != an)
> +			panic("art %p changed shape during delete", r->r_art);
> +		art_put(an);
> +		/*
> +		 * XXX an and the rt ref could still be alive on other cpus.
> +		 * this currently works because of the NET_LOCK/KERNEL_LOCK
> +		 * but should be fixed if we want to do route lookups outside
> +		 * these locks. - dlg@
> +		 */
> +	} else {
> +		struct rtentry **prt;
> +		struct rtentry *nrt;
> +		unsigned int found = 0;
> +		unsigned int npaths = 0;
>  
> -		goto leave;
> +		/*
> +		 * If other multipath route entries are still attached to
> +		 * this ART node we only have to unlink it.
> +		 */
> + 		prt = (struct rtentry **)&an->an_value;
> +		while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
> +			if (mrt == rt) {
> +				found = 1;
> +				SMR_PTR_SET_LOCKED(prt,
> +				    SMR_PTR_GET_LOCKED(&mrt->rt_next));
> +			} else if ((mrt->rt_priority & RTP_MASK) ==
> +			    (rt->rt_priority & RTP_MASK)) {
> +				npaths++;
> +				nrt = mrt;
> +			}
> +			prt = &mrt->rt_next;
> +		}
> +		if (!found)
> +			panic("removing non-existent route");
> +		if (npaths == 1)
> +			CLR(nrt->rt_flags, RTF_MPATH);
>  	}
> -
> -	if (art_delete(ar, an, addr, plen) == NULL)
> -		panic("art_delete failed to find node %p", an);
> -
>  	KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
> -	SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next);
> -	art_put(an);
> -
> -leave:
> -	rw_exit_write(&ar->ar_lock);
> +	rw_exit_write(&r->r_lock);
>  	rtfree(rt);
>  
> -	return (error);
> -}
> -
> -struct rtable_walk_cookie {
> -	int		(*rwc_func)(struct rtentry *, void *, unsigned int);
> -	void		 *rwc_arg;
> -	struct rtentry	**rwc_prt;
> -	unsigned int	  rwc_rid;
> -};
> -
> -/*
> - * Helper for rtable_walk to keep the ART code free from any "struct rtentry".
> - */
> -int
> -rtable_walk_helper(struct art_node *an, void *xrwc)
> -{
> -	struct srp_ref			 sr;
> -	struct rtable_walk_cookie	*rwc = xrwc;
> -	struct rtentry			*rt;
> -	int				 error = 0;
> -
> -	SRPL_FOREACH(rt, &sr, &an->an_rtlist, rt_next) {
> -		error = (*rwc->rwc_func)(rt, rwc->rwc_arg, rwc->rwc_rid);
> -		if (error != 0)
> -			break;
> -	}
> -	if (rwc->rwc_prt != NULL && rt != NULL) {
> -		rtref(rt);
> -		*rwc->rwc_prt = rt;
> -	}
> -	SRPL_LEAVE(&sr);
> -
> -	return (error);
> +	return (0);
>  }
>  
>  int
>  rtable_walk(unsigned int rtableid, sa_family_t af, struct rtentry **prt,
>      int (*func)(struct rtentry *, void *, unsigned int), void *arg)
>  {
> -	struct art_root			*ar;
> -	struct rtable_walk_cookie	 rwc;
> +	struct rtable			*r;
> +	struct art_iter			 ai;
> +	struct art_node			*an;
>  	int				 error;
>  
> -	ar = rtable_get(rtableid, af);
> -	if (ar == NULL)
> +	r = rtable_get(rtableid, af);
> +	if (r == NULL)
>  		return (EAFNOSUPPORT);
>  
> -	rwc.rwc_func = func;
> -	rwc.rwc_arg = arg;
> -	rwc.rwc_prt = prt;
> -	rwc.rwc_rid = rtableid;
> +	rw_enter_write(&r->r_lock);
> +	ART_FOREACH(an, r->r_art, &ai) {
> +		/*
> +		 * ART nodes have a list of rtentries.
> +		 *
> +		 * art_iter holds references to the topology
> +		 * so it won't change, but not the an_node or rtentries.
> +		 */
> +		struct rtentry *rt = SMR_PTR_GET_LOCKED(&an->an_value);
> +		rtref(rt);
>  
> -	error = art_walk(ar, rtable_walk_helper, &rwc);
> +		rw_exit_write(&r->r_lock);
> +		do {
> +			struct rtentry *nrt;
> +
> +			smr_read_enter();
> +			/* Get ready for the next entry. */
> +			nrt = SMR_PTR_GET(&rt->rt_next);
> +			if (nrt != NULL)
> +				rtref(nrt);
> +			smr_read_leave();
> +
> +			error = func(rt, arg, rtableid);
> +			if (error != 0) {
> +				if (prt != NULL)
> +					*prt = rt;
> +				else
> +					rtfree(rt);
> +
> +				if (nrt != NULL)
> +					rtfree(nrt);
> +
> +				rw_enter_write(&r->r_lock);
> +				art_iter_close(&ai);
> +				rw_exit_write(&r->r_lock);
> +				return (error);
> +			}
> +
> +			rtfree(rt);
> +			rt = nrt;
> +		} while (rt != NULL);
> +		rw_enter_write(&r->r_lock);
> +	}
> +	rw_exit_write(&r->r_lock);
>  
>  	return (error);
>  }
> @@ -774,12 +795,12 @@ struct rtentry *
>  rtable_iterate(struct rtentry *rt0)
>  {
>  	struct rtentry *rt = NULL;
> -	struct srp_ref sr;
>  
> -	rt = SRPL_NEXT(&sr, rt0, rt_next);
> +	smr_read_enter();
> +	rt = SMR_PTR_GET(&rt0->rt_next);
>  	if (rt != NULL)
>  		rtref(rt);
> -	SRPL_LEAVE(&sr);
> +	smr_read_leave();
>  	rtfree(rt0);
>  	return (rt);
>  }
> @@ -794,27 +815,25 @@ int
>  rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
>      int plen, uint8_t prio, struct rtentry *rt)
>  {
> -	struct art_root			*ar;
> +	struct rtable			*r;
>  	struct art_node			*an;
> -	struct srp_ref			 sr;
>  	const uint8_t			*addr;
>  	int				 error = 0;
>  
> -	ar = rtable_get(rtableid, dst->sa_family);
> -	if (ar == NULL)
> +	r = rtable_get(rtableid, dst->sa_family);
> +	if (r == NULL)
>  		return (EAFNOSUPPORT);
>  
> -	addr = satoaddr(ar, dst);
> -
> -	rw_enter_write(&ar->ar_lock);
> -	an = art_lookup(ar, addr, plen, &sr);
> -	srp_leave(&sr); /* an can't go away while we have the lock */
> +	addr = satoaddr(r, dst);
>  
> -	/* Make sure we've got a perfect match. */
> -	if (!an_match(an, dst, plen)) {
> +	rw_enter_write(&r->r_lock);
> +	smr_read_enter();
> +	an = art_lookup(r->r_art, addr, plen);
> +	smr_read_leave();
> +	if (an == NULL) {
>  		error = ESRCH;
> -	} else if (SRPL_FIRST_LOCKED(&an->an_rtlist) == rt &&
> -		SRPL_NEXT_LOCKED(rt, rt_next) == NULL) {
> +	} else if (SMR_PTR_GET_LOCKED(&an->an_value) == rt &&
> +	    SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) {
>  		/*
>  		 * If there's only one entry on the list do not go
>  		 * through an insert/remove cycle.  This is done to
> @@ -823,15 +842,23 @@ rtable_mpath_reprio(unsigned int rtablei
>  		 */
>  		rt->rt_priority = prio;
>  	} else {
> -		rtref(rt); /* keep rt alive in between remove and insert */
> -		SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist,
> -		    rt, rtentry, rt_next);
> +		struct rtentry **prt;
> +		struct rtentry *mrt;
> +
> + 		prt = (struct rtentry **)&an->an_value;
> +		while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
> +			if (mrt == rt)
> +				break;
> +			prt = &mrt->rt_next;
> +		}
> +		KASSERT(mrt != NULL);
> +
> +		SMR_PTR_SET_LOCKED(prt, SMR_PTR_GET_LOCKED(&rt->rt_next));
>  		rt->rt_priority = prio;
>  		rtable_mpath_insert(an, rt);
> -		rtfree(rt);
>  		error = EAGAIN;
>  	}
> -	rw_exit_write(&ar->ar_lock);
> +	rw_exit_write(&r->r_lock);
>  
>  	return (error);
>  }
> @@ -839,63 +866,21 @@ rtable_mpath_reprio(unsigned int rtablei
>  void
>  rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
>  {
> -	struct rtentry			*mrt, *prt = NULL;
> +	struct rtentry			*mrt, **prt;
>  	uint8_t				 prio = rt->rt_priority;
>  
> -	if ((mrt = SRPL_FIRST_LOCKED(&an->an_rtlist)) == NULL) {
> -		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
> -		return;
> -	}
> -
>  	/* Iterate until we find the route to be placed after ``rt''. */
> -	while (mrt->rt_priority <= prio && SRPL_NEXT_LOCKED(mrt, rt_next)) {
> -		prt = mrt;
> -		mrt = SRPL_NEXT_LOCKED(mrt, rt_next);
> -	}
>  
> -	if (mrt->rt_priority <= prio) {
> -		SRPL_INSERT_AFTER_LOCKED(&rt_rc, mrt, rt, rt_next);
> -	} else if (prt != NULL) {
> -		SRPL_INSERT_AFTER_LOCKED(&rt_rc, prt, rt, rt_next);
> -	} else {
> -		SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next);
> -	}
> -}
> -
> -/*
> - * Returns 1 if ``an'' perfectly matches (``dst'', ``plen''), 0 otherwise.
> - */
> -int
> -an_match(struct art_node *an, const struct sockaddr *dst, int plen)
> -{
> -	struct rtentry			*rt;
> -	struct srp_ref			 sr;
> -	int				 match;
> -
> -	if (an == NULL || an->an_plen != plen)
> -		return (0);
> -
> -	rt = SRPL_FIRST(&sr, &an->an_rtlist);
> -	match = (rt != NULL && memcmp(rt->rt_dest, dst, dst->sa_len) == 0);
> -	SRPL_LEAVE(&sr);
> -
> -	return (match);
> -}
> -
> -void
> -rtentry_ref(void *null, void *xrt)
> -{
> -	struct rtentry *rt = xrt;
> -
> -	rtref(rt);
> -}
> +	prt = (struct rtentry **)&an->an_value;
> +	while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
> +		if (mrt->rt_priority >= prio)
> +			break;
>  
> -void
> -rtentry_unref(void *null, void *xrt)
> -{
> -	struct rtentry *rt = xrt;
> +		prt = &mrt->rt_next;
> +	}
>  
> -	rtfree(rt);
> +	SMR_PTR_SET_LOCKED(&rt->rt_next, mrt);
> +	SMR_PTR_SET_LOCKED(prt, rt);
>  }
>  
>  /*
> @@ -904,9 +889,9 @@ rtentry_unref(void *null, void *xrt)
>   * of "struct sockaddr" used by this routing table.
>   */
>  static inline const uint8_t *
> -satoaddr(struct art_root *at, const struct sockaddr *sa)
> +satoaddr(struct rtable *r, const struct sockaddr *sa)
>  {
> -	return (((const uint8_t *)sa) + at->ar_off);
> +	return (((const uint8_t *)sa) + r->r_off);
>  }
>  
>  /*
> Index: rtable.h
> ===================================================================
> RCS file: /cvs/src/sys/net/rtable.h,v
> diff -u -p -r1.30 rtable.h
> --- rtable.h	13 May 2024 01:15:53 -0000	1.30
> +++ rtable.h	13 Jun 2025 06:40:59 -0000
> @@ -28,6 +28,8 @@
>  #define	rt_plen(rt)	((rt)->rt_plen)
>  #define	RT_ROOT(rt)	(0)
>  
> +struct rtable;
> +
>  int		 rtable_satoplen(sa_family_t, const struct sockaddr *);
>  
>  void		 rtable_init(void);