From: Vitaliy Makkoveev
Subject: Re: ARP list iterator
To: Alexander Bluhm
Cc: tech@openbsd.org
Date: Tue, 2 Sep 2025 12:51:15 +0300

On Mon, Sep 01, 2025 at 09:37:25PM +0200, Alexander Bluhm wrote:
> On Sat, Aug 30, 2025 at 08:22:41PM +0300, Vitaliy Makkoveev wrote:
> > I'd like to put the refcounting and the `arp_mtx' release down into
> > the 'if' block.  You don't need to do this every time.  With this
> > fix the diff is OK
> > mvs.
>
> Yes, that is more efficient.
>
> While porting this to ND6, I hit a crash.  Although not seen in the
> ARP tests, the same race might exist here, too.  Between releasing
> the mutex and grabbing the net lock another thread may free the ARP
> entry.  So we should check RTF_LLINFO in arptfree() while holding
> the exclusive net lock.
>
> still ok?

Sure.

> bluhm
>
> Index: netinet/if_ether.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
> diff -u -p -r1.276 if_ether.c
> --- netinet/if_ether.c 17 Jul 2025 17:31:45 -0000 1.276
> +++ netinet/if_ether.c 1 Sep 2025 09:41:33 -0000
> @@ -73,8 +73,10 @@
> */
>
> struct llinfo_arp {
> - LIST_ENTRY(llinfo_arp) la_list; /* [mN] global arp_list */
> + LIST_ENTRY(llinfo_arp) la_list; /* [m] global arp_list */
> struct rtentry *la_rt; /* [I] backpointer to rtentry */
> + /* keep fields above in sync with struct llinfo_arp_iterator */
> + struct refcnt la_refcnt; /* entry referenced by list */
> struct mbuf_queue la_mq; /* packet hold queue */
> time_t la_refreshed; /* when was refresh sent */
> int la_asked; /* number of queries sent */
> @@ -82,6 +84,12 @@ struct llinfo_arp {
> #define LA_HOLD_QUEUE 10
> #define LA_HOLD_TOTAL 100
>
> +struct llinfo_arp_iterator {
> + LIST_ENTRY(llinfo_arp) la_list; /* [m] global arp_list */
> + struct rtentry *la_rt; /* [I] always NULL */
> + /* keep fields above in sync with struct llinfo_arp */
> +};
> +
> /* timer values */
> int arpt_prune = (5 * 60); /* [I] walk list every 5 minutes */
> int arpt_keep = (20 * 60); /* [a] once resolved, cache for 20 minutes */
> @@ -103,8 +111,8 @@ struct niqueue arpinq = NIQUEUE_INITIALI
> /* llinfo_arp live time, rt_llinfo and RTF_LLINFO are protected by arp_mtx */
> struct mutex arp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
>
> -LIST_HEAD(, llinfo_arp) arp_list =
> - LIST_HEAD_INITIALIZER(arp_list); /* [mN] list of llinfo_arp structures */
> +LIST_HEAD(, llinfo_arp) arp_list = LIST_HEAD_INITIALIZER(arp_list);
> + /* [m] list of llinfo_arp structures */
> struct pool arp_pool; /* [I] pool for llinfo_arp structures */
> int arp_maxtries = 5; /* [I] arp requests before set to rejected */
> unsigned int la_hold_total; /* [a] packets currently in the arp queue */
> @@ -116,27 +124,63 @@ int revarp_finished;
> unsigned int revarp_ifidx;
> #endif /* NFSCLIENT */
>
> +static struct llinfo_arp *
> +arpiterator(struct llinfo_arp *la, struct llinfo_arp_iterator *iter)
> +{
> + struct llinfo_arp *tmp;
> +
> + MUTEX_ASSERT_LOCKED(&arp_mtx);
> +
> + if (la)
> + tmp = LIST_NEXT((struct llinfo_arp *)iter, la_list);
> + else
> + tmp = LIST_FIRST(&arp_list);
> +
> + while (tmp && tmp->la_rt == NULL)
> + tmp = LIST_NEXT(tmp, la_list);
> +
> + if (la) {
> + LIST_REMOVE((struct llinfo_arp *)iter, la_list);
> + if (refcnt_rele(&la->la_refcnt))
> + pool_put(&arp_pool, la);
> + }
> + if (tmp) {
> + LIST_INSERT_AFTER(tmp, (struct llinfo_arp *)iter, la_list);
> + refcnt_take(&tmp->la_refcnt);
> + }
> +
> + return tmp;
> +}
> +
> /*
> - * Timeout routine. Age arp_tab entries periodically.
> + * Timeout routine. Age arp table entries periodically.
> */
> void
> arptimer(void *arg)
> {
> struct timeout *to = arg;
> - struct llinfo_arp *la, *nla;
> + struct llinfo_arp_iterator iter = { .la_rt = NULL };
> + struct llinfo_arp *la = NULL;
> time_t uptime;
>
> - NET_LOCK();
> uptime = getuptime();
> timeout_add_sec(to, arpt_prune);
> - /* Net lock is exclusive, no arp mutex needed for arp_list here. */
> - LIST_FOREACH_SAFE(la, &arp_list, la_list, nla) {
> +
> + mtx_enter(&arp_mtx);
> + while ((la = arpiterator(la, &iter)) != NULL) {
> struct rtentry *rt = la->la_rt;
>
> - if (rt->rt_expire && rt->rt_expire < uptime)
> + if (rt->rt_expire && rt->rt_expire < uptime) {
> + rtref(rt);
> + mtx_leave(&arp_mtx);
> + NET_LOCK();
> arptfree(rt); /* timer has expired; clear */
> + NET_UNLOCK();
> + rtfree(rt);
> + mtx_enter(&arp_mtx);
> + }
> }
> - NET_UNLOCK();
> + mtx_leave(&arp_mtx);
> }
>
> void
> @@ -210,6 +254,7 @@ arp_rtrequest(struct ifnet *ifp, int req
> pool_put(&arp_pool, la);
> break;
> }
> + refcnt_init(&la->la_refcnt);
> mq_init(&la->la_mq, LA_HOLD_QUEUE, IPL_SOFTNET);
> rt->rt_llinfo = (caddr_t)la;
> la->la_rt = rt;
> @@ -235,7 +280,8 @@ arp_rtrequest(struct ifnet *ifp, int req
> atomic_sub_int(&la_hold_total, mq_purge(&la->la_mq));
> mtx_leave(&arp_mtx);
>
> - pool_put(&arp_pool, la);
> + if (refcnt_rele(&la->la_refcnt))
> + pool_put(&arp_pool, la);
> break;
>
> case RTM_INVALIDATE:
> @@ -749,6 +795,12 @@ void
> arptfree(struct rtentry *rt)
> {
> struct ifnet *ifp;
> +
> + NET_ASSERT_LOCKED_EXCLUSIVE();
> +
> + /* might have been freed between leaving arp_mtx and taking the net lock */
> + if (!ISSET(rt->rt_flags, RTF_LLINFO))
> + return;
>
> KASSERT(!ISSET(rt->rt_flags, RTF_LOCAL));
> arpinvalidate(rt);
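
For anyone reading along, the trick in arpiterator() above is a marker
node: struct llinfo_arp_iterator shares its leading fields with struct
llinfo_arp, and la_rt == NULL is what tells markers apart from real
entries.  Below is a minimal user-space sketch of the same pattern; the
names (node, node_iterator, iterate, list_mtx) are invented for
illustration, and a plain counter manipulated under the mutex stands in
for refcnt(9).

#include <sys/queue.h>
#include <pthread.h>
#include <stdlib.h>

struct node {
	LIST_ENTRY(node) n_list;	/* linkage, protected by list_mtx */
	void *n_data;			/* non-NULL for real nodes */
	/* keep fields above in sync with struct node_iterator */
	int n_refcnt;			/* freed when it drops to zero */
};

struct node_iterator {
	LIST_ENTRY(node) n_list;	/* linkage, protected by list_mtx */
	void *n_data;			/* always NULL, marks an iterator */
	/* keep fields above in sync with struct node */
};

static LIST_HEAD(, node) node_list = LIST_HEAD_INITIALIZER(node_list);
static pthread_mutex_t list_mtx = PTHREAD_MUTEX_INITIALIZER;

/*
 * Step the iteration; list_mtx must be held.  Returns the next real
 * node with a reference held on it, or NULL when the walk is done.
 * The marker keeps the position valid even if the caller drops
 * list_mtx between calls, because removers only unlink real nodes,
 * never foreign markers.
 */
static struct node *
iterate(struct node *n, struct node_iterator *iter)
{
	struct node *tmp;

	if (n)
		tmp = LIST_NEXT((struct node *)iter, n_list);
	else
		tmp = LIST_FIRST(&node_list);

	/* skip other iterators' markers, recognizable by n_data == NULL */
	while (tmp != NULL && tmp->n_data == NULL)
		tmp = LIST_NEXT(tmp, n_list);

	if (n) {
		/* done with the old position and the old element */
		LIST_REMOVE((struct node *)iter, n_list);
		if (--n->n_refcnt == 0)
			free(n);
	}
	if (tmp) {
		/* park the marker behind the element we hand out */
		LIST_INSERT_AFTER(tmp, (struct node *)iter, n_list);
		tmp->n_refcnt++;
	}

	return tmp;
}

The cast from node_iterator to node is only safe because both structs
lay out n_list and n_data identically, which is exactly what the "keep
fields above in sync" comments in the diff are policing.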
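
The consumer side then mirrors arptimer() and the new check in
arptfree().  Continuing the sketch, with big_lock standing in for the
net lock and node_is_expired()/expire_node() invented for the example:
the reference taken by iterate() pins the element across the unlock,
and expire_node() revalidates after the lock switch, the analogue of
testing RTF_LLINFO.

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
					/* net-lock stand-in */

/* hypothetical expiry predicate; real code would compare timestamps */
static int
node_is_expired(struct node *n)
{
	(void)n;
	return 0;
}

/*
 * arptfree() analogue: take the heavy lock first, then revalidate,
 * since another thread may have expired the node while list_mtx was
 * dropped.  n_data == NULL doubles as the "already gone" flag, like a
 * cleared RTF_LLINFO bit.
 */
static void
expire_node(struct node *n)
{
	pthread_mutex_lock(&big_lock);
	pthread_mutex_lock(&list_mtx);
	if (n->n_data != NULL) {
		LIST_REMOVE(n, n_list);
		n->n_data = NULL;
		if (--n->n_refcnt == 0)	/* drop the list's reference */
			free(n);
	}
	pthread_mutex_unlock(&list_mtx);
	pthread_mutex_unlock(&big_lock);
}

/*
 * arptimer() analogue: walk with the marker, drop list_mtx around the
 * expensive part, re-enter it before stepping the iterator again.
 */
static void
walk_and_expire(void)
{
	struct node_iterator iter = { .n_data = NULL };
	struct node *n = NULL;

	pthread_mutex_lock(&list_mtx);
	while ((n = iterate(n, &iter)) != NULL) {
		if (node_is_expired(n)) {
			pthread_mutex_unlock(&list_mtx);
			expire_node(n);	/* n pinned by iterate()'s ref */
			pthread_mutex_lock(&list_mtx);
		}
	}
	pthread_mutex_unlock(&list_mtx);
}

Note the ordering: list_mtx is always released before big_lock is
taken, the same mtx_leave(&arp_mtx)/NET_LOCK() order arptimer() uses,
so there is no lock-order inversion.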