Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Re: ND6 list iterator
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Sat, 13 Sep 2025 00:28:32 +0300

Download raw body.

Thread
On Fri, Sep 12, 2025 at 03:32:13PM +0200, Alexander Bluhm wrote:
> On Tue, Sep 09, 2025 at 02:05:00PM +0200, Alexander Bluhm wrote:
> > On Mon, Sep 01, 2025 at 09:47:43PM +0200, Alexander Bluhm wrote:
> > > Here is the iterator diff for the ND6 list.  It works more or less
> > > like the ARP diff I have sent before.
> > > 
> > > Note that I have changed the (secs < 0) check to 1 in nd6_timer().
> > > I have seen timer bursts before.  It is sufficient to check expiry
> > > every second.
> > 
> > The second chunk has been commited.  Updated diff below.
> > 
> > I need this change to allow multipath link layer again.  If multipath
> > is set on a llinfo route, ND6 timer may remove the wrong entry.  As
> > long the ND6 list is not MP safe, this can cause a crash.  By using
> > an iterator, any entry can be removed safely.
> > 
> > Currently regress/sbin/route fails, as I have disabled link layer
> > multipath.  After fixing ND6, it can be reenabled.
> > 
> > ok?
> 
> anyone?
> 

ok mvs

> > Index: netinet6/nd6.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.c,v
> > diff -u -p -r1.301 nd6.c
> > --- netinet6/nd6.c	9 Sep 2025 10:36:00 -0000	1.301
> > +++ netinet6/nd6.c	9 Sep 2025 11:27:27 -0000
> > @@ -73,12 +73,12 @@
> >  #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
> >  
> >  /* timer values */
> > -time_t	nd6_timer_next	= -1;	/* at which uptime nd6_timer runs */
> > +time_t	nd6_timer_next	= -1;	/* [N] at which uptime nd6_timer runs */
> >  time_t	nd6_expire_next	= -1;	/* at which uptime nd6_expire runs */
> >  int	nd6_delay	= 5;	/* [a] delay first probe time 5 second */
> >  int	nd6_umaxtries	= 3;	/* [a] maximum unicast query */
> >  int	nd6_mmaxtries	= 3;	/* [a] maximum multicast query */
> > -int	nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer */
> > +const int nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer */
> >  
> >  /* preventing too many loops in ND option parsing */
> >  int nd6_maxndopt = 10;	/* max # of ND options allowed */
> > @@ -89,7 +89,8 @@ int nd6_maxnudhint = 0;	/* [a] max # of 
> >  struct mutex nd6_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
> >  
> >  TAILQ_HEAD(llinfo_nd6_head, llinfo_nd6) nd6_list =
> > -    TAILQ_HEAD_INITIALIZER(nd6_list);	/* [mN] list of llinfo_nd6 structures */
> > +    TAILQ_HEAD_INITIALIZER(nd6_list);
> > +				/* [m] list of llinfo_nd6 structures */
> >  struct	pool nd6_pool;		/* [I] pool for llinfo_nd6 structures */
> >  int	nd6_inuse;		/* [m] limit neighbor discovery routes */
> >  unsigned int	ln_hold_total;	/* [a] packets currently in the nd6 queue */
> > @@ -241,39 +242,76 @@ nd6_llinfo_settimer(const struct llinfo_
> >  	}
> >  }
> >  
> > +static struct llinfo_nd6 *
> > +nd6_iterator(struct llinfo_nd6 *ln, struct llinfo_nd6_iterator *iter)
> > +{
> > +	struct llinfo_nd6 *tmp;
> > +
> > +	MUTEX_ASSERT_LOCKED(&nd6_mtx);
> > +
> > +	if (ln)
> > +		tmp = TAILQ_NEXT((struct llinfo_nd6 *)iter, ln_list);
> > +	else
> > +		tmp = TAILQ_FIRST(&nd6_list);
> > +
> > +	while (tmp && tmp->ln_rt == NULL)
> > +		tmp = TAILQ_NEXT(tmp, ln_list);
> > +
> > +	if (ln) {
> > +		TAILQ_REMOVE(&nd6_list, (struct llinfo_nd6 *)iter, ln_list);
> > +		if (refcnt_rele(&ln->ln_refcnt))
> > +			pool_put(&nd6_pool, ln);
> > +	}
> > +	if (tmp) {
> > +		TAILQ_INSERT_AFTER(&nd6_list, tmp, (struct llinfo_nd6 *)iter,
> > +		    ln_list);
> > +		refcnt_take(&tmp->ln_refcnt);
> > +	}
> > +
> > +	return tmp;
> > +}
> > +
> >  void
> >  nd6_timer(void *unused)
> >  {
> > -	struct llinfo_nd6 *ln, *nln;
> > +	struct llinfo_nd6_iterator iter = { .ln_rt = NULL };
> > +	struct llinfo_nd6 *ln = NULL;
> >  	time_t uptime, expire;
> >  	int i_am_router = (atomic_load_int(&ip6_forwarding) != 0);
> >  	int secs;
> >  
> > -	NET_LOCK();
> > -
> >  	uptime = getuptime();
> >  	expire = uptime + nd6_gctimer;
> >  
> > -	/* Net lock is exclusive, no nd6 mutex needed for nd6_list here. */
> > -	TAILQ_FOREACH_SAFE(ln, &nd6_list, ln_list, nln) {
> > +	mtx_enter(&nd6_mtx);
> > +	while ((ln = nd6_iterator(ln, &iter)) != NULL) {
> >  		struct rtentry *rt = ln->ln_rt;
> >  
> > -		if (rt->rt_expire && rt->rt_expire <= uptime)
> > -			if (nd6_llinfo_timer(rt, i_am_router))
> > -				continue;
> > -
> > -		if (rt->rt_expire && rt->rt_expire < expire)
> > +		if (rt->rt_expire && rt->rt_expire <= uptime) {
> > +			rtref(rt);
> > +			mtx_leave(&nd6_mtx);
> > +			NET_LOCK();
> > +			if (!nd6_llinfo_timer(rt, i_am_router)) {
> > +				if (rt->rt_expire && rt->rt_expire < expire)
> > +					expire = rt->rt_expire;
> > +			}
> > +			NET_UNLOCK();
> > +			rtfree(rt);
> > +			mtx_enter(&nd6_mtx);
> > +		} else if (rt->rt_expire && rt->rt_expire < expire)
> >  			expire = rt->rt_expire;
> >  	}
> > +	mtx_leave(&nd6_mtx);
> >  
> >  	secs = expire - uptime;
> >  	if (secs < 1)
> >  		secs = 1;
> > +
> > +	NET_LOCK();
> >  	if (!TAILQ_EMPTY(&nd6_list)) {
> >  		nd6_timer_next = uptime + secs;
> >  		timeout_add_sec(&nd6_timer_to, secs);
> >  	}
> > -
> >  	NET_UNLOCK();
> >  }
> >  
> > @@ -291,6 +329,10 @@ nd6_llinfo_timer(struct rtentry *rt, int
> >  
> >  	NET_ASSERT_LOCKED_EXCLUSIVE();
> >  
> > +	/* might have been freed between leave nd6_mtx and enter net lock */
> > +	if (!ISSET(rt->rt_flags, RTF_LLINFO))
> > +		return 0;
> > +
> >  	if ((ifp = if_get(rt->rt_ifidx)) == NULL)
> >  		return 1;
> >  
> > @@ -459,26 +501,31 @@ nd6_expire_timer(void *unused)
> >  void
> >  nd6_purge(struct ifnet *ifp)
> >  {
> > -	struct llinfo_nd6 *ln, *nln;
> > +	struct llinfo_nd6_iterator iter = { .ln_rt = NULL };
> > +	struct llinfo_nd6 *ln = NULL;
> >  	int i_am_router = (atomic_load_int(&ip6_forwarding) != 0);
> >  
> > -	NET_ASSERT_LOCKED_EXCLUSIVE();
> > -
> >  	/*
> >  	 * Nuke neighbor cache entries for the ifp.
> >  	 */
> > -	TAILQ_FOREACH_SAFE(ln, &nd6_list, ln_list, nln) {
> > -		struct rtentry *rt;
> > +	mtx_enter(&nd6_mtx);
> > +	while ((ln = nd6_iterator(ln, &iter)) != NULL) {
> > +		struct rtentry *rt = ln->ln_rt;
> >  		struct sockaddr_dl *sdl;
> >  
> > -		rt = ln->ln_rt;
> >  		if (rt != NULL && rt->rt_gateway != NULL &&
> >  		    rt->rt_gateway->sa_family == AF_LINK) {
> >  			sdl = satosdl(rt->rt_gateway);
> > -			if (sdl->sdl_index == ifp->if_index)
> > +			if (sdl->sdl_index == ifp->if_index) {
> > +				rtref(rt);
> > +				mtx_leave(&nd6_mtx);
> >  				nd6_free(rt, ifp, i_am_router);
> > +				rtfree(rt);
> > +				mtx_enter(&nd6_mtx);
> > +			}
> >  		}
> >  	}
> > +	mtx_leave(&nd6_mtx);
> >  }
> >  
> >  struct rtentry *
> > @@ -794,6 +841,7 @@ nd6_rtrequest(struct ifnet *ifp, int req
> >  			break;
> >  		}
> >  		nd6_inuse++;
> > +		refcnt_init(&ln->ln_refcnt);
> >  		mq_init(&ln->ln_mq, LN_HOLD_QUEUE, IPL_SOFTNET);
> >  		rt->rt_llinfo = (caddr_t)ln;
> >  		ln->ln_rt = rt;
> > @@ -900,7 +948,8 @@ nd6_rtrequest(struct ifnet *ifp, int req
> >  		atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq));
> >  		mtx_leave(&nd6_mtx);
> >  
> > -		pool_put(&nd6_pool, ln);
> > +		if (refcnt_rele(&ln->ln_refcnt))
> > +			pool_put(&nd6_pool, ln);
> >  
> >  		/* leave from solicited node multicast for proxy ND */
> >  		if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
> > Index: netinet6/nd6.h
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.h,v
> > diff -u -p -r1.103 nd6.h
> > --- netinet6/nd6.h	14 Aug 2025 08:50:25 -0000	1.103
> > +++ netinet6/nd6.h	9 Sep 2025 11:26:40 -0000
> > @@ -79,9 +79,11 @@ struct	in6_ndireq {
> >  
> >  #include <sys/queue.h>
> >  
> > -struct	llinfo_nd6 {
> > -	TAILQ_ENTRY(llinfo_nd6)	 ln_list;	/* [mN] global nd6_list */
> > +struct llinfo_nd6 {
> > +	TAILQ_ENTRY(llinfo_nd6)	 ln_list;	/* [m] global nd6_list */
> >  	struct	rtentry		*ln_rt;		/* [I] backpointer to rtentry */
> > +	/* keep fields above in sync with struct llinfo_nd6_iterator */
> > +	struct	refcnt		 ln_refcnt;	/* entry refereced by list */
> >  	struct	mbuf_queue ln_mq;	/* hold packets until resolved */
> >  	struct	in6_addr ln_saddr6;	/* source of prompting packet */
> >  	long	ln_asked;	/* number of queries already sent for addr */
> > @@ -92,6 +94,12 @@ struct	llinfo_nd6 {
> >  #define LN_HOLD_QUEUE 10
> >  #define LN_HOLD_TOTAL 100
> >  
> > +struct llinfo_nd6_iterator {
> > +	TAILQ_ENTRY(llinfo_nd6)	 ln_list;	/* [m] global nd6_list */
> > +	struct	rtentry		*ln_rt;		/* [I] always NULL */
> > +	/* keep fields above in sync with struct llinfo_nd6 */
> > +};
> > +
> >  extern unsigned int ln_hold_total;
> >  
> >  #define ND6_LLINFO_PERMANENT(n)	((n)->ln_rt->rt_expire == 0)
> > @@ -109,7 +117,7 @@ extern int nd6_delay;
> >  extern int nd6_umaxtries;
> >  extern int nd6_mmaxtries;
> >  extern int nd6_maxnudhint;
> > -extern int nd6_gctimer;
> > +extern const int nd6_gctimer;
> >  
> >  struct nd_opts {
> >  	struct nd_opt_hdr *nd_opts_src_lladdr;
>