Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: ND6 list iterator
To:
tech@openbsd.org
Date:
Tue, 9 Sep 2025 14:05:00 +0200

Download raw body.

Thread
On Mon, Sep 01, 2025 at 09:47:43PM +0200, Alexander Bluhm wrote:
> Here is the iterator diff for the ND6 list.  It works more or less
> like the ARP diff I have sent before.
> 
> Note that I have changed the (secs < 0) check to 1 in nd6_timer().
> I have seen timer bursts before.  It is sufficient to check expiry
> every second.

The second chunk has been commited.  Updated diff below.

I need this change to allow multipath link layer again.  If multipath
is set on a llinfo route, ND6 timer may remove the wrong entry.  As
long the ND6 list is not MP safe, this can cause a crash.  By using
an iterator, any entry can be removed safely.

Currently regress/sbin/route fails, as I have disabled link layer
multipath.  After fixing ND6, it can be reenabled.

ok?

bluhm

Index: netinet6/nd6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.c,v
diff -u -p -r1.301 nd6.c
--- netinet6/nd6.c	9 Sep 2025 10:36:00 -0000	1.301
+++ netinet6/nd6.c	9 Sep 2025 11:27:27 -0000
@@ -73,12 +73,12 @@
 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
 
 /* timer values */
-time_t	nd6_timer_next	= -1;	/* at which uptime nd6_timer runs */
+time_t	nd6_timer_next	= -1;	/* [N] at which uptime nd6_timer runs */
 time_t	nd6_expire_next	= -1;	/* at which uptime nd6_expire runs */
 int	nd6_delay	= 5;	/* [a] delay first probe time 5 second */
 int	nd6_umaxtries	= 3;	/* [a] maximum unicast query */
 int	nd6_mmaxtries	= 3;	/* [a] maximum multicast query */
-int	nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer */
+const int nd6_gctimer	= (60 * 60 * 24); /* 1 day: garbage collection timer */
 
 /* preventing too many loops in ND option parsing */
 int nd6_maxndopt = 10;	/* max # of ND options allowed */
@@ -89,7 +89,8 @@ int nd6_maxnudhint = 0;	/* [a] max # of 
 struct mutex nd6_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
 
 TAILQ_HEAD(llinfo_nd6_head, llinfo_nd6) nd6_list =
-    TAILQ_HEAD_INITIALIZER(nd6_list);	/* [mN] list of llinfo_nd6 structures */
+    TAILQ_HEAD_INITIALIZER(nd6_list);
+				/* [m] list of llinfo_nd6 structures */
 struct	pool nd6_pool;		/* [I] pool for llinfo_nd6 structures */
 int	nd6_inuse;		/* [m] limit neighbor discovery routes */
 unsigned int	ln_hold_total;	/* [a] packets currently in the nd6 queue */
@@ -241,39 +242,76 @@ nd6_llinfo_settimer(const struct llinfo_
 	}
 }
 
+static struct llinfo_nd6 *
+nd6_iterator(struct llinfo_nd6 *ln, struct llinfo_nd6_iterator *iter)
+{
+	struct llinfo_nd6 *tmp;
+
+	MUTEX_ASSERT_LOCKED(&nd6_mtx);
+
+	if (ln)
+		tmp = TAILQ_NEXT((struct llinfo_nd6 *)iter, ln_list);
+	else
+		tmp = TAILQ_FIRST(&nd6_list);
+
+	while (tmp && tmp->ln_rt == NULL)
+		tmp = TAILQ_NEXT(tmp, ln_list);
+
+	if (ln) {
+		TAILQ_REMOVE(&nd6_list, (struct llinfo_nd6 *)iter, ln_list);
+		if (refcnt_rele(&ln->ln_refcnt))
+			pool_put(&nd6_pool, ln);
+	}
+	if (tmp) {
+		TAILQ_INSERT_AFTER(&nd6_list, tmp, (struct llinfo_nd6 *)iter,
+		    ln_list);
+		refcnt_take(&tmp->ln_refcnt);
+	}
+
+	return tmp;
+}
+
 void
 nd6_timer(void *unused)
 {
-	struct llinfo_nd6 *ln, *nln;
+	struct llinfo_nd6_iterator iter = { .ln_rt = NULL };
+	struct llinfo_nd6 *ln = NULL;
 	time_t uptime, expire;
 	int i_am_router = (atomic_load_int(&ip6_forwarding) != 0);
 	int secs;
 
-	NET_LOCK();
-
 	uptime = getuptime();
 	expire = uptime + nd6_gctimer;
 
-	/* Net lock is exclusive, no nd6 mutex needed for nd6_list here. */
-	TAILQ_FOREACH_SAFE(ln, &nd6_list, ln_list, nln) {
+	mtx_enter(&nd6_mtx);
+	while ((ln = nd6_iterator(ln, &iter)) != NULL) {
 		struct rtentry *rt = ln->ln_rt;
 
-		if (rt->rt_expire && rt->rt_expire <= uptime)
-			if (nd6_llinfo_timer(rt, i_am_router))
-				continue;
-
-		if (rt->rt_expire && rt->rt_expire < expire)
+		if (rt->rt_expire && rt->rt_expire <= uptime) {
+			rtref(rt);
+			mtx_leave(&nd6_mtx);
+			NET_LOCK();
+			if (!nd6_llinfo_timer(rt, i_am_router)) {
+				if (rt->rt_expire && rt->rt_expire < expire)
+					expire = rt->rt_expire;
+			}
+			NET_UNLOCK();
+			rtfree(rt);
+			mtx_enter(&nd6_mtx);
+		} else if (rt->rt_expire && rt->rt_expire < expire)
 			expire = rt->rt_expire;
 	}
+	mtx_leave(&nd6_mtx);
 
 	secs = expire - uptime;
 	if (secs < 1)
 		secs = 1;
+
+	NET_LOCK();
 	if (!TAILQ_EMPTY(&nd6_list)) {
 		nd6_timer_next = uptime + secs;
 		timeout_add_sec(&nd6_timer_to, secs);
 	}
-
 	NET_UNLOCK();
 }
 
@@ -291,6 +329,10 @@ nd6_llinfo_timer(struct rtentry *rt, int
 
 	NET_ASSERT_LOCKED_EXCLUSIVE();
 
+	/* might have been freed between leave nd6_mtx and enter net lock */
+	if (!ISSET(rt->rt_flags, RTF_LLINFO))
+		return 0;
+
 	if ((ifp = if_get(rt->rt_ifidx)) == NULL)
 		return 1;
 
@@ -459,26 +501,31 @@ nd6_expire_timer(void *unused)
 void
 nd6_purge(struct ifnet *ifp)
 {
-	struct llinfo_nd6 *ln, *nln;
+	struct llinfo_nd6_iterator iter = { .ln_rt = NULL };
+	struct llinfo_nd6 *ln = NULL;
 	int i_am_router = (atomic_load_int(&ip6_forwarding) != 0);
 
-	NET_ASSERT_LOCKED_EXCLUSIVE();
-
 	/*
 	 * Nuke neighbor cache entries for the ifp.
 	 */
-	TAILQ_FOREACH_SAFE(ln, &nd6_list, ln_list, nln) {
-		struct rtentry *rt;
+	mtx_enter(&nd6_mtx);
+	while ((ln = nd6_iterator(ln, &iter)) != NULL) {
+		struct rtentry *rt = ln->ln_rt;
 		struct sockaddr_dl *sdl;
 
-		rt = ln->ln_rt;
 		if (rt != NULL && rt->rt_gateway != NULL &&
 		    rt->rt_gateway->sa_family == AF_LINK) {
 			sdl = satosdl(rt->rt_gateway);
-			if (sdl->sdl_index == ifp->if_index)
+			if (sdl->sdl_index == ifp->if_index) {
+				rtref(rt);
+				mtx_leave(&nd6_mtx);
 				nd6_free(rt, ifp, i_am_router);
+				rtfree(rt);
+				mtx_enter(&nd6_mtx);
+			}
 		}
 	}
+	mtx_leave(&nd6_mtx);
 }
 
 struct rtentry *
@@ -794,6 +841,7 @@ nd6_rtrequest(struct ifnet *ifp, int req
 			break;
 		}
 		nd6_inuse++;
+		refcnt_init(&ln->ln_refcnt);
 		mq_init(&ln->ln_mq, LN_HOLD_QUEUE, IPL_SOFTNET);
 		rt->rt_llinfo = (caddr_t)ln;
 		ln->ln_rt = rt;
@@ -900,7 +948,8 @@ nd6_rtrequest(struct ifnet *ifp, int req
 		atomic_sub_int(&ln_hold_total, mq_purge(&ln->ln_mq));
 		mtx_leave(&nd6_mtx);
 
-		pool_put(&nd6_pool, ln);
+		if (refcnt_rele(&ln->ln_refcnt))
+			pool_put(&nd6_pool, ln);
 
 		/* leave from solicited node multicast for proxy ND */
 		if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
Index: netinet6/nd6.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/nd6.h,v
diff -u -p -r1.103 nd6.h
--- netinet6/nd6.h	14 Aug 2025 08:50:25 -0000	1.103
+++ netinet6/nd6.h	9 Sep 2025 11:26:40 -0000
@@ -79,9 +79,11 @@ struct	in6_ndireq {
 
 #include <sys/queue.h>
 
-struct	llinfo_nd6 {
-	TAILQ_ENTRY(llinfo_nd6)	 ln_list;	/* [mN] global nd6_list */
+struct llinfo_nd6 {
+	TAILQ_ENTRY(llinfo_nd6)	 ln_list;	/* [m] global nd6_list */
 	struct	rtentry		*ln_rt;		/* [I] backpointer to rtentry */
+	/* keep fields above in sync with struct llinfo_nd6_iterator */
+	struct	refcnt		 ln_refcnt;	/* entry refereced by list */
 	struct	mbuf_queue ln_mq;	/* hold packets until resolved */
 	struct	in6_addr ln_saddr6;	/* source of prompting packet */
 	long	ln_asked;	/* number of queries already sent for addr */
@@ -92,6 +94,12 @@ struct	llinfo_nd6 {
 #define LN_HOLD_QUEUE 10
 #define LN_HOLD_TOTAL 100
 
+struct llinfo_nd6_iterator {
+	TAILQ_ENTRY(llinfo_nd6)	 ln_list;	/* [m] global nd6_list */
+	struct	rtentry		*ln_rt;		/* [I] always NULL */
+	/* keep fields above in sync with struct llinfo_nd6 */
+};
+
 extern unsigned int ln_hold_total;
 
 #define ND6_LLINFO_PERMANENT(n)	((n)->ln_rt->rt_expire == 0)
@@ -109,7 +117,7 @@ extern int nd6_delay;
 extern int nd6_umaxtries;
 extern int nd6_mmaxtries;
 extern int nd6_maxnudhint;
-extern int nd6_gctimer;
+extern const int nd6_gctimer;
 
 struct nd_opts {
 	struct nd_opt_hdr *nd_opts_src_lladdr;