Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
route gwroute mutex
To:
tech@openbsd.org
Date:
Thu, 6 Mar 2025 14:47:10 +0100

Download raw body.

Thread
Hi,

I am still trying to make the leaves of the routing tree MP safe.

A small step would be to protect rt_cachecnt and rt_gwroute writers
with a per route mutex.  I am aware that this is not complete, e.g.
rt_flags also looks fishy.  The READ_ONCE() in rt_getll() does not
protect from use-after-free.  I want to avoid mutex in the hot path.
A previous attempt with SMR failed when running BGP.  Let's make
this first step and find a smarter solition later.  route_cleargateway()
is not in the hot packet path, so I added the mutex there.

I have moved the rt_cachecnt from rt_setgwroute() to rt_putgwroute()
to have the RTF_CACHED logic in one function.

ok?

bluhm

Index: net/route.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
diff -u -p -r1.442 route.c
--- net/route.c	21 Feb 2025 22:21:20 -0000	1.442
+++ net/route.c	3 Mar 2025 23:50:31 -0000
@@ -324,10 +324,12 @@ rtisvalid(struct rtentry *rt)
 		return (0);
 
 	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
-		if (rt->rt_gwroute == NULL)
+		struct rtentry *gwroute = READ_ONCE(rt->rt_gwroute);
+
+		if (gwroute == NULL)
 			return (0);
-		KASSERT(!ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY));
-		if (!ISSET(rt->rt_gwroute->rt_flags, RTF_UP))
+		KASSERT(!ISSET(gwroute->rt_flags, RTF_GATEWAY));
+		if (!ISSET(gwroute->rt_flags, RTF_UP))
 			return (0);
 	}
 
@@ -567,15 +569,6 @@ rt_setgwroute(struct rtentry *rt, const 
 			atomic_cas_uint(&rt->rt_mtu, mtu, nhmtu);
 	}
 
-	/*
-	 * To avoid reference counting problems when writing link-layer
-	 * addresses in an outgoing packet, we ensure that the lifetime
-	 * of a cached entry is greater than the bigger lifetime of the
-	 * gateway entries it is pointed by.
-	 */
-	nhrt->rt_flags |= RTF_CACHED;
-	nhrt->rt_cachecnt++;
-
 	/* commit */
 	rt_putgwroute(rt, nhrt);
 
@@ -595,17 +588,32 @@ rt_putgwroute(struct rtentry *rt, struct
 	if (!ISSET(rt->rt_flags, RTF_GATEWAY))
 		return;
 
-	/* this is protected as per [X] in route.h */
+	/*
+	 * To avoid reference counting problems when writing link-layer
+	 * addresses in an outgoing packet, we ensure that the lifetime
+	 * of a cached entry is greater than the bigger lifetime of the
+	 * gateway entries it is pointed by.
+	 */
+	if (nhrt != NULL) {
+		mtx_enter(&nhrt->rt_mtx);
+		SET(nhrt->rt_flags, RTF_CACHED);
+		nhrt->rt_cachecnt++;
+		mtx_leave(&nhrt->rt_mtx);
+	}
+
+	mtx_enter(&rt->rt_mtx);
 	onhrt = rt->rt_gwroute;
 	rt->rt_gwroute = nhrt;
+	mtx_leave(&rt->rt_mtx);
 
 	if (onhrt != NULL) {
+		mtx_enter(&onhrt->rt_mtx);
 		KASSERT(onhrt->rt_cachecnt > 0);
 		KASSERT(ISSET(onhrt->rt_flags, RTF_CACHED));
-
-		--onhrt->rt_cachecnt;
+		onhrt->rt_cachecnt--;
 		if (onhrt->rt_cachecnt == 0)
 			CLR(onhrt->rt_flags, RTF_CACHED);
+		mtx_leave(&onhrt->rt_mtx);
 
 		rtfree(onhrt);
 	}
@@ -1004,6 +1012,7 @@ rtrequest(int req, struct rt_addrinfo *i
 			return (ENOBUFS);
 		}
 
+		mtx_init_flags(&rt->rt_mtx, IPL_SOFTNET, "rtentry", 0);
 		refcnt_init_trace(&rt->rt_refcnt, DT_REFCNT_IDX_RTENTRY);
 		rt->rt_flags = info->rti_flags | RTF_UP;
 		rt->rt_priority = prio;	/* init routing priority */
@@ -1165,7 +1174,7 @@ rt_getll(struct rtentry *rt)
 {
 	if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
 	 	/* We may return NULL here. */
-		return (rt->rt_gwroute);
+		return (READ_ONCE(rt->rt_gwroute));
 	}
 
 	return (rt);
Index: net/route.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
diff -u -p -r1.213 route.h
--- net/route.h	16 Feb 2025 11:39:28 -0000	1.213
+++ net/route.h	3 Mar 2025 23:50:31 -0000
@@ -41,6 +41,7 @@
  *	N	net lock
  *	X	exclusive net lock, or shared net lock + kernel lock
  *	R	art (rtable) lock
+ *	r	per route entry mutex	rt_mtx
  *	L	arp/nd6/etc lock for updates, net lock for reads
  *	T	rttimer_mtx		route timer lists
  */
@@ -114,6 +115,7 @@ struct rttimer;
  */
 
 struct rtentry {
+	struct mutex	 rt_mtx;
 	struct sockaddr	*rt_dest;	/* [I] destination */
 	SRPL_ENTRY(rtentry) rt_next;	/* [R] next mpath entry to our dst */
 	struct sockaddr	*rt_gateway;	/* [X] gateway address */
@@ -121,8 +123,8 @@ struct rtentry {
 	caddr_t		 rt_llinfo;	/* [L] pointer to link level info or
 					   an MPLS structure */
 	union {
-		struct rtentry	*_nh;	/* [X] rtentry for rt_gateway */
-		unsigned int	 _ref;	/* [X] # gateway rtentry refs */
+		struct rtentry	*_nh;	/* [r] rtentry for rt_gateway */
+		unsigned int	 _ref;	/* [r] # gateway rtentry refs */
 	} RT_gw;
 #define rt_gwroute	 RT_gw._nh
 #define rt_cachecnt	 RT_gw._ref
Index: net/rtsock.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtsock.c,v
diff -u -p -r1.381 rtsock.c
--- net/rtsock.c	16 Feb 2025 11:39:28 -0000	1.381
+++ net/rtsock.c	3 Mar 2025 23:50:31 -0000
@@ -1343,9 +1343,11 @@ route_cleargateway(struct rtentry *rt, v
 {
 	struct rtentry *nhrt = arg;
 
+	mtx_enter(&rt->rt_mtx);
 	if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
 	    !ISSET(rt->rt_locks, RTV_MTU))
 		atomic_store_int(&rt->rt_mtu, 0);
+	mtx_leave(&rt->rt_mtx);
 
 	return (0);
 }