Download raw body.
route gwroute mutex
Hi,
I am still trying to make the leaves of the routing tree MP safe.
A small step would be to protect rt_cachecnt and rt_gwroute writers
with a per route mutex. I am aware that this is not complete, e.g.
rt_flags also looks fishy. The READ_ONCE() in rt_getll() does not
protect from use-after-free. I want to avoid mutex in the hot path.
A previous attempt with SMR failed when running BGP. Let's make
this first step and find a smarter solition later. route_cleargateway()
is not in the hot packet path, so I added the mutex there.
I have moved the rt_cachecnt from rt_setgwroute() to rt_putgwroute()
to have the RTF_CACHED logic in one function.
ok?
bluhm
Index: net/route.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
diff -u -p -r1.442 route.c
--- net/route.c 21 Feb 2025 22:21:20 -0000 1.442
+++ net/route.c 3 Mar 2025 23:50:31 -0000
@@ -324,10 +324,12 @@ rtisvalid(struct rtentry *rt)
return (0);
if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
- if (rt->rt_gwroute == NULL)
+ struct rtentry *gwroute = READ_ONCE(rt->rt_gwroute);
+
+ if (gwroute == NULL)
return (0);
- KASSERT(!ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY));
- if (!ISSET(rt->rt_gwroute->rt_flags, RTF_UP))
+ KASSERT(!ISSET(gwroute->rt_flags, RTF_GATEWAY));
+ if (!ISSET(gwroute->rt_flags, RTF_UP))
return (0);
}
@@ -567,15 +569,6 @@ rt_setgwroute(struct rtentry *rt, const
atomic_cas_uint(&rt->rt_mtu, mtu, nhmtu);
}
- /*
- * To avoid reference counting problems when writing link-layer
- * addresses in an outgoing packet, we ensure that the lifetime
- * of a cached entry is greater than the bigger lifetime of the
- * gateway entries it is pointed by.
- */
- nhrt->rt_flags |= RTF_CACHED;
- nhrt->rt_cachecnt++;
-
/* commit */
rt_putgwroute(rt, nhrt);
@@ -595,17 +588,32 @@ rt_putgwroute(struct rtentry *rt, struct
if (!ISSET(rt->rt_flags, RTF_GATEWAY))
return;
- /* this is protected as per [X] in route.h */
+ /*
+ * To avoid reference counting problems when writing link-layer
+ * addresses in an outgoing packet, we ensure that the lifetime
+ * of a cached entry is greater than the bigger lifetime of the
+ * gateway entries it is pointed by.
+ */
+ if (nhrt != NULL) {
+ mtx_enter(&nhrt->rt_mtx);
+ SET(nhrt->rt_flags, RTF_CACHED);
+ nhrt->rt_cachecnt++;
+ mtx_leave(&nhrt->rt_mtx);
+ }
+
+ mtx_enter(&rt->rt_mtx);
onhrt = rt->rt_gwroute;
rt->rt_gwroute = nhrt;
+ mtx_leave(&rt->rt_mtx);
if (onhrt != NULL) {
+ mtx_enter(&onhrt->rt_mtx);
KASSERT(onhrt->rt_cachecnt > 0);
KASSERT(ISSET(onhrt->rt_flags, RTF_CACHED));
-
- --onhrt->rt_cachecnt;
+ onhrt->rt_cachecnt--;
if (onhrt->rt_cachecnt == 0)
CLR(onhrt->rt_flags, RTF_CACHED);
+ mtx_leave(&onhrt->rt_mtx);
rtfree(onhrt);
}
@@ -1004,6 +1012,7 @@ rtrequest(int req, struct rt_addrinfo *i
return (ENOBUFS);
}
+ mtx_init_flags(&rt->rt_mtx, IPL_SOFTNET, "rtentry", 0);
refcnt_init_trace(&rt->rt_refcnt, DT_REFCNT_IDX_RTENTRY);
rt->rt_flags = info->rti_flags | RTF_UP;
rt->rt_priority = prio; /* init routing priority */
@@ -1165,7 +1174,7 @@ rt_getll(struct rtentry *rt)
{
if (ISSET(rt->rt_flags, RTF_GATEWAY)) {
/* We may return NULL here. */
- return (rt->rt_gwroute);
+ return (READ_ONCE(rt->rt_gwroute));
}
return (rt);
Index: net/route.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
diff -u -p -r1.213 route.h
--- net/route.h 16 Feb 2025 11:39:28 -0000 1.213
+++ net/route.h 3 Mar 2025 23:50:31 -0000
@@ -41,6 +41,7 @@
* N net lock
* X exclusive net lock, or shared net lock + kernel lock
* R art (rtable) lock
+ * r per route entry mutex rt_mtx
* L arp/nd6/etc lock for updates, net lock for reads
* T rttimer_mtx route timer lists
*/
@@ -114,6 +115,7 @@ struct rttimer;
*/
struct rtentry {
+ struct mutex rt_mtx;
struct sockaddr *rt_dest; /* [I] destination */
SRPL_ENTRY(rtentry) rt_next; /* [R] next mpath entry to our dst */
struct sockaddr *rt_gateway; /* [X] gateway address */
@@ -121,8 +123,8 @@ struct rtentry {
caddr_t rt_llinfo; /* [L] pointer to link level info or
an MPLS structure */
union {
- struct rtentry *_nh; /* [X] rtentry for rt_gateway */
- unsigned int _ref; /* [X] # gateway rtentry refs */
+ struct rtentry *_nh; /* [r] rtentry for rt_gateway */
+ unsigned int _ref; /* [r] # gateway rtentry refs */
} RT_gw;
#define rt_gwroute RT_gw._nh
#define rt_cachecnt RT_gw._ref
Index: net/rtsock.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtsock.c,v
diff -u -p -r1.381 rtsock.c
--- net/rtsock.c 16 Feb 2025 11:39:28 -0000 1.381
+++ net/rtsock.c 3 Mar 2025 23:50:31 -0000
@@ -1343,9 +1343,11 @@ route_cleargateway(struct rtentry *rt, v
{
struct rtentry *nhrt = arg;
+ mtx_enter(&rt->rt_mtx);
if (ISSET(rt->rt_flags, RTF_GATEWAY) && rt->rt_gwroute == nhrt &&
!ISSET(rt->rt_locks, RTV_MTU))
atomic_store_int(&rt->rt_mtu, 0);
+ mtx_leave(&rt->rt_mtx);
return (0);
}
route gwroute mutex