Index | Thread | Search

From:
Alexander Bluhm <alexander.bluhm@gmx.net>
Subject:
route generation number for route cache
To:
tech@openbsd.org
Date:
Tue, 30 Jan 2024 13:43:34 +0100

Download raw body.

Thread
Hi,

The outgoing route is cached at the inpcb.  This cache is only
invalidated when the socket closes or if the route gets invalid.
More specific routes are not detected.  Especially with dynamic
routing protocols, sockets must be closed and reopened to use the
correct route.  Running ping during a route change shows the problem.

To solve this, I added a route generation number that is updated
whenever the routing table changes.  The lookup in struct route is
put into the route_validate() API.  If the generation number is too
old, the cached route gets discarded.

This is the implementation for ip_output() and ip_forward().  IPv6
and more places will follow.

ok?

bluhm

Index: net/route.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
diff -u -p -r1.426 route.c
--- net/route.c	13 Nov 2023 17:18:27 -0000	1.426
+++ net/route.c	29 Jan 2024 22:25:44 -0000
@@ -140,6 +140,7 @@
 
 /*
  * Locks used to protect struct members:
+ *      a       atomic operations
  *      I       immutable after creation
  *      L       rtlabel_mtx
  *      T       rttimer_mtx
@@ -152,8 +153,9 @@ static uint32_t		rt_hashjitter;
 
 extern unsigned int	rtmap_limit;
 
-struct cpumem *		rtcounters;
-int			rttrash;	/* routes not in table but not freed */
+struct cpumem	*rtcounters;
+int		 rttrash;	/* [a] routes not in table but not freed */
+u_long		 rtgeneration;	/* [a] generation number, routes changed */
 
 struct pool	rtentry_pool;		/* pool for rtentry structures */
 struct pool	rttimer_pool;		/* pool for rttimer structures */
@@ -199,6 +201,33 @@ route_init(void)
 #endif
 }
 
+void
+route_validate(struct route *ro, struct in_addr addr, u_int rtableid)
+{
+	u_long gen;
+
+	gen = atomic_load_long(&rtgeneration);
+	membar_consumer();
+
+	if (rtisvalid(ro->ro_rt) &&
+	    ro->ro_generation == gen &&
+	    ro->ro_tableid == rtableid &&
+	    ro->ro_dst.sa_family == AF_INET &&
+	    satosin(&ro->ro_dst)->sin_addr.s_addr == addr.s_addr) {
+		return;
+	}
+
+	rtfree(ro->ro_rt);
+	ro->ro_rt = NULL;
+	ro->ro_generation = gen;
+	ro->ro_tableid = rtableid;
+
+	memset(&ro->ro_dst, 0, sizeof(ro->ro_dst));
+	satosin(&ro->ro_dst)->sin_family = AF_INET;
+	satosin(&ro->ro_dst)->sin_len = sizeof(struct sockaddr_in);
+	satosin(&ro->ro_dst)->sin_addr = addr;
+}
+
 /*
  * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise.
  */
@@ -824,6 +853,9 @@ rtrequest_delete(struct rt_addrinfo *inf
 	else
 		rtfree(rt);
 
+	membar_producer();
+	atomic_inc_long(&rtgeneration);
+
 	return (0);
 }
 
@@ -992,6 +1024,10 @@ rtrequest(int req, struct rt_addrinfo *i
 			*ret_nrt = rt;
 		else
 			rtfree(rt);
+
+		membar_producer();
+		atomic_inc_long(&rtgeneration);
+
 		break;
 	}
 
@@ -1828,6 +1864,9 @@ rt_if_linkstate_change(struct rtentry *r
 		    rt->rt_priority | RTP_DOWN, rt);
 	}
 	if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask));
+
+	membar_producer();
+	atomic_inc_long(&rtgeneration);
 
 	return (error);
 }
Index: net/route.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
diff -u -p -r1.203 route.h
--- net/route.h	12 Nov 2023 17:51:40 -0000	1.203
+++ net/route.h	29 Jan 2024 22:25:44 -0000
@@ -377,6 +377,7 @@ struct sockaddr_rtsearch {
  */
 struct route {
 	struct	rtentry *ro_rt;
+	u_long		 ro_generation;
 	u_long		 ro_tableid;	/* u_long because of alignment */
 	struct	sockaddr ro_dst;
 };
@@ -438,15 +439,18 @@ void		 rtlabel_unref(u_int16_t);
 #define	RT_RESOLVE	1
 
 extern struct rtstat rtstat;
+extern u_long rtgeneration;
 
 struct mbuf;
 struct socket;
 struct ifnet;
+struct in_addr;
 struct sockaddr_in6;
 struct if_ieee80211_data;
 struct bfd_config;
 
 void	 route_init(void);
+void	 route_validate(struct route *, struct in_addr, u_int);
 void	 rtm_ifchg(struct ifnet *);
 void	 rtm_ifannounce(struct ifnet *, int);
 void	 rtm_bfd(struct bfd_config *);
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
diff -u -p -r1.387 ip_input.c
--- netinet/ip_input.c	16 Sep 2023 09:33:27 -0000	1.387
+++ netinet/ip_input.c	29 Jan 2024 22:24:13 -0000
@@ -1475,7 +1475,6 @@ ip_forward(struct mbuf *m, struct ifnet 
 {
 	struct mbuf mfake, *mcopy = NULL;
 	struct ip *ip = mtod(m, struct ip *);
-	struct sockaddr_in *sin;
 	struct route ro;
 	int error = 0, type = 0, code = 0, destmtu = 0, fake = 0, len;
 	u_int32_t dest;
@@ -1491,15 +1490,11 @@ ip_forward(struct mbuf *m, struct ifnet 
 		goto freecopy;
 	}
 
-	memset(&ro, 0, sizeof(ro));
-	sin = satosin(&ro.ro_dst);
-	sin->sin_family = AF_INET;
-	sin->sin_len = sizeof(*sin);
-	sin->sin_addr = ip->ip_dst;
-
+	ro.ro_rt = NULL;
+	route_validate(&ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
 	if (!rtisvalid(rt)) {
 		rtfree(rt);
-		rt = rtalloc_mpath(sintosa(sin), &ip->ip_src.s_addr,
+		rt = rtalloc_mpath(&ro.ro_dst, &ip->ip_src.s_addr,
 		    m->m_pkthdr.ph_rtableid);
 		if (rt == NULL) {
 			ipstat_inc(ips_noroute);
@@ -1507,6 +1502,7 @@ ip_forward(struct mbuf *m, struct ifnet 
 			return;
 		}
 	}
+	ro.ro_rt = rt;
 
 	/*
 	 * Save at most 68 bytes of the packet in case
@@ -1557,8 +1553,6 @@ ip_forward(struct mbuf *m, struct ifnet 
 		}
 	}
 
-	ro.ro_rt = rt;
-	ro.ro_tableid = m->m_pkthdr.ph_rtableid;
 	error = ip_output(m, NULL, &ro,
 	    (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
 	    NULL, NULL, 0);
Index: netinet/ip_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
diff -u -p -r1.393 ip_output.c
--- netinet/ip_output.c	18 Jan 2024 11:03:16 -0000	1.393
+++ netinet/ip_output.c	29 Jan 2024 22:24:13 -0000
@@ -159,28 +159,15 @@ reroute:
 	 */
 	if (ro == NULL) {
 		ro = &iproute;
-		memset(ro, 0, sizeof(*ro));
+		ro->ro_rt = NULL;
 	}
 
-	dst = satosin(&ro->ro_dst);
-
 	/*
 	 * If there is a cached route, check that it is to the same
 	 * destination and is still up.  If not, free it and try again.
 	 */
-	if (!rtisvalid(ro->ro_rt) ||
-	    dst->sin_addr.s_addr != ip->ip_dst.s_addr ||
-	    ro->ro_tableid != m->m_pkthdr.ph_rtableid) {
-		rtfree(ro->ro_rt);
-		ro->ro_rt = NULL;
-	}
-
-	if (ro->ro_rt == NULL) {
-		dst->sin_family = AF_INET;
-		dst->sin_len = sizeof(*dst);
-		dst->sin_addr = ip->ip_dst;
-		ro->ro_tableid = m->m_pkthdr.ph_rtableid;
-	}
+	route_validate(ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
+	dst = satosin(&ro->ro_dst);
 
 	if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
 	    (ip->ip_dst.s_addr == INADDR_BROADCAST)) &&
Index: netinet6/in6.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6.h,v
diff -u -p -r1.112 in6.h
--- netinet6/in6.h	27 Jan 2024 21:13:46 -0000	1.112
+++ netinet6/in6.h	29 Jan 2024 22:31:51 -0000
@@ -145,10 +145,11 @@ extern const struct in6_addr in6addr_lin
 
 #if __BSD_VISIBLE
 /*
- * IPv6 route structure
+ * IPv6 route structure, keep fields in sync with struct route
  */
 struct route_in6 {
 	struct	rtentry *ro_rt;
+	u_long		 ro_generation;
 	u_long		 ro_tableid;	/* padded to long for alignment */
 	struct	sockaddr_in6 ro_dst;
 };