Index | Thread | Search

From:
Alexander Bluhm <alexander.bluhm@gmx.net>
Subject:
Re: route generation number for route cache
To:
Mark Kettenis <mark.kettenis@xs4all.nl>, tech@openbsd.org, Claudio Jeker <claudio@openbsd.org>
Date:
Wed, 31 Jan 2024 14:17:21 +0100

Download raw body.

Thread
On Wed, Jan 31, 2024 at 10:53:09AM +0100, Claudio Jeker wrote:
> Fine by me. It is a bummer that our atomic functions are not memory model
> aware. It seems like one of those footguns that just keep lingering around
> and fire at random.

Atomic operation is for changing one memory location from different
CPU simultanously.  Memory barrier keeps the view on different
memory locations consistent over multiple CPU.  For generation
numbers you need both.  That is the model our CPU, C compiler, and
kernel have.  Maybe C11 is smarter, but that does not matter for
this diff.

> Why is the function called route_validate()? I find this name very
> confusing since it does not really validate the route. It actually
> invalidates the cached route object if needed.
> 
> Isn't there a better name for this? Maybe route_cachecheck() or some other
> form.

I was not happy with the name either.  Just call it route_cache()
for now.

> Last but not least I despise struct route it is one of those things that
> should not exists like this since it embedds a struct sockaddr and for
> IPv6 there needs to be a different struct with bigger size (struct
> route_in6).

There was generic struct route for all domains.  Then came IPv6
which does not fit.  All other domains were removed, so generic
struct route is IPv4 only.

We should have struct route_in and struct route_in6 which contain
specific sockaddr.  I can rename that in a later diff.

> The cherry on top of this turd is that it holds copies of data
> that are readily available (which your diff shows here).

I don't get that.  Route cache has route, generation number, routing
table and socket address.  If the latter match and the route is
valid, the cache is valid.  I don't see redundancy.  Or do you think
struct route_in6 should have in6_addr instead of sockaddr_in6?
rtalloc() takes a struct sockaddr so it is handy to have a sockaddr_in
or sockaddr_in6 in the cache.

Anyway, I would like to get this in.  I have IPv6 follow up diffs
and renaming struct route now would make merging hard.  Can I get
the generation nummber commited first and then make the cache more
consistent?

New diff, only function route_cache() renamed.

ok?

bluhm

Index: net/route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
diff -u -p -r1.426 route.c
--- net/route.c	13 Nov 2023 17:18:27 -0000	1.426
+++ net/route.c	31 Jan 2024 12:55:19 -0000
@@ -140,6 +140,7 @@
 
 /*
  * Locks used to protect struct members:
+ *      a       atomic operations
  *      I       immutable after creation
  *      L       rtlabel_mtx
  *      T       rttimer_mtx
@@ -152,8 +153,9 @@ static uint32_t		rt_hashjitter;
 
 extern unsigned int	rtmap_limit;
 
-struct cpumem *		rtcounters;
-int			rttrash;	/* routes not in table but not freed */
+struct cpumem	*rtcounters;
+int		 rttrash;	/* [a] routes not in table but not freed */
+u_long		 rtgeneration;	/* [a] generation number, routes changed */
 
 struct pool	rtentry_pool;		/* pool for rtentry structures */
 struct pool	rttimer_pool;		/* pool for rttimer structures */
@@ -199,6 +201,33 @@ route_init(void)
 #endif
 }
 
+void
+route_cache(struct route *ro, struct in_addr addr, u_int rtableid)
+{
+	u_long gen;
+
+	gen = atomic_load_long(&rtgeneration);
+	membar_consumer();
+
+	if (rtisvalid(ro->ro_rt) &&
+	    ro->ro_generation == gen &&
+	    ro->ro_tableid == rtableid &&
+	    ro->ro_dst.sa_family == AF_INET &&
+	    satosin(&ro->ro_dst)->sin_addr.s_addr == addr.s_addr) {
+		return;
+	}
+
+	rtfree(ro->ro_rt);
+	ro->ro_rt = NULL;
+	ro->ro_generation = gen;
+	ro->ro_tableid = rtableid;
+
+	memset(&ro->ro_dst, 0, sizeof(ro->ro_dst));
+	satosin(&ro->ro_dst)->sin_family = AF_INET;
+	satosin(&ro->ro_dst)->sin_len = sizeof(struct sockaddr_in);
+	satosin(&ro->ro_dst)->sin_addr = addr;
+}
+
 /*
  * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise.
  */
@@ -824,6 +853,9 @@ rtrequest_delete(struct rt_addrinfo *inf
 	else
 		rtfree(rt);
 
+	membar_producer();
+	atomic_inc_long(&rtgeneration);
+
 	return (0);
 }
 
@@ -992,6 +1024,10 @@ rtrequest(int req, struct rt_addrinfo *i
 			*ret_nrt = rt;
 		else
 			rtfree(rt);
+
+		membar_producer();
+		atomic_inc_long(&rtgeneration);
+
 		break;
 	}
 
@@ -1828,6 +1864,9 @@ rt_if_linkstate_change(struct rtentry *r
 		    rt->rt_priority | RTP_DOWN, rt);
 	}
 	if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask));
+
+	membar_producer();
+	atomic_inc_long(&rtgeneration);
 
 	return (error);
 }
Index: net/route.h
===================================================================
RCS file: /cvs/src/sys/net/route.h,v
diff -u -p -r1.203 route.h
--- net/route.h	12 Nov 2023 17:51:40 -0000	1.203
+++ net/route.h	31 Jan 2024 12:55:19 -0000
@@ -377,6 +377,7 @@ struct sockaddr_rtsearch {
  */
 struct route {
 	struct	rtentry *ro_rt;
+	u_long		 ro_generation;
 	u_long		 ro_tableid;	/* u_long because of alignment */
 	struct	sockaddr ro_dst;
 };
@@ -438,15 +439,18 @@ void		 rtlabel_unref(u_int16_t);
 #define	RT_RESOLVE	1
 
 extern struct rtstat rtstat;
+extern u_long rtgeneration;
 
 struct mbuf;
 struct socket;
 struct ifnet;
+struct in_addr;
 struct sockaddr_in6;
 struct if_ieee80211_data;
 struct bfd_config;
 
 void	 route_init(void);
+void	 route_cache(struct route *, struct in_addr, u_int);
 void	 rtm_ifchg(struct ifnet *);
 void	 rtm_ifannounce(struct ifnet *, int);
 void	 rtm_bfd(struct bfd_config *);
Index: netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
diff -u -p -r1.387 ip_input.c
--- netinet/ip_input.c	16 Sep 2023 09:33:27 -0000	1.387
+++ netinet/ip_input.c	31 Jan 2024 12:55:20 -0000
@@ -1475,7 +1475,6 @@ ip_forward(struct mbuf *m, struct ifnet 
 {
 	struct mbuf mfake, *mcopy = NULL;
 	struct ip *ip = mtod(m, struct ip *);
-	struct sockaddr_in *sin;
 	struct route ro;
 	int error = 0, type = 0, code = 0, destmtu = 0, fake = 0, len;
 	u_int32_t dest;
@@ -1491,15 +1490,11 @@ ip_forward(struct mbuf *m, struct ifnet 
 		goto freecopy;
 	}
 
-	memset(&ro, 0, sizeof(ro));
-	sin = satosin(&ro.ro_dst);
-	sin->sin_family = AF_INET;
-	sin->sin_len = sizeof(*sin);
-	sin->sin_addr = ip->ip_dst;
-
+	ro.ro_rt = NULL;
+	route_cache(&ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
 	if (!rtisvalid(rt)) {
 		rtfree(rt);
-		rt = rtalloc_mpath(sintosa(sin), &ip->ip_src.s_addr,
+		rt = rtalloc_mpath(&ro.ro_dst, &ip->ip_src.s_addr,
 		    m->m_pkthdr.ph_rtableid);
 		if (rt == NULL) {
 			ipstat_inc(ips_noroute);
@@ -1507,6 +1502,7 @@ ip_forward(struct mbuf *m, struct ifnet 
 			return;
 		}
 	}
+	ro.ro_rt = rt;
 
 	/*
 	 * Save at most 68 bytes of the packet in case
@@ -1557,8 +1553,6 @@ ip_forward(struct mbuf *m, struct ifnet 
 		}
 	}
 
-	ro.ro_rt = rt;
-	ro.ro_tableid = m->m_pkthdr.ph_rtableid;
 	error = ip_output(m, NULL, &ro,
 	    (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
 	    NULL, NULL, 0);
Index: netinet/ip_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_output.c,v
diff -u -p -r1.393 ip_output.c
--- netinet/ip_output.c	18 Jan 2024 11:03:16 -0000	1.393
+++ netinet/ip_output.c	31 Jan 2024 12:55:20 -0000
@@ -159,28 +159,15 @@ reroute:
 	 */
 	if (ro == NULL) {
 		ro = &iproute;
-		memset(ro, 0, sizeof(*ro));
+		ro->ro_rt = NULL;
 	}
 
-	dst = satosin(&ro->ro_dst);
-
 	/*
 	 * If there is a cached route, check that it is to the same
 	 * destination and is still up.  If not, free it and try again.
 	 */
-	if (!rtisvalid(ro->ro_rt) ||
-	    dst->sin_addr.s_addr != ip->ip_dst.s_addr ||
-	    ro->ro_tableid != m->m_pkthdr.ph_rtableid) {
-		rtfree(ro->ro_rt);
-		ro->ro_rt = NULL;
-	}
-
-	if (ro->ro_rt == NULL) {
-		dst->sin_family = AF_INET;
-		dst->sin_len = sizeof(*dst);
-		dst->sin_addr = ip->ip_dst;
-		ro->ro_tableid = m->m_pkthdr.ph_rtableid;
-	}
+	route_cache(ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
+	dst = satosin(&ro->ro_dst);
 
 	if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
 	    (ip->ip_dst.s_addr == INADDR_BROADCAST)) &&
Index: netinet6/in6.h
===================================================================
RCS file: /cvs/src/sys/netinet6/in6.h,v
diff -u -p -r1.112 in6.h
--- netinet6/in6.h	27 Jan 2024 21:13:46 -0000	1.112
+++ netinet6/in6.h	31 Jan 2024 12:55:20 -0000
@@ -145,10 +145,11 @@ extern const struct in6_addr in6addr_lin
 
 #if __BSD_VISIBLE
 /*
- * IPv6 route structure
+ * IPv6 route structure, keep fields in sync with struct route
  */
 struct route_in6 {
 	struct	rtentry *ro_rt;
+	u_long		 ro_generation;
 	u_long		 ro_tableid;	/* padded to long for alignment */
 	struct	sockaddr_in6 ro_dst;
 };