Index | Thread | Search

From:
Alexander Bluhm <alexander.bluhm@gmx.net>
Subject:
Re: route cache multi path
To:
tech@openbsd.org
Date:
Wed, 21 Feb 2024 15:31:24 +0100

Download raw body.

Thread
On Wed, Feb 21, 2024 at 11:14:52AM +0100, Claudio Jeker wrote:
> On Tue, Feb 20, 2024 at 10:54:20PM +0100, Alexander Bluhm wrote:
> > Hi,
> > 
> > This makes the route cache aware of multipath routing.  If source
> > address or multipath flags change, discard cached route.
> > 
> > ok?
> 
> I wonder about the RTF_MPATH check. Isn't the generation number preventing
> this already? You can't add or clear RTF_MPATH (which is done by the
> kernel) without adding or removing a route.

+                   !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||

This one is needed to ignore src for routes that do not have RTF_MPATH
set.  If route_cache() is called with a src that does not match the
src in the route cache, use the non-multipath route anyway.

> Also there is no need for ro_flags. If src == NULL then
> ro->ro_srcin.s_addr must be INADDR_ANY to be a match else src and
> ro->ro_srcin need to match. A similar check can be made for IPv6.

I was also not happy about ro_flags.  My first atempt was to signal
it inline with INADDR_ANY src in struct route.  But I was not sure
if a network packet with unspecified source is equivalent to calling
route_cache() with src NULL.  So decided against inband signaling
and added a flag.  The code gets nicer without flag.  As you confirm
that there is no semantic difference between src = NULL and *src =
INADDR_ANY, I will change that.

> For the ipmultipath check I would suggest something similar. Just bump the
> generation number in the sysctl (which your diff already does).

If ipmultipath is false, all routes are valid, no matter what src
is.  The check is there, to ignore ro->ro_srcin.s_addr == src->s_addr
in this case.

> Also can we please pass dst and src the same whay in route_cache() passing
> one by value and the other by reference is strange.

Dst is necessary, src is optional.  But IPv6 dst is too long to be
passed by value, so it is always a pointer.  Passing v4 by value
and v6 by pointer is done elsewhere.  I was also unsure about that
aproach.  I will change that.

> The use of struct in_addr / in6_addr for the source address is probably
> something I would like to change at some point. I would prefer to use also
> sockaddr there but that should be tackled after or with the rtalloc_mpath()
> clean up.

That is a different story.  I have not seen yet why in_addr src is
bad.  The current uint32_t *src is worse.  Maybe I can combine
route_cache() and rtalloc_mpath() into route_mpath() that does cache
and route lookup together.  Then it gets in_addr and all the sockaddr
creation is done there.  Will try that later.

New diff, ok?

bluhm

Index: net/route.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
diff -u -p -r1.432 route.c
--- net/route.c	13 Feb 2024 12:22:09 -0000	1.432
+++ net/route.c	21 Feb 2024 14:17:24 -0000
@@ -202,7 +202,8 @@ route_init(void)
 }
 
 int
-route_cache(struct route *ro, struct in_addr addr, u_int rtableid)
+route_cache(struct route *ro, const struct in_addr *dst,
+    const struct in_addr *src, u_int rtableid)
 {
 	u_long gen;
 
@@ -213,28 +214,35 @@ route_cache(struct route *ro, struct in_
 	    ro->ro_generation == gen &&
 	    ro->ro_tableid == rtableid &&
 	    ro->ro_dstsa.sa_family == AF_INET &&
-	    ro->ro_dstsin.sin_addr.s_addr == addr.s_addr) {
-		ipstat_inc(ips_rtcachehit);
-		return (0);
+	    ro->ro_dstsin.sin_addr.s_addr == dst->s_addr) {
+		if (src == NULL || !ipmultipath ||
+		    !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
+		    (ro->ro_srcin.s_addr != INADDR_ANY &&
+		    ro->ro_srcin.s_addr == src->s_addr)) {
+			ipstat_inc(ips_rtcachehit);
+			return (0);
+		}
 	}
 
 	ipstat_inc(ips_rtcachemiss);
 	rtfree(ro->ro_rt);
-	ro->ro_rt = NULL;
+	memset(ro, 0, sizeof(*ro));
 	ro->ro_generation = gen;
 	ro->ro_tableid = rtableid;
 
-	memset(&ro->ro_dst, 0, sizeof(ro->ro_dst));
 	ro->ro_dstsin.sin_family = AF_INET;
 	ro->ro_dstsin.sin_len = sizeof(struct sockaddr_in);
-	ro->ro_dstsin.sin_addr = addr;
+	ro->ro_dstsin.sin_addr = *dst;
+	if (src != NULL)
+		ro->ro_srcin = *src;
 
 	return (ESRCH);
 }
 
 #ifdef INET6
 int
-route6_cache(struct route *ro, const struct in6_addr *addr, u_int rtableid)
+route6_cache(struct route *ro, const struct in6_addr *dst,
+    const struct in6_addr *src, u_int rtableid)
 {
 	u_long gen;
 
@@ -245,21 +253,27 @@ route6_cache(struct route *ro, const str
 	    ro->ro_generation == gen &&
 	    ro->ro_tableid == rtableid &&
 	    ro->ro_dstsa.sa_family == AF_INET6 &&
-	    IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, addr)) {
-		ip6stat_inc(ip6s_rtcachehit);
-		return (0);
+	    IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, dst)) {
+		if (src == NULL || !ip6_multipath ||
+		    !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
+		    (!IN6_IS_ADDR_UNSPECIFIED(&ro->ro_srcin6) &&
+		    IN6_ARE_ADDR_EQUAL(&ro->ro_srcin6, src))) {
+			ip6stat_inc(ip6s_rtcachehit);
+			return (0);
+		}
 	}
 
 	ip6stat_inc(ip6s_rtcachemiss);
 	rtfree(ro->ro_rt);
-	ro->ro_rt = NULL;
+	memset(ro, 0, sizeof(*ro));
 	ro->ro_generation = gen;
 	ro->ro_tableid = rtableid;
 
-	memset(&ro->ro_dst, 0, sizeof(ro->ro_dst));
 	ro->ro_dstsin6.sin6_family = AF_INET6;
 	ro->ro_dstsin6.sin6_len = sizeof(struct sockaddr_in6);
-	ro->ro_dstsin6.sin6_addr = *addr;
+	ro->ro_dstsin6.sin6_addr = *dst;
+	if (src != NULL)
+		ro->ro_srcin6 = *src;
 
 	return (ESRCH);
 }
Index: net/route.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
diff -u -p -r1.206 route.h
--- net/route.h	13 Feb 2024 12:22:09 -0000	1.206
+++ net/route.h	21 Feb 2024 14:13:17 -0000
@@ -393,13 +393,14 @@ struct route {
 	u_long		 ro_generation;
 	u_long		 ro_tableid;	/* u_long because of alignment */
 	union {
-		struct	sockaddr	rod_sa;
-		struct	sockaddr_in	rod_sin;
-		struct	sockaddr_in6	rod_sin6;
-	} ro_dst;
-#define ro_dstsa	ro_dst.rod_sa
-#define ro_dstsin	ro_dst.rod_sin
-#define ro_dstsin6	ro_dst.rod_sin6
+		struct	sockaddr	ro_dstsa;
+		struct	sockaddr_in	ro_dstsin;
+		struct	sockaddr_in6	ro_dstsin6;
+	};
+	union {
+		struct	in_addr		ro_srcin;
+		struct	in6_addr	ro_srcin6;
+	};
 };
 
 #endif /* __BSD_VISIBLE */
@@ -462,8 +463,10 @@ struct if_ieee80211_data;
 struct bfd_config;
 
 void	 route_init(void);
-int	 route_cache(struct route *, struct in_addr, u_int);
-int	 route6_cache(struct route *, const struct in6_addr *, u_int);
+int	 route_cache(struct route *, const struct in_addr *,
+	    const struct in_addr *, u_int);
+int	 route6_cache(struct route *, const struct in6_addr *,
+	    const struct in6_addr *, u_int);
 void	 rtm_ifchg(struct ifnet *);
 void	 rtm_ifannounce(struct ifnet *, int);
 void	 rtm_bfd(struct bfd_config *);
Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
diff -u -p -r1.293 in_pcb.c
--- netinet/in_pcb.c	13 Feb 2024 12:22:09 -0000	1.293
+++ netinet/in_pcb.c	21 Feb 2024 14:10:59 -0000
@@ -919,7 +919,8 @@ in_pcbrtentry(struct inpcb *inp)
 
 	if (inp->inp_faddr.s_addr == INADDR_ANY)
 		return (NULL);
-	if (route_cache(ro, inp->inp_faddr, inp->inp_rtableid)) {
+	if (route_cache(ro, &inp->inp_faddr, &inp->inp_laddr,
+	    inp->inp_rtableid)) {
 		ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa,
 		    &inp->inp_laddr.s_addr, ro->ro_tableid);
 	}
@@ -982,7 +983,7 @@ in_pcbselsrc(struct in_addr *insrc, stru
 	 * If route is known or can be allocated now,
 	 * our src addr is taken from the i/f, else punt.
 	 */
-	if (route_cache(ro, sin->sin_addr, rtableid)) {
+	if (route_cache(ro, &sin->sin_addr, NULL, rtableid)) {
 		/* No route yet, so try to acquire one */
 		ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid);
 	}
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
diff -u -p -r1.389 ip_input.c
--- netinet/ip_input.c	13 Feb 2024 12:22:09 -0000	1.389
+++ netinet/ip_input.c	21 Feb 2024 14:11:03 -0000
@@ -118,7 +118,6 @@ const struct sysctl_bounded_args ipctl_v
 	{ IPCTL_IPPORT_HILASTAUTO, &ipport_hilastauto, 0, 65535 },
 	{ IPCTL_IPPORT_MAXQUEUE, &ip_maxqueue, 0, 10000 },
 	{ IPCTL_MFORWARDING, &ipmforwarding, 0, 1 },
-	{ IPCTL_MULTIPATH, &ipmultipath, 0, 1 },
 	{ IPCTL_ARPTIMEOUT, &arpt_keep, 0, INT_MAX },
 	{ IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
 };
@@ -1491,7 +1490,7 @@ ip_forward(struct mbuf *m, struct ifnet 
 	}
 
 	ro.ro_rt = NULL;
-	route_cache(&ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
+	route_cache(&ro, &ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
 	if (!rtisvalid(rt)) {
 		rtfree(rt);
 		rt = rtalloc_mpath(&ro.ro_dstsa, &ip->ip_src.s_addr,
@@ -1633,10 +1632,10 @@ int
 ip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
     size_t newlen)
 {
-	int error;
 #ifdef MROUTING
 	extern struct mrtstat mrtstat;
 #endif
+	int oldval, error;
 
 	/* Almost all sysctl names at this level are terminal. */
 	if (namelen != 1 && name[0] != IPCTL_IFQUEUE &&
@@ -1721,6 +1720,15 @@ ip_sysctl(int *name, u_int namelen, void
 	case IPCTL_MRTVIF:
 		return (EOPNOTSUPP);
 #endif
+	case IPCTL_MULTIPATH:
+		NET_LOCK();
+		oldval = ipmultipath;
+		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
+		    &ipmultipath, 0, 1);
+		if (oldval != ipmultipath)
+			atomic_inc_long(&rtgeneration);
+		NET_UNLOCK();
+		return (error);
 	default:
 		NET_LOCK();
 		error = sysctl_bounded_arr(ipctl_vars, nitems(ipctl_vars),
Index: netinet/ip_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
diff -u -p -r1.395 ip_output.c
--- netinet/ip_output.c	13 Feb 2024 12:22:09 -0000	1.395
+++ netinet/ip_output.c	21 Feb 2024 14:11:07 -0000
@@ -166,7 +166,7 @@ reroute:
 	 * If there is a cached route, check that it is to the same
 	 * destination and is still up.  If not, free it and try again.
 	 */
-	route_cache(ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
+	route_cache(ro, &ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
 	dst = &ro->ro_dstsin;
 
 	if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
Index: netinet6/in6_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
diff -u -p -r1.138 in6_pcb.c
--- netinet6/in6_pcb.c	13 Feb 2024 12:22:09 -0000	1.138
+++ netinet6/in6_pcb.c	21 Feb 2024 14:04:19 -0000
@@ -565,7 +565,8 @@ in6_pcbrtentry(struct inpcb *inp)
 
 	if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
 		return (NULL);
-	if (route6_cache(ro, &inp->inp_faddr6, inp->inp_rtableid)) {
+	if (route6_cache(ro, &inp->inp_faddr6, &inp->inp_laddr6,
+	    inp->inp_rtableid)) {
 		ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa,
 		    &inp->inp_laddr6.s6_addr32[0], ro->ro_tableid);
 	}
Index: netinet6/in6_src.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_src.c,v
diff -u -p -r1.94 in6_src.c
--- netinet6/in6_src.c	13 Feb 2024 12:22:09 -0000	1.94
+++ netinet6/in6_src.c	21 Feb 2024 14:04:19 -0000
@@ -179,8 +179,8 @@ in6_pcbselsrc(const struct in6_addr **in
 	 * If route is known or can be allocated now,
 	 * our src addr is taken from the i/f, else punt.
 	 */
-	if (route6_cache(ro, dst, rtableid)) {
-		ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE, ro->ro_tableid);
+	if (route6_cache(ro, dst, NULL, rtableid)) {
+		ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid);
 	}
 
 	/*
@@ -304,7 +304,7 @@ in6_selectroute(const struct in6_addr *d
 	 * a new one.
 	 */
 	if (ro) {
-		if (route6_cache(ro, dst, rtableid)) {
+		if (route6_cache(ro, dst, NULL, rtableid)) {
 			/* No route yet, so try to acquire one */
 			ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL,
 			    ro->ro_tableid);
Index: netinet6/ip6_forward.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
diff -u -p -r1.114 ip6_forward.c
--- netinet6/ip6_forward.c	13 Feb 2024 12:22:09 -0000	1.114
+++ netinet6/ip6_forward.c	21 Feb 2024 14:04:19 -0000
@@ -166,7 +166,8 @@ reroute:
 #endif /* IPSEC */
 
 	ro.ro_rt = NULL;
-	route6_cache(&ro, &ip6->ip6_dst, m->m_pkthdr.ph_rtableid);
+	route6_cache(&ro, &ip6->ip6_dst, &ip6->ip6_src,
+	    m->m_pkthdr.ph_rtableid);
 	dst = &ro.ro_dstsa;
 	if (!rtisvalid(rt)) {
 		rtfree(rt);
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
diff -u -p -r1.257 ip6_input.c
--- netinet6/ip6_input.c	3 Dec 2023 20:36:24 -0000	1.257
+++ netinet6/ip6_input.c	21 Feb 2024 14:08:14 -0000
@@ -1451,7 +1451,6 @@ const struct sysctl_bounded_args ipv6ctl
 	{ IPV6CTL_USE_DEPRECATED, &ip6_use_deprecated, 0, 1 },
 	{ IPV6CTL_MAXFRAGS, &ip6_maxfrags, 0, 1000 },
 	{ IPV6CTL_MFORWARDING, &ip6_mforwarding, 0, 1 },
-	{ IPV6CTL_MULTIPATH, &ip6_multipath, 0, 1 },
 	{ IPV6CTL_MCAST_PMTU, &ip6_mcast_pmtu, 0, 1 },
 	{ IPV6CTL_NEIGHBORGCTHRESH, &ip6_neighborgcthresh, -1, 5 * 2048 },
 	{ IPV6CTL_MAXDYNROUTES, &ip6_maxdynroutes, -1, 5 * 4096 },
@@ -1499,7 +1498,7 @@ ip6_sysctl(int *name, u_int namelen, voi
 #ifdef MROUTING
 	extern struct mrt6stat mrt6stat;
 #endif
-	int error;
+	int oldval, error;
 
 	/* Almost all sysctl names at this level are terminal. */
 	if (namelen != 1 && name[0] != IPV6CTL_IFQUEUE)
@@ -1551,6 +1550,15 @@ ip6_sysctl(int *name, u_int namelen, voi
 		    oldp, oldlenp, newp, newlen, &ip6intrq));
 	case IPV6CTL_SOIIKEY:
 		return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
+	case IPV6CTL_MULTIPATH:
+		NET_LOCK();
+		oldval = ip6_multipath;
+		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
+		    &ip6_multipath, 0, 1);
+		if (oldval != ip6_multipath)
+			atomic_inc_long(&rtgeneration);
+		NET_UNLOCK();
+		return (error);
 	default:
 		NET_LOCK();
 		error = sysctl_bounded_arr(ipv6ctl_vars, nitems(ipv6ctl_vars),
Index: netinet6/ip6_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
diff -u -p -r1.286 ip6_output.c
--- netinet6/ip6_output.c	13 Feb 2024 12:22:09 -0000	1.286
+++ netinet6/ip6_output.c	21 Feb 2024 14:04:19 -0000
@@ -480,7 +480,7 @@ reroute:
 			goto bad;
 		}
 	} else {
-		route6_cache(ro, &ip6->ip6_dst, m->m_pkthdr.ph_rtableid);
+		route6_cache(ro, &ip6->ip6_dst, NULL, m->m_pkthdr.ph_rtableid);
 	}
 
 	if (rt && (rt->rt_flags & RTF_GATEWAY) &&