Index | Thread | Search

From:
Claudio Jeker <cjeker@diehard.n-r-g.com>
Subject:
Re: route cache multi path
To:
Alexander Bluhm <alexander.bluhm@gmx.net>
Cc:
tech@openbsd.org
Date:
Wed, 21 Feb 2024 11:14:52 +0100

Download raw body.

Thread
On Tue, Feb 20, 2024 at 10:54:20PM +0100, Alexander Bluhm wrote:
> Hi,
> 
> This makes the route cache aware of multipath routing.  If source
> address or multipath flags change, discard cached route.
> 
> ok?

I wonder about the RTF_MPATH check. Isn't the generation number preventing
this already? You can't add or clear RTF_MPATH (which is done by the
kernel) without adding or removing a route.

Also there is no need for ro_flags. If src == NULL then
ro->ro_srcin.s_addr must be INADDR_ANY to be a match else src and
ro->ro_srcin need to match. A similar check can be made for IPv6.

For the ipmultipath check I would suggest something similar. Just bump the
generation number in the sysctl (which your diff already does).
 
Also can we please pass dst and src the same whay in route_cache() passing
one by value and the other by reference is strange.

The use of struct in_addr / in6_addr for the source address is probably
something I would like to change at some point. I would prefer to use also
sockaddr there but that should be tackled after or with the rtalloc_mpath()
clean up.

> bluhm
> 
> Index: net/route.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
> diff -u -p -r1.432 route.c
> --- net/route.c	13 Feb 2024 12:22:09 -0000	1.432
> +++ net/route.c	20 Feb 2024 21:25:11 -0000
> @@ -202,7 +202,8 @@ route_init(void)
>  }
>  
>  int
> -route_cache(struct route *ro, struct in_addr addr, u_int rtableid)
> +route_cache(struct route *ro, struct in_addr dst, const struct in_addr *src,
> +    u_int rtableid)
>  {
>  	u_long gen;
>  
> @@ -213,28 +214,37 @@ route_cache(struct route *ro, struct in_
>  	    ro->ro_generation == gen &&
>  	    ro->ro_tableid == rtableid &&
>  	    ro->ro_dstsa.sa_family == AF_INET &&
> -	    ro->ro_dstsin.sin_addr.s_addr == addr.s_addr) {
> -		ipstat_inc(ips_rtcachehit);
> -		return (0);
> +	    ro->ro_dstsin.sin_addr.s_addr == dst.s_addr) {
> +		if (src == NULL || !ipmultipath ||
> +		    !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
> +		    (ISSET(ro->ro_flags, RTF_MPATH) &&
> +		    ro->ro_srcin.s_addr == src->s_addr)) {
> +			ipstat_inc(ips_rtcachehit);
> +			return (0);
> +		}
>  	}
>  
>  	ipstat_inc(ips_rtcachemiss);
>  	rtfree(ro->ro_rt);
> -	ro->ro_rt = NULL;
> +	memset(ro, 0, sizeof(*ro));
>  	ro->ro_generation = gen;
>  	ro->ro_tableid = rtableid;
>  
> -	memset(&ro->ro_dst, 0, sizeof(ro->ro_dst));
>  	ro->ro_dstsin.sin_family = AF_INET;
>  	ro->ro_dstsin.sin_len = sizeof(struct sockaddr_in);
> -	ro->ro_dstsin.sin_addr = addr;
> +	ro->ro_dstsin.sin_addr = dst;
> +	if (src != NULL) {
> +		ro->ro_srcin = *src;
> +		SET(ro->ro_flags, RTF_MPATH);
> +	}
>  
>  	return (ESRCH);
>  }
>  
>  #ifdef INET6
>  int
> -route6_cache(struct route *ro, const struct in6_addr *addr, u_int rtableid)
> +route6_cache(struct route *ro, const struct in6_addr *dst,
> +    const struct in6_addr *src, u_int rtableid)
>  {
>  	u_long gen;
>  
> @@ -245,21 +255,29 @@ route6_cache(struct route *ro, const str
>  	    ro->ro_generation == gen &&
>  	    ro->ro_tableid == rtableid &&
>  	    ro->ro_dstsa.sa_family == AF_INET6 &&
> -	    IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, addr)) {
> -		ip6stat_inc(ip6s_rtcachehit);
> -		return (0);
> +	    IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, dst)) {
> +		if (src == NULL || !ip6_multipath ||
> +		    !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
> +		    (ISSET(ro->ro_flags, RTF_MPATH) &&
> +		    IN6_ARE_ADDR_EQUAL(&ro->ro_srcin6, src))) {
> +			ip6stat_inc(ip6s_rtcachehit);
> +			return (0);
> +		}
>  	}
>  
>  	ip6stat_inc(ip6s_rtcachemiss);
>  	rtfree(ro->ro_rt);
> -	ro->ro_rt = NULL;
> +	memset(ro, 0, sizeof(*ro));
>  	ro->ro_generation = gen;
>  	ro->ro_tableid = rtableid;
>  
> -	memset(&ro->ro_dst, 0, sizeof(ro->ro_dst));
>  	ro->ro_dstsin6.sin6_family = AF_INET6;
>  	ro->ro_dstsin6.sin6_len = sizeof(struct sockaddr_in6);
> -	ro->ro_dstsin6.sin6_addr = *addr;
> +	ro->ro_dstsin6.sin6_addr = *dst;
> +	if (src != NULL) {
> +		ro->ro_srcin6 = *src;
> +		SET(ro->ro_flags, RTF_MPATH);
> +	}
>  
>  	return (ESRCH);
>  }
> Index: net/route.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
> diff -u -p -r1.206 route.h
> --- net/route.h	13 Feb 2024 12:22:09 -0000	1.206
> +++ net/route.h	20 Feb 2024 18:19:42 -0000
> @@ -393,13 +393,15 @@ struct route {
>  	u_long		 ro_generation;
>  	u_long		 ro_tableid;	/* u_long because of alignment */
>  	union {
> -		struct	sockaddr	rod_sa;
> -		struct	sockaddr_in	rod_sin;
> -		struct	sockaddr_in6	rod_sin6;
> -	} ro_dst;
> -#define ro_dstsa	ro_dst.rod_sa
> -#define ro_dstsin	ro_dst.rod_sin
> -#define ro_dstsin6	ro_dst.rod_sin6
> +		struct	sockaddr	ro_dstsa;
> +		struct	sockaddr_in	ro_dstsin;
> +		struct	sockaddr_in6	ro_dstsin6;
> +	};
> +	union {
> +		struct	in_addr		ro_srcin;
> +		struct	in6_addr	ro_srcin6;
> +	};
> +	u_int		ro_flags;
>  };
>  
>  #endif /* __BSD_VISIBLE */
> @@ -462,8 +464,10 @@ struct if_ieee80211_data;
>  struct bfd_config;
>  
>  void	 route_init(void);
> -int	 route_cache(struct route *, struct in_addr, u_int);
> -int	 route6_cache(struct route *, const struct in6_addr *, u_int);
> +int	 route_cache(struct route *, struct in_addr, const struct in_addr *,
> +	    u_int);
> +int	 route6_cache(struct route *, const struct in6_addr *,
> +	    const struct in6_addr *, u_int);
>  void	 rtm_ifchg(struct ifnet *);
>  void	 rtm_ifannounce(struct ifnet *, int);
>  void	 rtm_bfd(struct bfd_config *);
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> diff -u -p -r1.293 in_pcb.c
> --- netinet/in_pcb.c	13 Feb 2024 12:22:09 -0000	1.293
> +++ netinet/in_pcb.c	20 Feb 2024 18:17:13 -0000
> @@ -919,7 +919,8 @@ in_pcbrtentry(struct inpcb *inp)
>  
>  	if (inp->inp_faddr.s_addr == INADDR_ANY)
>  		return (NULL);
> -	if (route_cache(ro, inp->inp_faddr, inp->inp_rtableid)) {
> +	if (route_cache(ro, inp->inp_faddr, &inp->inp_laddr,
> +	    inp->inp_rtableid)) {
>  		ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa,
>  		    &inp->inp_laddr.s_addr, ro->ro_tableid);
>  	}
> @@ -982,7 +983,7 @@ in_pcbselsrc(struct in_addr *insrc, stru
>  	 * If route is known or can be allocated now,
>  	 * our src addr is taken from the i/f, else punt.
>  	 */
> -	if (route_cache(ro, sin->sin_addr, rtableid)) {
> +	if (route_cache(ro, sin->sin_addr, NULL, rtableid)) {
>  		/* No route yet, so try to acquire one */
>  		ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid);
>  	}
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> diff -u -p -r1.389 ip_input.c
> --- netinet/ip_input.c	13 Feb 2024 12:22:09 -0000	1.389
> +++ netinet/ip_input.c	20 Feb 2024 18:46:25 -0000
> @@ -118,7 +118,6 @@ const struct sysctl_bounded_args ipctl_v
>  	{ IPCTL_IPPORT_HILASTAUTO, &ipport_hilastauto, 0, 65535 },
>  	{ IPCTL_IPPORT_MAXQUEUE, &ip_maxqueue, 0, 10000 },
>  	{ IPCTL_MFORWARDING, &ipmforwarding, 0, 1 },
> -	{ IPCTL_MULTIPATH, &ipmultipath, 0, 1 },
>  	{ IPCTL_ARPTIMEOUT, &arpt_keep, 0, INT_MAX },
>  	{ IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
>  };
> @@ -1491,7 +1490,7 @@ ip_forward(struct mbuf *m, struct ifnet 
>  	}
>  
>  	ro.ro_rt = NULL;
> -	route_cache(&ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
> +	route_cache(&ro, ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
>  	if (!rtisvalid(rt)) {
>  		rtfree(rt);
>  		rt = rtalloc_mpath(&ro.ro_dstsa, &ip->ip_src.s_addr,
> @@ -1633,10 +1632,10 @@ int
>  ip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
>      size_t newlen)
>  {
> -	int error;
>  #ifdef MROUTING
>  	extern struct mrtstat mrtstat;
>  #endif
> +	int oldval, error;
>  
>  	/* Almost all sysctl names at this level are terminal. */
>  	if (namelen != 1 && name[0] != IPCTL_IFQUEUE &&
> @@ -1721,6 +1720,15 @@ ip_sysctl(int *name, u_int namelen, void
>  	case IPCTL_MRTVIF:
>  		return (EOPNOTSUPP);
>  #endif
> +	case IPCTL_MULTIPATH:
> +		NET_LOCK();
> +		oldval = ipmultipath;
> +		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
> +		    &ipmultipath, 0, 1);
> +		if (oldval != ipmultipath)
> +			atomic_inc_long(&rtgeneration);
> +		NET_UNLOCK();
> +		return (error);
>  	default:
>  		NET_LOCK();
>  		error = sysctl_bounded_arr(ipctl_vars, nitems(ipctl_vars),
> Index: netinet/ip_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
> diff -u -p -r1.395 ip_output.c
> --- netinet/ip_output.c	13 Feb 2024 12:22:09 -0000	1.395
> +++ netinet/ip_output.c	20 Feb 2024 18:17:13 -0000
> @@ -166,7 +166,7 @@ reroute:
>  	 * If there is a cached route, check that it is to the same
>  	 * destination and is still up.  If not, free it and try again.
>  	 */
> -	route_cache(ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
> +	route_cache(ro, ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
>  	dst = &ro->ro_dstsin;
>  
>  	if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
> Index: netinet6/in6_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
> diff -u -p -r1.138 in6_pcb.c
> --- netinet6/in6_pcb.c	13 Feb 2024 12:22:09 -0000	1.138
> +++ netinet6/in6_pcb.c	20 Feb 2024 18:17:13 -0000
> @@ -565,7 +565,8 @@ in6_pcbrtentry(struct inpcb *inp)
>  
>  	if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
>  		return (NULL);
> -	if (route6_cache(ro, &inp->inp_faddr6, inp->inp_rtableid)) {
> +	if (route6_cache(ro, &inp->inp_faddr6, &inp->inp_laddr6,
> +	    inp->inp_rtableid)) {
>  		ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa,
>  		    &inp->inp_laddr6.s6_addr32[0], ro->ro_tableid);
>  	}
> Index: netinet6/in6_src.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_src.c,v
> diff -u -p -r1.94 in6_src.c
> --- netinet6/in6_src.c	13 Feb 2024 12:22:09 -0000	1.94
> +++ netinet6/in6_src.c	20 Feb 2024 18:17:13 -0000
> @@ -179,8 +179,8 @@ in6_pcbselsrc(const struct in6_addr **in
>  	 * If route is known or can be allocated now,
>  	 * our src addr is taken from the i/f, else punt.
>  	 */
> -	if (route6_cache(ro, dst, rtableid)) {
> -		ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE, ro->ro_tableid);
> +	if (route6_cache(ro, dst, NULL, rtableid)) {
> +		ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid);
>  	}
>  
>  	/*
> @@ -304,7 +304,7 @@ in6_selectroute(const struct in6_addr *d
>  	 * a new one.
>  	 */
>  	if (ro) {
> -		if (route6_cache(ro, dst, rtableid)) {
> +		if (route6_cache(ro, dst, NULL, rtableid)) {
>  			/* No route yet, so try to acquire one */
>  			ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL,
>  			    ro->ro_tableid);
> Index: netinet6/ip6_forward.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
> diff -u -p -r1.114 ip6_forward.c
> --- netinet6/ip6_forward.c	13 Feb 2024 12:22:09 -0000	1.114
> +++ netinet6/ip6_forward.c	20 Feb 2024 18:17:13 -0000
> @@ -166,7 +166,8 @@ reroute:
>  #endif /* IPSEC */
>  
>  	ro.ro_rt = NULL;
> -	route6_cache(&ro, &ip6->ip6_dst, m->m_pkthdr.ph_rtableid);
> +	route6_cache(&ro, &ip6->ip6_dst, &ip6->ip6_src,
> +	    m->m_pkthdr.ph_rtableid);
>  	dst = &ro.ro_dstsa;
>  	if (!rtisvalid(rt)) {
>  		rtfree(rt);
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
> diff -u -p -r1.257 ip6_input.c
> --- netinet6/ip6_input.c	3 Dec 2023 20:36:24 -0000	1.257
> +++ netinet6/ip6_input.c	20 Feb 2024 18:46:09 -0000
> @@ -1451,7 +1451,6 @@ const struct sysctl_bounded_args ipv6ctl
>  	{ IPV6CTL_USE_DEPRECATED, &ip6_use_deprecated, 0, 1 },
>  	{ IPV6CTL_MAXFRAGS, &ip6_maxfrags, 0, 1000 },
>  	{ IPV6CTL_MFORWARDING, &ip6_mforwarding, 0, 1 },
> -	{ IPV6CTL_MULTIPATH, &ip6_multipath, 0, 1 },
>  	{ IPV6CTL_MCAST_PMTU, &ip6_mcast_pmtu, 0, 1 },
>  	{ IPV6CTL_NEIGHBORGCTHRESH, &ip6_neighborgcthresh, -1, 5 * 2048 },
>  	{ IPV6CTL_MAXDYNROUTES, &ip6_maxdynroutes, -1, 5 * 4096 },
> @@ -1499,7 +1498,7 @@ ip6_sysctl(int *name, u_int namelen, voi
>  #ifdef MROUTING
>  	extern struct mrt6stat mrt6stat;
>  #endif
> -	int error;
> +	int oldval, error;
>  
>  	/* Almost all sysctl names at this level are terminal. */
>  	if (namelen != 1 && name[0] != IPV6CTL_IFQUEUE)
> @@ -1551,6 +1550,15 @@ ip6_sysctl(int *name, u_int namelen, voi
>  		    oldp, oldlenp, newp, newlen, &ip6intrq));
>  	case IPV6CTL_SOIIKEY:
>  		return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
> +	case IPV6CTL_MULTIPATH:
> +		NET_LOCK();
> +		oldval = ip6_multipath;
> +		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
> +		    &ip6_multipath, 0, 1);
> +		if (oldval != ip6_multipath)
> +			atomic_inc_long(&rtgeneration);
> +		NET_UNLOCK();
> +		return (error);
>  	default:
>  		NET_LOCK();
>  		error = sysctl_bounded_arr(ipv6ctl_vars, nitems(ipv6ctl_vars),
> Index: netinet6/ip6_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
> diff -u -p -r1.286 ip6_output.c
> --- netinet6/ip6_output.c	13 Feb 2024 12:22:09 -0000	1.286
> +++ netinet6/ip6_output.c	20 Feb 2024 18:17:13 -0000
> @@ -480,7 +480,7 @@ reroute:
>  			goto bad;
>  		}
>  	} else {
> -		route6_cache(ro, &ip6->ip6_dst, m->m_pkthdr.ph_rtableid);
> +		route6_cache(ro, &ip6->ip6_dst, NULL, m->m_pkthdr.ph_rtableid);
>  	}
>  
>  	if (rt && (rt->rt_flags & RTF_GATEWAY) &&
> 

-- 
:wq Claudio