Download raw body.
route cache multi path
On Wed, Feb 21, 2024 at 11:14:52AM +0100, Claudio Jeker wrote:
> On Tue, Feb 20, 2024 at 10:54:20PM +0100, Alexander Bluhm wrote:
> > Hi,
> >
> > This makes the route cache aware of multipath routing. If source
> > address or multipath flags change, discard cached route.
> >
> > ok?
>
> I wonder about the RTF_MPATH check. Isn't the generation number preventing
> this already? You can't add or clear RTF_MPATH (which is done by the
> kernel) without adding or removing a route.
+ !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
This one is needed to ignore src for routes that do not have RTF_MPATH
set. If route_cache() is called with a src that does not match the
src in the route cache, use the non-multipath route anyway.
> Also there is no need for ro_flags. If src == NULL then
> ro->ro_srcin.s_addr must be INADDR_ANY to be a match else src and
> ro->ro_srcin need to match. A similar check can be made for IPv6.
I was also not happy about ro_flags. My first atempt was to signal
it inline with INADDR_ANY src in struct route. But I was not sure
if a network packet with unspecified source is equivalent to calling
route_cache() with src NULL. So decided against inband signaling
and added a flag. The code gets nicer without flag. As you confirm
that there is no semantic difference between src = NULL and *src =
INADDR_ANY, I will change that.
> For the ipmultipath check I would suggest something similar. Just bump the
> generation number in the sysctl (which your diff already does).
If ipmultipath is false, all routes are valid, no matter what src
is. The check is there, to ignore ro->ro_srcin.s_addr == src->s_addr
in this case.
> Also can we please pass dst and src the same whay in route_cache() passing
> one by value and the other by reference is strange.
Dst is necessary, src is optional. But IPv6 dst is too long to be
passed by value, so it is always a pointer. Passing v4 by value
and v6 by pointer is done elsewhere. I was also unsure about that
aproach. I will change that.
> The use of struct in_addr / in6_addr for the source address is probably
> something I would like to change at some point. I would prefer to use also
> sockaddr there but that should be tackled after or with the rtalloc_mpath()
> clean up.
That is a different story. I have not seen yet why in_addr src is
bad. The current uint32_t *src is worse. Maybe I can combine
route_cache() and rtalloc_mpath() into route_mpath() that does cache
and route lookup together. Then it gets in_addr and all the sockaddr
creation is done there. Will try that later.
New diff, ok?
bluhm
Index: net/route.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
diff -u -p -r1.432 route.c
--- net/route.c 13 Feb 2024 12:22:09 -0000 1.432
+++ net/route.c 21 Feb 2024 14:17:24 -0000
@@ -202,7 +202,8 @@ route_init(void)
}
int
-route_cache(struct route *ro, struct in_addr addr, u_int rtableid)
+route_cache(struct route *ro, const struct in_addr *dst,
+ const struct in_addr *src, u_int rtableid)
{
u_long gen;
@@ -213,28 +214,35 @@ route_cache(struct route *ro, struct in_
ro->ro_generation == gen &&
ro->ro_tableid == rtableid &&
ro->ro_dstsa.sa_family == AF_INET &&
- ro->ro_dstsin.sin_addr.s_addr == addr.s_addr) {
- ipstat_inc(ips_rtcachehit);
- return (0);
+ ro->ro_dstsin.sin_addr.s_addr == dst->s_addr) {
+ if (src == NULL || !ipmultipath ||
+ !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
+ (ro->ro_srcin.s_addr != INADDR_ANY &&
+ ro->ro_srcin.s_addr == src->s_addr)) {
+ ipstat_inc(ips_rtcachehit);
+ return (0);
+ }
}
ipstat_inc(ips_rtcachemiss);
rtfree(ro->ro_rt);
- ro->ro_rt = NULL;
+ memset(ro, 0, sizeof(*ro));
ro->ro_generation = gen;
ro->ro_tableid = rtableid;
- memset(&ro->ro_dst, 0, sizeof(ro->ro_dst));
ro->ro_dstsin.sin_family = AF_INET;
ro->ro_dstsin.sin_len = sizeof(struct sockaddr_in);
- ro->ro_dstsin.sin_addr = addr;
+ ro->ro_dstsin.sin_addr = *dst;
+ if (src != NULL)
+ ro->ro_srcin = *src;
return (ESRCH);
}
#ifdef INET6
int
-route6_cache(struct route *ro, const struct in6_addr *addr, u_int rtableid)
+route6_cache(struct route *ro, const struct in6_addr *dst,
+ const struct in6_addr *src, u_int rtableid)
{
u_long gen;
@@ -245,21 +253,27 @@ route6_cache(struct route *ro, const str
ro->ro_generation == gen &&
ro->ro_tableid == rtableid &&
ro->ro_dstsa.sa_family == AF_INET6 &&
- IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, addr)) {
- ip6stat_inc(ip6s_rtcachehit);
- return (0);
+ IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, dst)) {
+ if (src == NULL || !ip6_multipath ||
+ !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
+ (!IN6_IS_ADDR_UNSPECIFIED(&ro->ro_srcin6) &&
+ IN6_ARE_ADDR_EQUAL(&ro->ro_srcin6, src))) {
+ ip6stat_inc(ip6s_rtcachehit);
+ return (0);
+ }
}
ip6stat_inc(ip6s_rtcachemiss);
rtfree(ro->ro_rt);
- ro->ro_rt = NULL;
+ memset(ro, 0, sizeof(*ro));
ro->ro_generation = gen;
ro->ro_tableid = rtableid;
- memset(&ro->ro_dst, 0, sizeof(ro->ro_dst));
ro->ro_dstsin6.sin6_family = AF_INET6;
ro->ro_dstsin6.sin6_len = sizeof(struct sockaddr_in6);
- ro->ro_dstsin6.sin6_addr = *addr;
+ ro->ro_dstsin6.sin6_addr = *dst;
+ if (src != NULL)
+ ro->ro_srcin6 = *src;
return (ESRCH);
}
Index: net/route.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
diff -u -p -r1.206 route.h
--- net/route.h 13 Feb 2024 12:22:09 -0000 1.206
+++ net/route.h 21 Feb 2024 14:13:17 -0000
@@ -393,13 +393,14 @@ struct route {
u_long ro_generation;
u_long ro_tableid; /* u_long because of alignment */
union {
- struct sockaddr rod_sa;
- struct sockaddr_in rod_sin;
- struct sockaddr_in6 rod_sin6;
- } ro_dst;
-#define ro_dstsa ro_dst.rod_sa
-#define ro_dstsin ro_dst.rod_sin
-#define ro_dstsin6 ro_dst.rod_sin6
+ struct sockaddr ro_dstsa;
+ struct sockaddr_in ro_dstsin;
+ struct sockaddr_in6 ro_dstsin6;
+ };
+ union {
+ struct in_addr ro_srcin;
+ struct in6_addr ro_srcin6;
+ };
};
#endif /* __BSD_VISIBLE */
@@ -462,8 +463,10 @@ struct if_ieee80211_data;
struct bfd_config;
void route_init(void);
-int route_cache(struct route *, struct in_addr, u_int);
-int route6_cache(struct route *, const struct in6_addr *, u_int);
+int route_cache(struct route *, const struct in_addr *,
+ const struct in_addr *, u_int);
+int route6_cache(struct route *, const struct in6_addr *,
+ const struct in6_addr *, u_int);
void rtm_ifchg(struct ifnet *);
void rtm_ifannounce(struct ifnet *, int);
void rtm_bfd(struct bfd_config *);
Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
diff -u -p -r1.293 in_pcb.c
--- netinet/in_pcb.c 13 Feb 2024 12:22:09 -0000 1.293
+++ netinet/in_pcb.c 21 Feb 2024 14:10:59 -0000
@@ -919,7 +919,8 @@ in_pcbrtentry(struct inpcb *inp)
if (inp->inp_faddr.s_addr == INADDR_ANY)
return (NULL);
- if (route_cache(ro, inp->inp_faddr, inp->inp_rtableid)) {
+ if (route_cache(ro, &inp->inp_faddr, &inp->inp_laddr,
+ inp->inp_rtableid)) {
ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa,
&inp->inp_laddr.s_addr, ro->ro_tableid);
}
@@ -982,7 +983,7 @@ in_pcbselsrc(struct in_addr *insrc, stru
* If route is known or can be allocated now,
* our src addr is taken from the i/f, else punt.
*/
- if (route_cache(ro, sin->sin_addr, rtableid)) {
+ if (route_cache(ro, &sin->sin_addr, NULL, rtableid)) {
/* No route yet, so try to acquire one */
ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid);
}
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
diff -u -p -r1.389 ip_input.c
--- netinet/ip_input.c 13 Feb 2024 12:22:09 -0000 1.389
+++ netinet/ip_input.c 21 Feb 2024 14:11:03 -0000
@@ -118,7 +118,6 @@ const struct sysctl_bounded_args ipctl_v
{ IPCTL_IPPORT_HILASTAUTO, &ipport_hilastauto, 0, 65535 },
{ IPCTL_IPPORT_MAXQUEUE, &ip_maxqueue, 0, 10000 },
{ IPCTL_MFORWARDING, &ipmforwarding, 0, 1 },
- { IPCTL_MULTIPATH, &ipmultipath, 0, 1 },
{ IPCTL_ARPTIMEOUT, &arpt_keep, 0, INT_MAX },
{ IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
};
@@ -1491,7 +1490,7 @@ ip_forward(struct mbuf *m, struct ifnet
}
ro.ro_rt = NULL;
- route_cache(&ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
+ route_cache(&ro, &ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
if (!rtisvalid(rt)) {
rtfree(rt);
rt = rtalloc_mpath(&ro.ro_dstsa, &ip->ip_src.s_addr,
@@ -1633,10 +1632,10 @@ int
ip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
size_t newlen)
{
- int error;
#ifdef MROUTING
extern struct mrtstat mrtstat;
#endif
+ int oldval, error;
/* Almost all sysctl names at this level are terminal. */
if (namelen != 1 && name[0] != IPCTL_IFQUEUE &&
@@ -1721,6 +1720,15 @@ ip_sysctl(int *name, u_int namelen, void
case IPCTL_MRTVIF:
return (EOPNOTSUPP);
#endif
+ case IPCTL_MULTIPATH:
+ NET_LOCK();
+ oldval = ipmultipath;
+ error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
+ &ipmultipath, 0, 1);
+ if (oldval != ipmultipath)
+ atomic_inc_long(&rtgeneration);
+ NET_UNLOCK();
+ return (error);
default:
NET_LOCK();
error = sysctl_bounded_arr(ipctl_vars, nitems(ipctl_vars),
Index: netinet/ip_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
diff -u -p -r1.395 ip_output.c
--- netinet/ip_output.c 13 Feb 2024 12:22:09 -0000 1.395
+++ netinet/ip_output.c 21 Feb 2024 14:11:07 -0000
@@ -166,7 +166,7 @@ reroute:
* If there is a cached route, check that it is to the same
* destination and is still up. If not, free it and try again.
*/
- route_cache(ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
+ route_cache(ro, &ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
dst = &ro->ro_dstsin;
if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
Index: netinet6/in6_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
diff -u -p -r1.138 in6_pcb.c
--- netinet6/in6_pcb.c 13 Feb 2024 12:22:09 -0000 1.138
+++ netinet6/in6_pcb.c 21 Feb 2024 14:04:19 -0000
@@ -565,7 +565,8 @@ in6_pcbrtentry(struct inpcb *inp)
if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
return (NULL);
- if (route6_cache(ro, &inp->inp_faddr6, inp->inp_rtableid)) {
+ if (route6_cache(ro, &inp->inp_faddr6, &inp->inp_laddr6,
+ inp->inp_rtableid)) {
ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa,
&inp->inp_laddr6.s6_addr32[0], ro->ro_tableid);
}
Index: netinet6/in6_src.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_src.c,v
diff -u -p -r1.94 in6_src.c
--- netinet6/in6_src.c 13 Feb 2024 12:22:09 -0000 1.94
+++ netinet6/in6_src.c 21 Feb 2024 14:04:19 -0000
@@ -179,8 +179,8 @@ in6_pcbselsrc(const struct in6_addr **in
* If route is known or can be allocated now,
* our src addr is taken from the i/f, else punt.
*/
- if (route6_cache(ro, dst, rtableid)) {
- ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE, ro->ro_tableid);
+ if (route6_cache(ro, dst, NULL, rtableid)) {
+ ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid);
}
/*
@@ -304,7 +304,7 @@ in6_selectroute(const struct in6_addr *d
* a new one.
*/
if (ro) {
- if (route6_cache(ro, dst, rtableid)) {
+ if (route6_cache(ro, dst, NULL, rtableid)) {
/* No route yet, so try to acquire one */
ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL,
ro->ro_tableid);
Index: netinet6/ip6_forward.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
diff -u -p -r1.114 ip6_forward.c
--- netinet6/ip6_forward.c 13 Feb 2024 12:22:09 -0000 1.114
+++ netinet6/ip6_forward.c 21 Feb 2024 14:04:19 -0000
@@ -166,7 +166,8 @@ reroute:
#endif /* IPSEC */
ro.ro_rt = NULL;
- route6_cache(&ro, &ip6->ip6_dst, m->m_pkthdr.ph_rtableid);
+ route6_cache(&ro, &ip6->ip6_dst, &ip6->ip6_src,
+ m->m_pkthdr.ph_rtableid);
dst = &ro.ro_dstsa;
if (!rtisvalid(rt)) {
rtfree(rt);
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
diff -u -p -r1.257 ip6_input.c
--- netinet6/ip6_input.c 3 Dec 2023 20:36:24 -0000 1.257
+++ netinet6/ip6_input.c 21 Feb 2024 14:08:14 -0000
@@ -1451,7 +1451,6 @@ const struct sysctl_bounded_args ipv6ctl
{ IPV6CTL_USE_DEPRECATED, &ip6_use_deprecated, 0, 1 },
{ IPV6CTL_MAXFRAGS, &ip6_maxfrags, 0, 1000 },
{ IPV6CTL_MFORWARDING, &ip6_mforwarding, 0, 1 },
- { IPV6CTL_MULTIPATH, &ip6_multipath, 0, 1 },
{ IPV6CTL_MCAST_PMTU, &ip6_mcast_pmtu, 0, 1 },
{ IPV6CTL_NEIGHBORGCTHRESH, &ip6_neighborgcthresh, -1, 5 * 2048 },
{ IPV6CTL_MAXDYNROUTES, &ip6_maxdynroutes, -1, 5 * 4096 },
@@ -1499,7 +1498,7 @@ ip6_sysctl(int *name, u_int namelen, voi
#ifdef MROUTING
extern struct mrt6stat mrt6stat;
#endif
- int error;
+ int oldval, error;
/* Almost all sysctl names at this level are terminal. */
if (namelen != 1 && name[0] != IPV6CTL_IFQUEUE)
@@ -1551,6 +1550,15 @@ ip6_sysctl(int *name, u_int namelen, voi
oldp, oldlenp, newp, newlen, &ip6intrq));
case IPV6CTL_SOIIKEY:
return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
+ case IPV6CTL_MULTIPATH:
+ NET_LOCK();
+ oldval = ip6_multipath;
+ error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
+ &ip6_multipath, 0, 1);
+ if (oldval != ip6_multipath)
+ atomic_inc_long(&rtgeneration);
+ NET_UNLOCK();
+ return (error);
default:
NET_LOCK();
error = sysctl_bounded_arr(ipv6ctl_vars, nitems(ipv6ctl_vars),
Index: netinet6/ip6_output.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
diff -u -p -r1.286 ip6_output.c
--- netinet6/ip6_output.c 13 Feb 2024 12:22:09 -0000 1.286
+++ netinet6/ip6_output.c 21 Feb 2024 14:04:19 -0000
@@ -480,7 +480,7 @@ reroute:
goto bad;
}
} else {
- route6_cache(ro, &ip6->ip6_dst, m->m_pkthdr.ph_rtableid);
+ route6_cache(ro, &ip6->ip6_dst, NULL, m->m_pkthdr.ph_rtableid);
}
if (rt && (rt->rt_flags & RTF_GATEWAY) &&
route cache multi path