Download raw body.
route cache multi path
On Tue, Feb 20, 2024 at 10:54:20PM +0100, Alexander Bluhm wrote:
> Hi,
>
> This makes the route cache aware of multipath routing. If source
> address or multipath flags change, discard cached route.
>
> ok?
I wonder about the RTF_MPATH check. Isn't the generation number preventing
this already? You can't add or clear RTF_MPATH (which is done by the
kernel) without adding or removing a route.
Also there is no need for ro_flags. If src == NULL then
ro->ro_srcin.s_addr must be INADDR_ANY to be a match else src and
ro->ro_srcin need to match. A similar check can be made for IPv6.
For the ipmultipath check I would suggest something similar. Just bump the
generation number in the sysctl (which your diff already does).
Also can we please pass dst and src the same whay in route_cache() passing
one by value and the other by reference is strange.
The use of struct in_addr / in6_addr for the source address is probably
something I would like to change at some point. I would prefer to use also
sockaddr there but that should be tackled after or with the rtalloc_mpath()
clean up.
> bluhm
>
> Index: net/route.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
> diff -u -p -r1.432 route.c
> --- net/route.c 13 Feb 2024 12:22:09 -0000 1.432
> +++ net/route.c 20 Feb 2024 21:25:11 -0000
> @@ -202,7 +202,8 @@ route_init(void)
> }
>
> int
> -route_cache(struct route *ro, struct in_addr addr, u_int rtableid)
> +route_cache(struct route *ro, struct in_addr dst, const struct in_addr *src,
> + u_int rtableid)
> {
> u_long gen;
>
> @@ -213,28 +214,37 @@ route_cache(struct route *ro, struct in_
> ro->ro_generation == gen &&
> ro->ro_tableid == rtableid &&
> ro->ro_dstsa.sa_family == AF_INET &&
> - ro->ro_dstsin.sin_addr.s_addr == addr.s_addr) {
> - ipstat_inc(ips_rtcachehit);
> - return (0);
> + ro->ro_dstsin.sin_addr.s_addr == dst.s_addr) {
> + if (src == NULL || !ipmultipath ||
> + !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
> + (ISSET(ro->ro_flags, RTF_MPATH) &&
> + ro->ro_srcin.s_addr == src->s_addr)) {
> + ipstat_inc(ips_rtcachehit);
> + return (0);
> + }
> }
>
> ipstat_inc(ips_rtcachemiss);
> rtfree(ro->ro_rt);
> - ro->ro_rt = NULL;
> + memset(ro, 0, sizeof(*ro));
> ro->ro_generation = gen;
> ro->ro_tableid = rtableid;
>
> - memset(&ro->ro_dst, 0, sizeof(ro->ro_dst));
> ro->ro_dstsin.sin_family = AF_INET;
> ro->ro_dstsin.sin_len = sizeof(struct sockaddr_in);
> - ro->ro_dstsin.sin_addr = addr;
> + ro->ro_dstsin.sin_addr = dst;
> + if (src != NULL) {
> + ro->ro_srcin = *src;
> + SET(ro->ro_flags, RTF_MPATH);
> + }
>
> return (ESRCH);
> }
>
> #ifdef INET6
> int
> -route6_cache(struct route *ro, const struct in6_addr *addr, u_int rtableid)
> +route6_cache(struct route *ro, const struct in6_addr *dst,
> + const struct in6_addr *src, u_int rtableid)
> {
> u_long gen;
>
> @@ -245,21 +255,29 @@ route6_cache(struct route *ro, const str
> ro->ro_generation == gen &&
> ro->ro_tableid == rtableid &&
> ro->ro_dstsa.sa_family == AF_INET6 &&
> - IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, addr)) {
> - ip6stat_inc(ip6s_rtcachehit);
> - return (0);
> + IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, dst)) {
> + if (src == NULL || !ip6_multipath ||
> + !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
> + (ISSET(ro->ro_flags, RTF_MPATH) &&
> + IN6_ARE_ADDR_EQUAL(&ro->ro_srcin6, src))) {
> + ip6stat_inc(ip6s_rtcachehit);
> + return (0);
> + }
> }
>
> ip6stat_inc(ip6s_rtcachemiss);
> rtfree(ro->ro_rt);
> - ro->ro_rt = NULL;
> + memset(ro, 0, sizeof(*ro));
> ro->ro_generation = gen;
> ro->ro_tableid = rtableid;
>
> - memset(&ro->ro_dst, 0, sizeof(ro->ro_dst));
> ro->ro_dstsin6.sin6_family = AF_INET6;
> ro->ro_dstsin6.sin6_len = sizeof(struct sockaddr_in6);
> - ro->ro_dstsin6.sin6_addr = *addr;
> + ro->ro_dstsin6.sin6_addr = *dst;
> + if (src != NULL) {
> + ro->ro_srcin6 = *src;
> + SET(ro->ro_flags, RTF_MPATH);
> + }
>
> return (ESRCH);
> }
> Index: net/route.h
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
> diff -u -p -r1.206 route.h
> --- net/route.h 13 Feb 2024 12:22:09 -0000 1.206
> +++ net/route.h 20 Feb 2024 18:19:42 -0000
> @@ -393,13 +393,15 @@ struct route {
> u_long ro_generation;
> u_long ro_tableid; /* u_long because of alignment */
> union {
> - struct sockaddr rod_sa;
> - struct sockaddr_in rod_sin;
> - struct sockaddr_in6 rod_sin6;
> - } ro_dst;
> -#define ro_dstsa ro_dst.rod_sa
> -#define ro_dstsin ro_dst.rod_sin
> -#define ro_dstsin6 ro_dst.rod_sin6
> + struct sockaddr ro_dstsa;
> + struct sockaddr_in ro_dstsin;
> + struct sockaddr_in6 ro_dstsin6;
> + };
> + union {
> + struct in_addr ro_srcin;
> + struct in6_addr ro_srcin6;
> + };
> + u_int ro_flags;
> };
>
> #endif /* __BSD_VISIBLE */
> @@ -462,8 +464,10 @@ struct if_ieee80211_data;
> struct bfd_config;
>
> void route_init(void);
> -int route_cache(struct route *, struct in_addr, u_int);
> -int route6_cache(struct route *, const struct in6_addr *, u_int);
> +int route_cache(struct route *, struct in_addr, const struct in_addr *,
> + u_int);
> +int route6_cache(struct route *, const struct in6_addr *,
> + const struct in6_addr *, u_int);
> void rtm_ifchg(struct ifnet *);
> void rtm_ifannounce(struct ifnet *, int);
> void rtm_bfd(struct bfd_config *);
> Index: netinet/in_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> diff -u -p -r1.293 in_pcb.c
> --- netinet/in_pcb.c 13 Feb 2024 12:22:09 -0000 1.293
> +++ netinet/in_pcb.c 20 Feb 2024 18:17:13 -0000
> @@ -919,7 +919,8 @@ in_pcbrtentry(struct inpcb *inp)
>
> if (inp->inp_faddr.s_addr == INADDR_ANY)
> return (NULL);
> - if (route_cache(ro, inp->inp_faddr, inp->inp_rtableid)) {
> + if (route_cache(ro, inp->inp_faddr, &inp->inp_laddr,
> + inp->inp_rtableid)) {
> ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa,
> &inp->inp_laddr.s_addr, ro->ro_tableid);
> }
> @@ -982,7 +983,7 @@ in_pcbselsrc(struct in_addr *insrc, stru
> * If route is known or can be allocated now,
> * our src addr is taken from the i/f, else punt.
> */
> - if (route_cache(ro, sin->sin_addr, rtableid)) {
> + if (route_cache(ro, sin->sin_addr, NULL, rtableid)) {
> /* No route yet, so try to acquire one */
> ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid);
> }
> Index: netinet/ip_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> diff -u -p -r1.389 ip_input.c
> --- netinet/ip_input.c 13 Feb 2024 12:22:09 -0000 1.389
> +++ netinet/ip_input.c 20 Feb 2024 18:46:25 -0000
> @@ -118,7 +118,6 @@ const struct sysctl_bounded_args ipctl_v
> { IPCTL_IPPORT_HILASTAUTO, &ipport_hilastauto, 0, 65535 },
> { IPCTL_IPPORT_MAXQUEUE, &ip_maxqueue, 0, 10000 },
> { IPCTL_MFORWARDING, &ipmforwarding, 0, 1 },
> - { IPCTL_MULTIPATH, &ipmultipath, 0, 1 },
> { IPCTL_ARPTIMEOUT, &arpt_keep, 0, INT_MAX },
> { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
> };
> @@ -1491,7 +1490,7 @@ ip_forward(struct mbuf *m, struct ifnet
> }
>
> ro.ro_rt = NULL;
> - route_cache(&ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
> + route_cache(&ro, ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
> if (!rtisvalid(rt)) {
> rtfree(rt);
> rt = rtalloc_mpath(&ro.ro_dstsa, &ip->ip_src.s_addr,
> @@ -1633,10 +1632,10 @@ int
> ip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
> size_t newlen)
> {
> - int error;
> #ifdef MROUTING
> extern struct mrtstat mrtstat;
> #endif
> + int oldval, error;
>
> /* Almost all sysctl names at this level are terminal. */
> if (namelen != 1 && name[0] != IPCTL_IFQUEUE &&
> @@ -1721,6 +1720,15 @@ ip_sysctl(int *name, u_int namelen, void
> case IPCTL_MRTVIF:
> return (EOPNOTSUPP);
> #endif
> + case IPCTL_MULTIPATH:
> + NET_LOCK();
> + oldval = ipmultipath;
> + error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
> + &ipmultipath, 0, 1);
> + if (oldval != ipmultipath)
> + atomic_inc_long(&rtgeneration);
> + NET_UNLOCK();
> + return (error);
> default:
> NET_LOCK();
> error = sysctl_bounded_arr(ipctl_vars, nitems(ipctl_vars),
> Index: netinet/ip_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
> diff -u -p -r1.395 ip_output.c
> --- netinet/ip_output.c 13 Feb 2024 12:22:09 -0000 1.395
> +++ netinet/ip_output.c 20 Feb 2024 18:17:13 -0000
> @@ -166,7 +166,7 @@ reroute:
> * If there is a cached route, check that it is to the same
> * destination and is still up. If not, free it and try again.
> */
> - route_cache(ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
> + route_cache(ro, ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
> dst = &ro->ro_dstsin;
>
> if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
> Index: netinet6/in6_pcb.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
> diff -u -p -r1.138 in6_pcb.c
> --- netinet6/in6_pcb.c 13 Feb 2024 12:22:09 -0000 1.138
> +++ netinet6/in6_pcb.c 20 Feb 2024 18:17:13 -0000
> @@ -565,7 +565,8 @@ in6_pcbrtentry(struct inpcb *inp)
>
> if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6))
> return (NULL);
> - if (route6_cache(ro, &inp->inp_faddr6, inp->inp_rtableid)) {
> + if (route6_cache(ro, &inp->inp_faddr6, &inp->inp_laddr6,
> + inp->inp_rtableid)) {
> ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa,
> &inp->inp_laddr6.s6_addr32[0], ro->ro_tableid);
> }
> Index: netinet6/in6_src.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_src.c,v
> diff -u -p -r1.94 in6_src.c
> --- netinet6/in6_src.c 13 Feb 2024 12:22:09 -0000 1.94
> +++ netinet6/in6_src.c 20 Feb 2024 18:17:13 -0000
> @@ -179,8 +179,8 @@ in6_pcbselsrc(const struct in6_addr **in
> * If route is known or can be allocated now,
> * our src addr is taken from the i/f, else punt.
> */
> - if (route6_cache(ro, dst, rtableid)) {
> - ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE, ro->ro_tableid);
> + if (route6_cache(ro, dst, NULL, rtableid)) {
> + ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid);
> }
>
> /*
> @@ -304,7 +304,7 @@ in6_selectroute(const struct in6_addr *d
> * a new one.
> */
> if (ro) {
> - if (route6_cache(ro, dst, rtableid)) {
> + if (route6_cache(ro, dst, NULL, rtableid)) {
> /* No route yet, so try to acquire one */
> ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL,
> ro->ro_tableid);
> Index: netinet6/ip6_forward.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v
> diff -u -p -r1.114 ip6_forward.c
> --- netinet6/ip6_forward.c 13 Feb 2024 12:22:09 -0000 1.114
> +++ netinet6/ip6_forward.c 20 Feb 2024 18:17:13 -0000
> @@ -166,7 +166,8 @@ reroute:
> #endif /* IPSEC */
>
> ro.ro_rt = NULL;
> - route6_cache(&ro, &ip6->ip6_dst, m->m_pkthdr.ph_rtableid);
> + route6_cache(&ro, &ip6->ip6_dst, &ip6->ip6_src,
> + m->m_pkthdr.ph_rtableid);
> dst = &ro.ro_dstsa;
> if (!rtisvalid(rt)) {
> rtfree(rt);
> Index: netinet6/ip6_input.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
> diff -u -p -r1.257 ip6_input.c
> --- netinet6/ip6_input.c 3 Dec 2023 20:36:24 -0000 1.257
> +++ netinet6/ip6_input.c 20 Feb 2024 18:46:09 -0000
> @@ -1451,7 +1451,6 @@ const struct sysctl_bounded_args ipv6ctl
> { IPV6CTL_USE_DEPRECATED, &ip6_use_deprecated, 0, 1 },
> { IPV6CTL_MAXFRAGS, &ip6_maxfrags, 0, 1000 },
> { IPV6CTL_MFORWARDING, &ip6_mforwarding, 0, 1 },
> - { IPV6CTL_MULTIPATH, &ip6_multipath, 0, 1 },
> { IPV6CTL_MCAST_PMTU, &ip6_mcast_pmtu, 0, 1 },
> { IPV6CTL_NEIGHBORGCTHRESH, &ip6_neighborgcthresh, -1, 5 * 2048 },
> { IPV6CTL_MAXDYNROUTES, &ip6_maxdynroutes, -1, 5 * 4096 },
> @@ -1499,7 +1498,7 @@ ip6_sysctl(int *name, u_int namelen, voi
> #ifdef MROUTING
> extern struct mrt6stat mrt6stat;
> #endif
> - int error;
> + int oldval, error;
>
> /* Almost all sysctl names at this level are terminal. */
> if (namelen != 1 && name[0] != IPV6CTL_IFQUEUE)
> @@ -1551,6 +1550,15 @@ ip6_sysctl(int *name, u_int namelen, voi
> oldp, oldlenp, newp, newlen, &ip6intrq));
> case IPV6CTL_SOIIKEY:
> return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen));
> + case IPV6CTL_MULTIPATH:
> + NET_LOCK();
> + oldval = ip6_multipath;
> + error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
> + &ip6_multipath, 0, 1);
> + if (oldval != ip6_multipath)
> + atomic_inc_long(&rtgeneration);
> + NET_UNLOCK();
> + return (error);
> default:
> NET_LOCK();
> error = sysctl_bounded_arr(ipv6ctl_vars, nitems(ipv6ctl_vars),
> Index: netinet6/ip6_output.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
> diff -u -p -r1.286 ip6_output.c
> --- netinet6/ip6_output.c 13 Feb 2024 12:22:09 -0000 1.286
> +++ netinet6/ip6_output.c 20 Feb 2024 18:17:13 -0000
> @@ -480,7 +480,7 @@ reroute:
> goto bad;
> }
> } else {
> - route6_cache(ro, &ip6->ip6_dst, m->m_pkthdr.ph_rtableid);
> + route6_cache(ro, &ip6->ip6_dst, NULL, m->m_pkthdr.ph_rtableid);
> }
>
> if (rt && (rt->rt_flags & RTF_GATEWAY) &&
>
--
:wq Claudio
route cache multi path