From: Alexander Bluhm Subject: route cache multi path To: tech@openbsd.org Date: Tue, 20 Feb 2024 22:54:20 +0100 Hi, This makes the route cache aware of multipath routing. If source address or multipath flags change, discard cached route. ok? bluhm Index: net/route.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v diff -u -p -r1.432 route.c --- net/route.c 13 Feb 2024 12:22:09 -0000 1.432 +++ net/route.c 20 Feb 2024 21:25:11 -0000 @@ -202,7 +202,8 @@ route_init(void) } int -route_cache(struct route *ro, struct in_addr addr, u_int rtableid) +route_cache(struct route *ro, struct in_addr dst, const struct in_addr *src, + u_int rtableid) { u_long gen; @@ -213,28 +214,37 @@ route_cache(struct route *ro, struct in_ ro->ro_generation == gen && ro->ro_tableid == rtableid && ro->ro_dstsa.sa_family == AF_INET && - ro->ro_dstsin.sin_addr.s_addr == addr.s_addr) { - ipstat_inc(ips_rtcachehit); - return (0); + ro->ro_dstsin.sin_addr.s_addr == dst.s_addr) { + if (src == NULL || !ipmultipath || + !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) || + (ISSET(ro->ro_flags, RTF_MPATH) && + ro->ro_srcin.s_addr == src->s_addr)) { + ipstat_inc(ips_rtcachehit); + return (0); + } } ipstat_inc(ips_rtcachemiss); rtfree(ro->ro_rt); - ro->ro_rt = NULL; + memset(ro, 0, sizeof(*ro)); ro->ro_generation = gen; ro->ro_tableid = rtableid; - memset(&ro->ro_dst, 0, sizeof(ro->ro_dst)); ro->ro_dstsin.sin_family = AF_INET; ro->ro_dstsin.sin_len = sizeof(struct sockaddr_in); - ro->ro_dstsin.sin_addr = addr; + ro->ro_dstsin.sin_addr = dst; + if (src != NULL) { + ro->ro_srcin = *src; + SET(ro->ro_flags, RTF_MPATH); + } return (ESRCH); } #ifdef INET6 int -route6_cache(struct route *ro, const struct in6_addr *addr, u_int rtableid) +route6_cache(struct route *ro, const struct in6_addr *dst, + const struct in6_addr *src, u_int rtableid) { u_long gen; @@ -245,21 +255,29 @@ route6_cache(struct route *ro, const str ro->ro_generation == gen && ro->ro_tableid == rtableid && ro->ro_dstsa.sa_family == AF_INET6 && - IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, addr)) { - ip6stat_inc(ip6s_rtcachehit); - return (0); + IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, dst)) { + if (src == NULL || !ip6_multipath || + !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) || + (ISSET(ro->ro_flags, RTF_MPATH) && + IN6_ARE_ADDR_EQUAL(&ro->ro_srcin6, src))) { + ip6stat_inc(ip6s_rtcachehit); + return (0); + } } ip6stat_inc(ip6s_rtcachemiss); rtfree(ro->ro_rt); - ro->ro_rt = NULL; + memset(ro, 0, sizeof(*ro)); ro->ro_generation = gen; ro->ro_tableid = rtableid; - memset(&ro->ro_dst, 0, sizeof(ro->ro_dst)); ro->ro_dstsin6.sin6_family = AF_INET6; ro->ro_dstsin6.sin6_len = sizeof(struct sockaddr_in6); - ro->ro_dstsin6.sin6_addr = *addr; + ro->ro_dstsin6.sin6_addr = *dst; + if (src != NULL) { + ro->ro_srcin6 = *src; + SET(ro->ro_flags, RTF_MPATH); + } return (ESRCH); } Index: net/route.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v diff -u -p -r1.206 route.h --- net/route.h 13 Feb 2024 12:22:09 -0000 1.206 +++ net/route.h 20 Feb 2024 18:19:42 -0000 @@ -393,13 +393,15 @@ struct route { u_long ro_generation; u_long ro_tableid; /* u_long because of alignment */ union { - struct sockaddr rod_sa; - struct sockaddr_in rod_sin; - struct sockaddr_in6 rod_sin6; - } ro_dst; -#define ro_dstsa ro_dst.rod_sa -#define ro_dstsin ro_dst.rod_sin -#define ro_dstsin6 ro_dst.rod_sin6 + struct sockaddr ro_dstsa; + struct sockaddr_in ro_dstsin; + struct sockaddr_in6 ro_dstsin6; + }; + union { + struct in_addr ro_srcin; + struct in6_addr ro_srcin6; + }; + u_int ro_flags; }; #endif /* __BSD_VISIBLE */ @@ -462,8 +464,10 @@ struct if_ieee80211_data; struct bfd_config; void route_init(void); -int route_cache(struct route *, struct in_addr, u_int); -int route6_cache(struct route *, const struct in6_addr *, u_int); +int route_cache(struct route *, struct in_addr, const struct in_addr *, + u_int); +int route6_cache(struct route *, const struct in6_addr *, + const struct in6_addr *, u_int); void rtm_ifchg(struct ifnet *); void rtm_ifannounce(struct ifnet *, int); void rtm_bfd(struct bfd_config *); Index: netinet/in_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v diff -u -p -r1.293 in_pcb.c --- netinet/in_pcb.c 13 Feb 2024 12:22:09 -0000 1.293 +++ netinet/in_pcb.c 20 Feb 2024 18:17:13 -0000 @@ -919,7 +919,8 @@ in_pcbrtentry(struct inpcb *inp) if (inp->inp_faddr.s_addr == INADDR_ANY) return (NULL); - if (route_cache(ro, inp->inp_faddr, inp->inp_rtableid)) { + if (route_cache(ro, inp->inp_faddr, &inp->inp_laddr, + inp->inp_rtableid)) { ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, &inp->inp_laddr.s_addr, ro->ro_tableid); } @@ -982,7 +983,7 @@ in_pcbselsrc(struct in_addr *insrc, stru * If route is known or can be allocated now, * our src addr is taken from the i/f, else punt. */ - if (route_cache(ro, sin->sin_addr, rtableid)) { + if (route_cache(ro, sin->sin_addr, NULL, rtableid)) { /* No route yet, so try to acquire one */ ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid); } Index: netinet/ip_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v diff -u -p -r1.389 ip_input.c --- netinet/ip_input.c 13 Feb 2024 12:22:09 -0000 1.389 +++ netinet/ip_input.c 20 Feb 2024 18:46:25 -0000 @@ -118,7 +118,6 @@ const struct sysctl_bounded_args ipctl_v { IPCTL_IPPORT_HILASTAUTO, &ipport_hilastauto, 0, 65535 }, { IPCTL_IPPORT_MAXQUEUE, &ip_maxqueue, 0, 10000 }, { IPCTL_MFORWARDING, &ipmforwarding, 0, 1 }, - { IPCTL_MULTIPATH, &ipmultipath, 0, 1 }, { IPCTL_ARPTIMEOUT, &arpt_keep, 0, INT_MAX }, { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX }, }; @@ -1491,7 +1490,7 @@ ip_forward(struct mbuf *m, struct ifnet } ro.ro_rt = NULL; - route_cache(&ro, ip->ip_dst, m->m_pkthdr.ph_rtableid); + route_cache(&ro, ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid); if (!rtisvalid(rt)) { rtfree(rt); rt = rtalloc_mpath(&ro.ro_dstsa, &ip->ip_src.s_addr, @@ -1633,10 +1632,10 @@ int ip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int error; #ifdef MROUTING extern struct mrtstat mrtstat; #endif + int oldval, error; /* Almost all sysctl names at this level are terminal. */ if (namelen != 1 && name[0] != IPCTL_IFQUEUE && @@ -1721,6 +1720,15 @@ ip_sysctl(int *name, u_int namelen, void case IPCTL_MRTVIF: return (EOPNOTSUPP); #endif + case IPCTL_MULTIPATH: + NET_LOCK(); + oldval = ipmultipath; + error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, + &ipmultipath, 0, 1); + if (oldval != ipmultipath) + atomic_inc_long(&rtgeneration); + NET_UNLOCK(); + return (error); default: NET_LOCK(); error = sysctl_bounded_arr(ipctl_vars, nitems(ipctl_vars), Index: netinet/ip_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v diff -u -p -r1.395 ip_output.c --- netinet/ip_output.c 13 Feb 2024 12:22:09 -0000 1.395 +++ netinet/ip_output.c 20 Feb 2024 18:17:13 -0000 @@ -166,7 +166,7 @@ reroute: * If there is a cached route, check that it is to the same * destination and is still up. If not, free it and try again. */ - route_cache(ro, ip->ip_dst, m->m_pkthdr.ph_rtableid); + route_cache(ro, ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid); dst = &ro->ro_dstsin; if ((IN_MULTICAST(ip->ip_dst.s_addr) || Index: netinet6/in6_pcb.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v diff -u -p -r1.138 in6_pcb.c --- netinet6/in6_pcb.c 13 Feb 2024 12:22:09 -0000 1.138 +++ netinet6/in6_pcb.c 20 Feb 2024 18:17:13 -0000 @@ -565,7 +565,8 @@ in6_pcbrtentry(struct inpcb *inp) if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) return (NULL); - if (route6_cache(ro, &inp->inp_faddr6, inp->inp_rtableid)) { + if (route6_cache(ro, &inp->inp_faddr6, &inp->inp_laddr6, + inp->inp_rtableid)) { ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, &inp->inp_laddr6.s6_addr32[0], ro->ro_tableid); } Index: netinet6/in6_src.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_src.c,v diff -u -p -r1.94 in6_src.c --- netinet6/in6_src.c 13 Feb 2024 12:22:09 -0000 1.94 +++ netinet6/in6_src.c 20 Feb 2024 18:17:13 -0000 @@ -179,8 +179,8 @@ in6_pcbselsrc(const struct in6_addr **in * If route is known or can be allocated now, * our src addr is taken from the i/f, else punt. */ - if (route6_cache(ro, dst, rtableid)) { - ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE, ro->ro_tableid); + if (route6_cache(ro, dst, NULL, rtableid)) { + ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid); } /* @@ -304,7 +304,7 @@ in6_selectroute(const struct in6_addr *d * a new one. */ if (ro) { - if (route6_cache(ro, dst, rtableid)) { + if (route6_cache(ro, dst, NULL, rtableid)) { /* No route yet, so try to acquire one */ ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid); Index: netinet6/ip6_forward.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v diff -u -p -r1.114 ip6_forward.c --- netinet6/ip6_forward.c 13 Feb 2024 12:22:09 -0000 1.114 +++ netinet6/ip6_forward.c 20 Feb 2024 18:17:13 -0000 @@ -166,7 +166,8 @@ reroute: #endif /* IPSEC */ ro.ro_rt = NULL; - route6_cache(&ro, &ip6->ip6_dst, m->m_pkthdr.ph_rtableid); + route6_cache(&ro, &ip6->ip6_dst, &ip6->ip6_src, + m->m_pkthdr.ph_rtableid); dst = &ro.ro_dstsa; if (!rtisvalid(rt)) { rtfree(rt); Index: netinet6/ip6_input.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v diff -u -p -r1.257 ip6_input.c --- netinet6/ip6_input.c 3 Dec 2023 20:36:24 -0000 1.257 +++ netinet6/ip6_input.c 20 Feb 2024 18:46:09 -0000 @@ -1451,7 +1451,6 @@ const struct sysctl_bounded_args ipv6ctl { IPV6CTL_USE_DEPRECATED, &ip6_use_deprecated, 0, 1 }, { IPV6CTL_MAXFRAGS, &ip6_maxfrags, 0, 1000 }, { IPV6CTL_MFORWARDING, &ip6_mforwarding, 0, 1 }, - { IPV6CTL_MULTIPATH, &ip6_multipath, 0, 1 }, { IPV6CTL_MCAST_PMTU, &ip6_mcast_pmtu, 0, 1 }, { IPV6CTL_NEIGHBORGCTHRESH, &ip6_neighborgcthresh, -1, 5 * 2048 }, { IPV6CTL_MAXDYNROUTES, &ip6_maxdynroutes, -1, 5 * 4096 }, @@ -1499,7 +1498,7 @@ ip6_sysctl(int *name, u_int namelen, voi #ifdef MROUTING extern struct mrt6stat mrt6stat; #endif - int error; + int oldval, error; /* Almost all sysctl names at this level are terminal. */ if (namelen != 1 && name[0] != IPV6CTL_IFQUEUE) @@ -1551,6 +1550,15 @@ ip6_sysctl(int *name, u_int namelen, voi oldp, oldlenp, newp, newlen, &ip6intrq)); case IPV6CTL_SOIIKEY: return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen)); + case IPV6CTL_MULTIPATH: + NET_LOCK(); + oldval = ip6_multipath; + error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, + &ip6_multipath, 0, 1); + if (oldval != ip6_multipath) + atomic_inc_long(&rtgeneration); + NET_UNLOCK(); + return (error); default: NET_LOCK(); error = sysctl_bounded_arr(ipv6ctl_vars, nitems(ipv6ctl_vars), Index: netinet6/ip6_output.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v diff -u -p -r1.286 ip6_output.c --- netinet6/ip6_output.c 13 Feb 2024 12:22:09 -0000 1.286 +++ netinet6/ip6_output.c 20 Feb 2024 18:17:13 -0000 @@ -480,7 +480,7 @@ reroute: goto bad; } } else { - route6_cache(ro, &ip6->ip6_dst, m->m_pkthdr.ph_rtableid); + route6_cache(ro, &ip6->ip6_dst, NULL, m->m_pkthdr.ph_rtableid); } if (rt && (rt->rt_flags & RTF_GATEWAY) &&