From: Claudio Jeker Subject: Re: route cache multi path To: Alexander Bluhm Cc: tech@openbsd.org Date: Wed, 21 Feb 2024 11:14:52 +0100 On Tue, Feb 20, 2024 at 10:54:20PM +0100, Alexander Bluhm wrote: > Hi, > > This makes the route cache aware of multipath routing. If source > address or multipath flags change, discard cached route. > > ok? I wonder about the RTF_MPATH check. Isn't the generation number preventing this already? You can't add or clear RTF_MPATH (which is done by the kernel) without adding or removing a route. Also there is no need for ro_flags. If src == NULL then ro->ro_srcin.s_addr must be INADDR_ANY to be a match else src and ro->ro_srcin need to match. A similar check can be made for IPv6. For the ipmultipath check I would suggest something similar. Just bump the generation number in the sysctl (which your diff already does). Also can we please pass dst and src the same whay in route_cache() passing one by value and the other by reference is strange. The use of struct in_addr / in6_addr for the source address is probably something I would like to change at some point. I would prefer to use also sockaddr there but that should be tackled after or with the rtalloc_mpath() clean up. > bluhm > > Index: net/route.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v > diff -u -p -r1.432 route.c > --- net/route.c 13 Feb 2024 12:22:09 -0000 1.432 > +++ net/route.c 20 Feb 2024 21:25:11 -0000 > @@ -202,7 +202,8 @@ route_init(void) > } > > int > -route_cache(struct route *ro, struct in_addr addr, u_int rtableid) > +route_cache(struct route *ro, struct in_addr dst, const struct in_addr *src, > + u_int rtableid) > { > u_long gen; > > @@ -213,28 +214,37 @@ route_cache(struct route *ro, struct in_ > ro->ro_generation == gen && > ro->ro_tableid == rtableid && > ro->ro_dstsa.sa_family == AF_INET && > - ro->ro_dstsin.sin_addr.s_addr == addr.s_addr) { > - ipstat_inc(ips_rtcachehit); > - return (0); > + ro->ro_dstsin.sin_addr.s_addr == dst.s_addr) { > + if (src == NULL || !ipmultipath || > + !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) || > + (ISSET(ro->ro_flags, RTF_MPATH) && > + ro->ro_srcin.s_addr == src->s_addr)) { > + ipstat_inc(ips_rtcachehit); > + return (0); > + } > } > > ipstat_inc(ips_rtcachemiss); > rtfree(ro->ro_rt); > - ro->ro_rt = NULL; > + memset(ro, 0, sizeof(*ro)); > ro->ro_generation = gen; > ro->ro_tableid = rtableid; > > - memset(&ro->ro_dst, 0, sizeof(ro->ro_dst)); > ro->ro_dstsin.sin_family = AF_INET; > ro->ro_dstsin.sin_len = sizeof(struct sockaddr_in); > - ro->ro_dstsin.sin_addr = addr; > + ro->ro_dstsin.sin_addr = dst; > + if (src != NULL) { > + ro->ro_srcin = *src; > + SET(ro->ro_flags, RTF_MPATH); > + } > > return (ESRCH); > } > > #ifdef INET6 > int > -route6_cache(struct route *ro, const struct in6_addr *addr, u_int rtableid) > +route6_cache(struct route *ro, const struct in6_addr *dst, > + const struct in6_addr *src, u_int rtableid) > { > u_long gen; > > @@ -245,21 +255,29 @@ route6_cache(struct route *ro, const str > ro->ro_generation == gen && > ro->ro_tableid == rtableid && > ro->ro_dstsa.sa_family == AF_INET6 && > - IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, addr)) { > - ip6stat_inc(ip6s_rtcachehit); > - return (0); > + IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, dst)) { > + if (src == NULL || !ip6_multipath || > + !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) || > + (ISSET(ro->ro_flags, RTF_MPATH) && > + IN6_ARE_ADDR_EQUAL(&ro->ro_srcin6, src))) { > + ip6stat_inc(ip6s_rtcachehit); > + return (0); > + } > } > > ip6stat_inc(ip6s_rtcachemiss); > rtfree(ro->ro_rt); > - ro->ro_rt = NULL; > + memset(ro, 0, sizeof(*ro)); > ro->ro_generation = gen; > ro->ro_tableid = rtableid; > > - memset(&ro->ro_dst, 0, sizeof(ro->ro_dst)); > ro->ro_dstsin6.sin6_family = AF_INET6; > ro->ro_dstsin6.sin6_len = sizeof(struct sockaddr_in6); > - ro->ro_dstsin6.sin6_addr = *addr; > + ro->ro_dstsin6.sin6_addr = *dst; > + if (src != NULL) { > + ro->ro_srcin6 = *src; > + SET(ro->ro_flags, RTF_MPATH); > + } > > return (ESRCH); > } > Index: net/route.h > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v > diff -u -p -r1.206 route.h > --- net/route.h 13 Feb 2024 12:22:09 -0000 1.206 > +++ net/route.h 20 Feb 2024 18:19:42 -0000 > @@ -393,13 +393,15 @@ struct route { > u_long ro_generation; > u_long ro_tableid; /* u_long because of alignment */ > union { > - struct sockaddr rod_sa; > - struct sockaddr_in rod_sin; > - struct sockaddr_in6 rod_sin6; > - } ro_dst; > -#define ro_dstsa ro_dst.rod_sa > -#define ro_dstsin ro_dst.rod_sin > -#define ro_dstsin6 ro_dst.rod_sin6 > + struct sockaddr ro_dstsa; > + struct sockaddr_in ro_dstsin; > + struct sockaddr_in6 ro_dstsin6; > + }; > + union { > + struct in_addr ro_srcin; > + struct in6_addr ro_srcin6; > + }; > + u_int ro_flags; > }; > > #endif /* __BSD_VISIBLE */ > @@ -462,8 +464,10 @@ struct if_ieee80211_data; > struct bfd_config; > > void route_init(void); > -int route_cache(struct route *, struct in_addr, u_int); > -int route6_cache(struct route *, const struct in6_addr *, u_int); > +int route_cache(struct route *, struct in_addr, const struct in_addr *, > + u_int); > +int route6_cache(struct route *, const struct in6_addr *, > + const struct in6_addr *, u_int); > void rtm_ifchg(struct ifnet *); > void rtm_ifannounce(struct ifnet *, int); > void rtm_bfd(struct bfd_config *); > Index: netinet/in_pcb.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v > diff -u -p -r1.293 in_pcb.c > --- netinet/in_pcb.c 13 Feb 2024 12:22:09 -0000 1.293 > +++ netinet/in_pcb.c 20 Feb 2024 18:17:13 -0000 > @@ -919,7 +919,8 @@ in_pcbrtentry(struct inpcb *inp) > > if (inp->inp_faddr.s_addr == INADDR_ANY) > return (NULL); > - if (route_cache(ro, inp->inp_faddr, inp->inp_rtableid)) { > + if (route_cache(ro, inp->inp_faddr, &inp->inp_laddr, > + inp->inp_rtableid)) { > ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, > &inp->inp_laddr.s_addr, ro->ro_tableid); > } > @@ -982,7 +983,7 @@ in_pcbselsrc(struct in_addr *insrc, stru > * If route is known or can be allocated now, > * our src addr is taken from the i/f, else punt. > */ > - if (route_cache(ro, sin->sin_addr, rtableid)) { > + if (route_cache(ro, sin->sin_addr, NULL, rtableid)) { > /* No route yet, so try to acquire one */ > ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid); > } > Index: netinet/ip_input.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v > diff -u -p -r1.389 ip_input.c > --- netinet/ip_input.c 13 Feb 2024 12:22:09 -0000 1.389 > +++ netinet/ip_input.c 20 Feb 2024 18:46:25 -0000 > @@ -118,7 +118,6 @@ const struct sysctl_bounded_args ipctl_v > { IPCTL_IPPORT_HILASTAUTO, &ipport_hilastauto, 0, 65535 }, > { IPCTL_IPPORT_MAXQUEUE, &ip_maxqueue, 0, 10000 }, > { IPCTL_MFORWARDING, &ipmforwarding, 0, 1 }, > - { IPCTL_MULTIPATH, &ipmultipath, 0, 1 }, > { IPCTL_ARPTIMEOUT, &arpt_keep, 0, INT_MAX }, > { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX }, > }; > @@ -1491,7 +1490,7 @@ ip_forward(struct mbuf *m, struct ifnet > } > > ro.ro_rt = NULL; > - route_cache(&ro, ip->ip_dst, m->m_pkthdr.ph_rtableid); > + route_cache(&ro, ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid); > if (!rtisvalid(rt)) { > rtfree(rt); > rt = rtalloc_mpath(&ro.ro_dstsa, &ip->ip_src.s_addr, > @@ -1633,10 +1632,10 @@ int > ip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, > size_t newlen) > { > - int error; > #ifdef MROUTING > extern struct mrtstat mrtstat; > #endif > + int oldval, error; > > /* Almost all sysctl names at this level are terminal. */ > if (namelen != 1 && name[0] != IPCTL_IFQUEUE && > @@ -1721,6 +1720,15 @@ ip_sysctl(int *name, u_int namelen, void > case IPCTL_MRTVIF: > return (EOPNOTSUPP); > #endif > + case IPCTL_MULTIPATH: > + NET_LOCK(); > + oldval = ipmultipath; > + error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, > + &ipmultipath, 0, 1); > + if (oldval != ipmultipath) > + atomic_inc_long(&rtgeneration); > + NET_UNLOCK(); > + return (error); > default: > NET_LOCK(); > error = sysctl_bounded_arr(ipctl_vars, nitems(ipctl_vars), > Index: netinet/ip_output.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v > diff -u -p -r1.395 ip_output.c > --- netinet/ip_output.c 13 Feb 2024 12:22:09 -0000 1.395 > +++ netinet/ip_output.c 20 Feb 2024 18:17:13 -0000 > @@ -166,7 +166,7 @@ reroute: > * If there is a cached route, check that it is to the same > * destination and is still up. If not, free it and try again. > */ > - route_cache(ro, ip->ip_dst, m->m_pkthdr.ph_rtableid); > + route_cache(ro, ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid); > dst = &ro->ro_dstsin; > > if ((IN_MULTICAST(ip->ip_dst.s_addr) || > Index: netinet6/in6_pcb.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v > diff -u -p -r1.138 in6_pcb.c > --- netinet6/in6_pcb.c 13 Feb 2024 12:22:09 -0000 1.138 > +++ netinet6/in6_pcb.c 20 Feb 2024 18:17:13 -0000 > @@ -565,7 +565,8 @@ in6_pcbrtentry(struct inpcb *inp) > > if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_faddr6)) > return (NULL); > - if (route6_cache(ro, &inp->inp_faddr6, inp->inp_rtableid)) { > + if (route6_cache(ro, &inp->inp_faddr6, &inp->inp_laddr6, > + inp->inp_rtableid)) { > ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, > &inp->inp_laddr6.s6_addr32[0], ro->ro_tableid); > } > Index: netinet6/in6_src.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_src.c,v > diff -u -p -r1.94 in6_src.c > --- netinet6/in6_src.c 13 Feb 2024 12:22:09 -0000 1.94 > +++ netinet6/in6_src.c 20 Feb 2024 18:17:13 -0000 > @@ -179,8 +179,8 @@ in6_pcbselsrc(const struct in6_addr **in > * If route is known or can be allocated now, > * our src addr is taken from the i/f, else punt. > */ > - if (route6_cache(ro, dst, rtableid)) { > - ro->ro_rt = rtalloc(&ro->ro_dstsa, RT_RESOLVE, ro->ro_tableid); > + if (route6_cache(ro, dst, NULL, rtableid)) { > + ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, ro->ro_tableid); > } > > /* > @@ -304,7 +304,7 @@ in6_selectroute(const struct in6_addr *d > * a new one. > */ > if (ro) { > - if (route6_cache(ro, dst, rtableid)) { > + if (route6_cache(ro, dst, NULL, rtableid)) { > /* No route yet, so try to acquire one */ > ro->ro_rt = rtalloc_mpath(&ro->ro_dstsa, NULL, > ro->ro_tableid); > Index: netinet6/ip6_forward.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_forward.c,v > diff -u -p -r1.114 ip6_forward.c > --- netinet6/ip6_forward.c 13 Feb 2024 12:22:09 -0000 1.114 > +++ netinet6/ip6_forward.c 20 Feb 2024 18:17:13 -0000 > @@ -166,7 +166,8 @@ reroute: > #endif /* IPSEC */ > > ro.ro_rt = NULL; > - route6_cache(&ro, &ip6->ip6_dst, m->m_pkthdr.ph_rtableid); > + route6_cache(&ro, &ip6->ip6_dst, &ip6->ip6_src, > + m->m_pkthdr.ph_rtableid); > dst = &ro.ro_dstsa; > if (!rtisvalid(rt)) { > rtfree(rt); > Index: netinet6/ip6_input.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v > diff -u -p -r1.257 ip6_input.c > --- netinet6/ip6_input.c 3 Dec 2023 20:36:24 -0000 1.257 > +++ netinet6/ip6_input.c 20 Feb 2024 18:46:09 -0000 > @@ -1451,7 +1451,6 @@ const struct sysctl_bounded_args ipv6ctl > { IPV6CTL_USE_DEPRECATED, &ip6_use_deprecated, 0, 1 }, > { IPV6CTL_MAXFRAGS, &ip6_maxfrags, 0, 1000 }, > { IPV6CTL_MFORWARDING, &ip6_mforwarding, 0, 1 }, > - { IPV6CTL_MULTIPATH, &ip6_multipath, 0, 1 }, > { IPV6CTL_MCAST_PMTU, &ip6_mcast_pmtu, 0, 1 }, > { IPV6CTL_NEIGHBORGCTHRESH, &ip6_neighborgcthresh, -1, 5 * 2048 }, > { IPV6CTL_MAXDYNROUTES, &ip6_maxdynroutes, -1, 5 * 4096 }, > @@ -1499,7 +1498,7 @@ ip6_sysctl(int *name, u_int namelen, voi > #ifdef MROUTING > extern struct mrt6stat mrt6stat; > #endif > - int error; > + int oldval, error; > > /* Almost all sysctl names at this level are terminal. */ > if (namelen != 1 && name[0] != IPV6CTL_IFQUEUE) > @@ -1551,6 +1550,15 @@ ip6_sysctl(int *name, u_int namelen, voi > oldp, oldlenp, newp, newlen, &ip6intrq)); > case IPV6CTL_SOIIKEY: > return (ip6_sysctl_soiikey(oldp, oldlenp, newp, newlen)); > + case IPV6CTL_MULTIPATH: > + NET_LOCK(); > + oldval = ip6_multipath; > + error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, > + &ip6_multipath, 0, 1); > + if (oldval != ip6_multipath) > + atomic_inc_long(&rtgeneration); > + NET_UNLOCK(); > + return (error); > default: > NET_LOCK(); > error = sysctl_bounded_arr(ipv6ctl_vars, nitems(ipv6ctl_vars), > Index: netinet6/ip6_output.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v > diff -u -p -r1.286 ip6_output.c > --- netinet6/ip6_output.c 13 Feb 2024 12:22:09 -0000 1.286 > +++ netinet6/ip6_output.c 20 Feb 2024 18:17:13 -0000 > @@ -480,7 +480,7 @@ reroute: > goto bad; > } > } else { > - route6_cache(ro, &ip6->ip6_dst, m->m_pkthdr.ph_rtableid); > + route6_cache(ro, &ip6->ip6_dst, NULL, m->m_pkthdr.ph_rtableid); > } > > if (rt && (rt->rt_flags & RTF_GATEWAY) && > -- :wq Claudio