From: Alexander Bluhm Subject: Re: route generation number for route cache To: tech@openbsd.org Date: Tue, 30 Jan 2024 14:57:42 +0100 On Tue, Jan 30, 2024 at 02:35:35PM +0100, Claudio Jeker wrote: > On Tue, Jan 30, 2024 at 01:43:34PM +0100, Alexander Bluhm wrote: > > Hi, > > > > The outgoing route is cached at the inpcb. This cache is only > > invalidated when the socket closes or if the route gets invalid. > > More specific routes are not detected. Especially with dynamic > > routing protocols, sockets must be closed and reopened to use the > > correct route. Running ping during a route change shows the problem. > > > > To solve this, I added a route generation number that is updated > > whenever the routing table changes. The lookup in struct route is > > put into the route_validate() API. If the generation number is too > > old, the cached route gets discarded. > > > > This is the implementation for ip_output() and ip_forward(). IPv6 > > and more places will follow. > > > > ok? > > I find the use of membar_producer and membar_consumer together with atomic > operations a questionable pattern. I would expect the use of > membar_exit_before_atomic() and membar_enter_after_atomic(). membar_exit_before_atomic() and membar_enter_after_atomic() affect both, load and sotre. I am only interessted in load and store for each case. So I think membar_producer() and membar_consumer() are more specific. membar_enter_after_atomic() and membar_exit_before_atomic() exist for architectures where they are less epxensove than membar_enter() membar_exit(). On amd64 everything is just a __membar(""). On arm64 the ..._atomic() funtions choose the more expensive operations of membar_enter() and membar_exit(). For generation number this too much. > > Index: net/route.c > > =================================================================== > > RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v > > diff -u -p -r1.426 route.c > > --- net/route.c 13 Nov 2023 17:18:27 -0000 1.426 > > +++ net/route.c 29 Jan 2024 22:25:44 -0000 > > @@ -140,6 +140,7 @@ > > > > /* > > * Locks used to protect struct members: > > + * a atomic operations > > * I immutable after creation > > * L rtlabel_mtx > > * T rttimer_mtx > > @@ -152,8 +153,9 @@ static uint32_t rt_hashjitter; > > > > extern unsigned int rtmap_limit; > > > > -struct cpumem * rtcounters; > > -int rttrash; /* routes not in table but not freed */ > > +struct cpumem *rtcounters; > > +int rttrash; /* [a] routes not in table but not freed */ > > +u_long rtgeneration; /* [a] generation number, routes changed */ > > > > struct pool rtentry_pool; /* pool for rtentry structures */ > > struct pool rttimer_pool; /* pool for rttimer structures */ > > @@ -199,6 +201,33 @@ route_init(void) > > #endif > > } > > > > +void > > +route_validate(struct route *ro, struct in_addr addr, u_int rtableid) > > +{ > > + u_long gen; > > + > > + gen = atomic_load_long(&rtgeneration); > > + membar_consumer(); > > + > > + if (rtisvalid(ro->ro_rt) && > > + ro->ro_generation == gen && > > + ro->ro_tableid == rtableid && > > + ro->ro_dst.sa_family == AF_INET && > > + satosin(&ro->ro_dst)->sin_addr.s_addr == addr.s_addr) { > > + return; > > + } > > + > > + rtfree(ro->ro_rt); > > + ro->ro_rt = NULL; > > + ro->ro_generation = gen; > > + ro->ro_tableid = rtableid; > > + > > + memset(&ro->ro_dst, 0, sizeof(ro->ro_dst)); > > + satosin(&ro->ro_dst)->sin_family = AF_INET; > > + satosin(&ro->ro_dst)->sin_len = sizeof(struct sockaddr_in); > > + satosin(&ro->ro_dst)->sin_addr = addr; > > +} > > + > > /* > > * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise. > > */ > > @@ -824,6 +853,9 @@ rtrequest_delete(struct rt_addrinfo *inf > > else > > rtfree(rt); > > > > + membar_producer(); > > + atomic_inc_long(&rtgeneration); > > + > > return (0); > > } > > > > @@ -992,6 +1024,10 @@ rtrequest(int req, struct rt_addrinfo *i > > *ret_nrt = rt; > > else > > rtfree(rt); > > + > > + membar_producer(); > > + atomic_inc_long(&rtgeneration); > > + > > break; > > } > > > > @@ -1828,6 +1864,9 @@ rt_if_linkstate_change(struct rtentry *r > > rt->rt_priority | RTP_DOWN, rt); > > } > > if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask)); > > + > > + membar_producer(); > > + atomic_inc_long(&rtgeneration); > > > > return (error); > > } > > Index: net/route.h > > =================================================================== > > RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v > > diff -u -p -r1.203 route.h > > --- net/route.h 12 Nov 2023 17:51:40 -0000 1.203 > > +++ net/route.h 29 Jan 2024 22:25:44 -0000 > > @@ -377,6 +377,7 @@ struct sockaddr_rtsearch { > > */ > > struct route { > > struct rtentry *ro_rt; > > + u_long ro_generation; > > u_long ro_tableid; /* u_long because of alignment */ > > struct sockaddr ro_dst; > > }; > > @@ -438,15 +439,18 @@ void rtlabel_unref(u_int16_t); > > #define RT_RESOLVE 1 > > > > extern struct rtstat rtstat; > > +extern u_long rtgeneration; > > > > struct mbuf; > > struct socket; > > struct ifnet; > > +struct in_addr; > > struct sockaddr_in6; > > struct if_ieee80211_data; > > struct bfd_config; > > > > void route_init(void); > > +void route_validate(struct route *, struct in_addr, u_int); > > void rtm_ifchg(struct ifnet *); > > void rtm_ifannounce(struct ifnet *, int); > > void rtm_bfd(struct bfd_config *); > > Index: netinet/ip_input.c > > =================================================================== > > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v > > diff -u -p -r1.387 ip_input.c > > --- netinet/ip_input.c 16 Sep 2023 09:33:27 -0000 1.387 > > +++ netinet/ip_input.c 29 Jan 2024 22:24:13 -0000 > > @@ -1475,7 +1475,6 @@ ip_forward(struct mbuf *m, struct ifnet > > { > > struct mbuf mfake, *mcopy = NULL; > > struct ip *ip = mtod(m, struct ip *); > > - struct sockaddr_in *sin; > > struct route ro; > > int error = 0, type = 0, code = 0, destmtu = 0, fake = 0, len; > > u_int32_t dest; > > @@ -1491,15 +1490,11 @@ ip_forward(struct mbuf *m, struct ifnet > > goto freecopy; > > } > > > > - memset(&ro, 0, sizeof(ro)); > > - sin = satosin(&ro.ro_dst); > > - sin->sin_family = AF_INET; > > - sin->sin_len = sizeof(*sin); > > - sin->sin_addr = ip->ip_dst; > > - > > + ro.ro_rt = NULL; > > + route_validate(&ro, ip->ip_dst, m->m_pkthdr.ph_rtableid); > > if (!rtisvalid(rt)) { > > rtfree(rt); > > - rt = rtalloc_mpath(sintosa(sin), &ip->ip_src.s_addr, > > + rt = rtalloc_mpath(&ro.ro_dst, &ip->ip_src.s_addr, > > m->m_pkthdr.ph_rtableid); > > if (rt == NULL) { > > ipstat_inc(ips_noroute); > > @@ -1507,6 +1502,7 @@ ip_forward(struct mbuf *m, struct ifnet > > return; > > } > > } > > + ro.ro_rt = rt; > > > > /* > > * Save at most 68 bytes of the packet in case > > @@ -1557,8 +1553,6 @@ ip_forward(struct mbuf *m, struct ifnet > > } > > } > > > > - ro.ro_rt = rt; > > - ro.ro_tableid = m->m_pkthdr.ph_rtableid; > > error = ip_output(m, NULL, &ro, > > (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), > > NULL, NULL, 0); > > Index: netinet/ip_output.c > > =================================================================== > > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v > > diff -u -p -r1.393 ip_output.c > > --- netinet/ip_output.c 18 Jan 2024 11:03:16 -0000 1.393 > > +++ netinet/ip_output.c 29 Jan 2024 22:24:13 -0000 > > @@ -159,28 +159,15 @@ reroute: > > */ > > if (ro == NULL) { > > ro = &iproute; > > - memset(ro, 0, sizeof(*ro)); > > + ro->ro_rt = NULL; > > } > > > > - dst = satosin(&ro->ro_dst); > > - > > /* > > * If there is a cached route, check that it is to the same > > * destination and is still up. If not, free it and try again. > > */ > > - if (!rtisvalid(ro->ro_rt) || > > - dst->sin_addr.s_addr != ip->ip_dst.s_addr || > > - ro->ro_tableid != m->m_pkthdr.ph_rtableid) { > > - rtfree(ro->ro_rt); > > - ro->ro_rt = NULL; > > - } > > - > > - if (ro->ro_rt == NULL) { > > - dst->sin_family = AF_INET; > > - dst->sin_len = sizeof(*dst); > > - dst->sin_addr = ip->ip_dst; > > - ro->ro_tableid = m->m_pkthdr.ph_rtableid; > > - } > > + route_validate(ro, ip->ip_dst, m->m_pkthdr.ph_rtableid); > > + dst = satosin(&ro->ro_dst); > > > > if ((IN_MULTICAST(ip->ip_dst.s_addr) || > > (ip->ip_dst.s_addr == INADDR_BROADCAST)) && > > Index: netinet6/in6.h > > =================================================================== > > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6.h,v > > diff -u -p -r1.112 in6.h > > --- netinet6/in6.h 27 Jan 2024 21:13:46 -0000 1.112 > > +++ netinet6/in6.h 29 Jan 2024 22:31:51 -0000 > > @@ -145,10 +145,11 @@ extern const struct in6_addr in6addr_lin > > > > #if __BSD_VISIBLE > > /* > > - * IPv6 route structure > > + * IPv6 route structure, keep fields in sync with struct route > > */ > > struct route_in6 { > > struct rtentry *ro_rt; > > + u_long ro_generation; > > u_long ro_tableid; /* padded to long for alignment */ > > struct sockaddr_in6 ro_dst; > > }; > > > > -- > :wq Claudio