From: Alexander Bluhm Subject: Re: route generation number for route cache To: Mark Kettenis , tech@openbsd.org, Claudio Jeker Date: Wed, 31 Jan 2024 14:17:21 +0100 On Wed, Jan 31, 2024 at 10:53:09AM +0100, Claudio Jeker wrote: > Fine by me. It is a bummer that our atomic functions are not memory model > aware. It seems like one of those footguns that just keep lingering around > and fire at random. Atomic operation is for changing one memory location from different CPU simultanously. Memory barrier keeps the view on different memory locations consistent over multiple CPU. For generation numbers you need both. That is the model our CPU, C compiler, and kernel have. Maybe C11 is smarter, but that does not matter for this diff. > Why is the function called route_validate()? I find this name very > confusing since it does not really validate the route. It actually > invalidates the cached route object if needed. > > Isn't there a better name for this? Maybe route_cachecheck() or some other > form. I was not happy with the name either. Just call it route_cache() for now. > Last but not least I despise struct route it is one of those things that > should not exists like this since it embedds a struct sockaddr and for > IPv6 there needs to be a different struct with bigger size (struct > route_in6). There was generic struct route for all domains. Then came IPv6 which does not fit. All other domains were removed, so generic struct route is IPv4 only. We should have struct route_in and struct route_in6 which contain specific sockaddr. I can rename that in a later diff. > The cherry on top of this turd is that it holds copies of data > that are readily available (which your diff shows here). I don't get that. Route cache has route, generation number, routing table and socket address. If the latter match and the route is valid, the cache is valid. I don't see redundancy. Or do you think struct route_in6 should have in6_addr instead of sockaddr_in6? rtalloc() takes a struct sockaddr so it is handy to have a sockaddr_in or sockaddr_in6 in the cache. Anyway, I would like to get this in. I have IPv6 follow up diffs and renaming struct route now would make merging hard. Can I get the generation nummber commited first and then make the cache more consistent? New diff, only function route_cache() renamed. ok? bluhm Index: net/route.c =================================================================== RCS file: /cvs/src/sys/net/route.c,v diff -u -p -r1.426 route.c --- net/route.c 13 Nov 2023 17:18:27 -0000 1.426 +++ net/route.c 31 Jan 2024 12:55:19 -0000 @@ -140,6 +140,7 @@ /* * Locks used to protect struct members: + * a atomic operations * I immutable after creation * L rtlabel_mtx * T rttimer_mtx @@ -152,8 +153,9 @@ static uint32_t rt_hashjitter; extern unsigned int rtmap_limit; -struct cpumem * rtcounters; -int rttrash; /* routes not in table but not freed */ +struct cpumem *rtcounters; +int rttrash; /* [a] routes not in table but not freed */ +u_long rtgeneration; /* [a] generation number, routes changed */ struct pool rtentry_pool; /* pool for rtentry structures */ struct pool rttimer_pool; /* pool for rttimer structures */ @@ -199,6 +201,33 @@ route_init(void) #endif } +void +route_cache(struct route *ro, struct in_addr addr, u_int rtableid) +{ + u_long gen; + + gen = atomic_load_long(&rtgeneration); + membar_consumer(); + + if (rtisvalid(ro->ro_rt) && + ro->ro_generation == gen && + ro->ro_tableid == rtableid && + ro->ro_dst.sa_family == AF_INET && + satosin(&ro->ro_dst)->sin_addr.s_addr == addr.s_addr) { + return; + } + + rtfree(ro->ro_rt); + ro->ro_rt = NULL; + ro->ro_generation = gen; + ro->ro_tableid = rtableid; + + memset(&ro->ro_dst, 0, sizeof(ro->ro_dst)); + satosin(&ro->ro_dst)->sin_family = AF_INET; + satosin(&ro->ro_dst)->sin_len = sizeof(struct sockaddr_in); + satosin(&ro->ro_dst)->sin_addr = addr; +} + /* * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise. */ @@ -824,6 +853,9 @@ rtrequest_delete(struct rt_addrinfo *inf else rtfree(rt); + membar_producer(); + atomic_inc_long(&rtgeneration); + return (0); } @@ -992,6 +1024,10 @@ rtrequest(int req, struct rt_addrinfo *i *ret_nrt = rt; else rtfree(rt); + + membar_producer(); + atomic_inc_long(&rtgeneration); + break; } @@ -1828,6 +1864,9 @@ rt_if_linkstate_change(struct rtentry *r rt->rt_priority | RTP_DOWN, rt); } if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask)); + + membar_producer(); + atomic_inc_long(&rtgeneration); return (error); } Index: net/route.h =================================================================== RCS file: /cvs/src/sys/net/route.h,v diff -u -p -r1.203 route.h --- net/route.h 12 Nov 2023 17:51:40 -0000 1.203 +++ net/route.h 31 Jan 2024 12:55:19 -0000 @@ -377,6 +377,7 @@ struct sockaddr_rtsearch { */ struct route { struct rtentry *ro_rt; + u_long ro_generation; u_long ro_tableid; /* u_long because of alignment */ struct sockaddr ro_dst; }; @@ -438,15 +439,18 @@ void rtlabel_unref(u_int16_t); #define RT_RESOLVE 1 extern struct rtstat rtstat; +extern u_long rtgeneration; struct mbuf; struct socket; struct ifnet; +struct in_addr; struct sockaddr_in6; struct if_ieee80211_data; struct bfd_config; void route_init(void); +void route_cache(struct route *, struct in_addr, u_int); void rtm_ifchg(struct ifnet *); void rtm_ifannounce(struct ifnet *, int); void rtm_bfd(struct bfd_config *); Index: netinet/ip_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_input.c,v diff -u -p -r1.387 ip_input.c --- netinet/ip_input.c 16 Sep 2023 09:33:27 -0000 1.387 +++ netinet/ip_input.c 31 Jan 2024 12:55:20 -0000 @@ -1475,7 +1475,6 @@ ip_forward(struct mbuf *m, struct ifnet { struct mbuf mfake, *mcopy = NULL; struct ip *ip = mtod(m, struct ip *); - struct sockaddr_in *sin; struct route ro; int error = 0, type = 0, code = 0, destmtu = 0, fake = 0, len; u_int32_t dest; @@ -1491,15 +1490,11 @@ ip_forward(struct mbuf *m, struct ifnet goto freecopy; } - memset(&ro, 0, sizeof(ro)); - sin = satosin(&ro.ro_dst); - sin->sin_family = AF_INET; - sin->sin_len = sizeof(*sin); - sin->sin_addr = ip->ip_dst; - + ro.ro_rt = NULL; + route_cache(&ro, ip->ip_dst, m->m_pkthdr.ph_rtableid); if (!rtisvalid(rt)) { rtfree(rt); - rt = rtalloc_mpath(sintosa(sin), &ip->ip_src.s_addr, + rt = rtalloc_mpath(&ro.ro_dst, &ip->ip_src.s_addr, m->m_pkthdr.ph_rtableid); if (rt == NULL) { ipstat_inc(ips_noroute); @@ -1507,6 +1502,7 @@ ip_forward(struct mbuf *m, struct ifnet return; } } + ro.ro_rt = rt; /* * Save at most 68 bytes of the packet in case @@ -1557,8 +1553,6 @@ ip_forward(struct mbuf *m, struct ifnet } } - ro.ro_rt = rt; - ro.ro_tableid = m->m_pkthdr.ph_rtableid; error = ip_output(m, NULL, &ro, (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), NULL, NULL, 0); Index: netinet/ip_output.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_output.c,v diff -u -p -r1.393 ip_output.c --- netinet/ip_output.c 18 Jan 2024 11:03:16 -0000 1.393 +++ netinet/ip_output.c 31 Jan 2024 12:55:20 -0000 @@ -159,28 +159,15 @@ reroute: */ if (ro == NULL) { ro = &iproute; - memset(ro, 0, sizeof(*ro)); + ro->ro_rt = NULL; } - dst = satosin(&ro->ro_dst); - /* * If there is a cached route, check that it is to the same * destination and is still up. If not, free it and try again. */ - if (!rtisvalid(ro->ro_rt) || - dst->sin_addr.s_addr != ip->ip_dst.s_addr || - ro->ro_tableid != m->m_pkthdr.ph_rtableid) { - rtfree(ro->ro_rt); - ro->ro_rt = NULL; - } - - if (ro->ro_rt == NULL) { - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr = ip->ip_dst; - ro->ro_tableid = m->m_pkthdr.ph_rtableid; - } + route_cache(ro, ip->ip_dst, m->m_pkthdr.ph_rtableid); + dst = satosin(&ro->ro_dst); if ((IN_MULTICAST(ip->ip_dst.s_addr) || (ip->ip_dst.s_addr == INADDR_BROADCAST)) && Index: netinet6/in6.h =================================================================== RCS file: /cvs/src/sys/netinet6/in6.h,v diff -u -p -r1.112 in6.h --- netinet6/in6.h 27 Jan 2024 21:13:46 -0000 1.112 +++ netinet6/in6.h 31 Jan 2024 12:55:20 -0000 @@ -145,10 +145,11 @@ extern const struct in6_addr in6addr_lin #if __BSD_VISIBLE /* - * IPv6 route structure + * IPv6 route structure, keep fields in sync with struct route */ struct route_in6 { struct rtentry *ro_rt; + u_long ro_generation; u_long ro_tableid; /* padded to long for alignment */ struct sockaddr_in6 ro_dst; };