Download raw body.
route generation number for route cache
On Tue, Jan 30, 2024 at 02:35:35PM +0100, Claudio Jeker wrote:
> On Tue, Jan 30, 2024 at 01:43:34PM +0100, Alexander Bluhm wrote:
> > Hi,
> >
> > The outgoing route is cached at the inpcb. This cache is only
> > invalidated when the socket closes or if the route gets invalid.
> > More specific routes are not detected. Especially with dynamic
> > routing protocols, sockets must be closed and reopened to use the
> > correct route. Running ping during a route change shows the problem.
> >
> > To solve this, I added a route generation number that is updated
> > whenever the routing table changes. The lookup in struct route is
> > put into the route_validate() API. If the generation number is too
> > old, the cached route gets discarded.
> >
> > This is the implementation for ip_output() and ip_forward(). IPv6
> > and more places will follow.
> >
> > ok?
>
> I find the use of membar_producer and membar_consumer together with atomic
> operations a questionable pattern. I would expect the use of
> membar_exit_before_atomic() and membar_enter_after_atomic().
membar_exit_before_atomic() and membar_enter_after_atomic() affect
both, load and sotre. I am only interessted in load and store for
each case. So I think membar_producer() and membar_consumer()
are more specific.
membar_enter_after_atomic() and membar_exit_before_atomic() exist
for architectures where they are less epxensove than membar_enter()
membar_exit().
On amd64 everything is just a __membar("").
On arm64 the ..._atomic() funtions choose the more expensive
operations of membar_enter() and membar_exit(). For generation
number this too much.
> > Index: net/route.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
> > diff -u -p -r1.426 route.c
> > --- net/route.c 13 Nov 2023 17:18:27 -0000 1.426
> > +++ net/route.c 29 Jan 2024 22:25:44 -0000
> > @@ -140,6 +140,7 @@
> >
> > /*
> > * Locks used to protect struct members:
> > + * a atomic operations
> > * I immutable after creation
> > * L rtlabel_mtx
> > * T rttimer_mtx
> > @@ -152,8 +153,9 @@ static uint32_t rt_hashjitter;
> >
> > extern unsigned int rtmap_limit;
> >
> > -struct cpumem * rtcounters;
> > -int rttrash; /* routes not in table but not freed */
> > +struct cpumem *rtcounters;
> > +int rttrash; /* [a] routes not in table but not freed */
> > +u_long rtgeneration; /* [a] generation number, routes changed */
> >
> > struct pool rtentry_pool; /* pool for rtentry structures */
> > struct pool rttimer_pool; /* pool for rttimer structures */
> > @@ -199,6 +201,33 @@ route_init(void)
> > #endif
> > }
> >
> > +void
> > +route_validate(struct route *ro, struct in_addr addr, u_int rtableid)
> > +{
> > + u_long gen;
> > +
> > + gen = atomic_load_long(&rtgeneration);
> > + membar_consumer();
> > +
> > + if (rtisvalid(ro->ro_rt) &&
> > + ro->ro_generation == gen &&
> > + ro->ro_tableid == rtableid &&
> > + ro->ro_dst.sa_family == AF_INET &&
> > + satosin(&ro->ro_dst)->sin_addr.s_addr == addr.s_addr) {
> > + return;
> > + }
> > +
> > + rtfree(ro->ro_rt);
> > + ro->ro_rt = NULL;
> > + ro->ro_generation = gen;
> > + ro->ro_tableid = rtableid;
> > +
> > + memset(&ro->ro_dst, 0, sizeof(ro->ro_dst));
> > + satosin(&ro->ro_dst)->sin_family = AF_INET;
> > + satosin(&ro->ro_dst)->sin_len = sizeof(struct sockaddr_in);
> > + satosin(&ro->ro_dst)->sin_addr = addr;
> > +}
> > +
> > /*
> > * Returns 1 if the (cached) ``rt'' entry is still valid, 0 otherwise.
> > */
> > @@ -824,6 +853,9 @@ rtrequest_delete(struct rt_addrinfo *inf
> > else
> > rtfree(rt);
> >
> > + membar_producer();
> > + atomic_inc_long(&rtgeneration);
> > +
> > return (0);
> > }
> >
> > @@ -992,6 +1024,10 @@ rtrequest(int req, struct rt_addrinfo *i
> > *ret_nrt = rt;
> > else
> > rtfree(rt);
> > +
> > + membar_producer();
> > + atomic_inc_long(&rtgeneration);
> > +
> > break;
> > }
> >
> > @@ -1828,6 +1864,9 @@ rt_if_linkstate_change(struct rtentry *r
> > rt->rt_priority | RTP_DOWN, rt);
> > }
> > if_group_routechange(rt_key(rt), rt_plen2mask(rt, &sa_mask));
> > +
> > + membar_producer();
> > + atomic_inc_long(&rtgeneration);
> >
> > return (error);
> > }
> > Index: net/route.h
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
> > diff -u -p -r1.203 route.h
> > --- net/route.h 12 Nov 2023 17:51:40 -0000 1.203
> > +++ net/route.h 29 Jan 2024 22:25:44 -0000
> > @@ -377,6 +377,7 @@ struct sockaddr_rtsearch {
> > */
> > struct route {
> > struct rtentry *ro_rt;
> > + u_long ro_generation;
> > u_long ro_tableid; /* u_long because of alignment */
> > struct sockaddr ro_dst;
> > };
> > @@ -438,15 +439,18 @@ void rtlabel_unref(u_int16_t);
> > #define RT_RESOLVE 1
> >
> > extern struct rtstat rtstat;
> > +extern u_long rtgeneration;
> >
> > struct mbuf;
> > struct socket;
> > struct ifnet;
> > +struct in_addr;
> > struct sockaddr_in6;
> > struct if_ieee80211_data;
> > struct bfd_config;
> >
> > void route_init(void);
> > +void route_validate(struct route *, struct in_addr, u_int);
> > void rtm_ifchg(struct ifnet *);
> > void rtm_ifannounce(struct ifnet *, int);
> > void rtm_bfd(struct bfd_config *);
> > Index: netinet/ip_input.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
> > diff -u -p -r1.387 ip_input.c
> > --- netinet/ip_input.c 16 Sep 2023 09:33:27 -0000 1.387
> > +++ netinet/ip_input.c 29 Jan 2024 22:24:13 -0000
> > @@ -1475,7 +1475,6 @@ ip_forward(struct mbuf *m, struct ifnet
> > {
> > struct mbuf mfake, *mcopy = NULL;
> > struct ip *ip = mtod(m, struct ip *);
> > - struct sockaddr_in *sin;
> > struct route ro;
> > int error = 0, type = 0, code = 0, destmtu = 0, fake = 0, len;
> > u_int32_t dest;
> > @@ -1491,15 +1490,11 @@ ip_forward(struct mbuf *m, struct ifnet
> > goto freecopy;
> > }
> >
> > - memset(&ro, 0, sizeof(ro));
> > - sin = satosin(&ro.ro_dst);
> > - sin->sin_family = AF_INET;
> > - sin->sin_len = sizeof(*sin);
> > - sin->sin_addr = ip->ip_dst;
> > -
> > + ro.ro_rt = NULL;
> > + route_validate(&ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
> > if (!rtisvalid(rt)) {
> > rtfree(rt);
> > - rt = rtalloc_mpath(sintosa(sin), &ip->ip_src.s_addr,
> > + rt = rtalloc_mpath(&ro.ro_dst, &ip->ip_src.s_addr,
> > m->m_pkthdr.ph_rtableid);
> > if (rt == NULL) {
> > ipstat_inc(ips_noroute);
> > @@ -1507,6 +1502,7 @@ ip_forward(struct mbuf *m, struct ifnet
> > return;
> > }
> > }
> > + ro.ro_rt = rt;
> >
> > /*
> > * Save at most 68 bytes of the packet in case
> > @@ -1557,8 +1553,6 @@ ip_forward(struct mbuf *m, struct ifnet
> > }
> > }
> >
> > - ro.ro_rt = rt;
> > - ro.ro_tableid = m->m_pkthdr.ph_rtableid;
> > error = ip_output(m, NULL, &ro,
> > (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
> > NULL, NULL, 0);
> > Index: netinet/ip_output.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
> > diff -u -p -r1.393 ip_output.c
> > --- netinet/ip_output.c 18 Jan 2024 11:03:16 -0000 1.393
> > +++ netinet/ip_output.c 29 Jan 2024 22:24:13 -0000
> > @@ -159,28 +159,15 @@ reroute:
> > */
> > if (ro == NULL) {
> > ro = &iproute;
> > - memset(ro, 0, sizeof(*ro));
> > + ro->ro_rt = NULL;
> > }
> >
> > - dst = satosin(&ro->ro_dst);
> > -
> > /*
> > * If there is a cached route, check that it is to the same
> > * destination and is still up. If not, free it and try again.
> > */
> > - if (!rtisvalid(ro->ro_rt) ||
> > - dst->sin_addr.s_addr != ip->ip_dst.s_addr ||
> > - ro->ro_tableid != m->m_pkthdr.ph_rtableid) {
> > - rtfree(ro->ro_rt);
> > - ro->ro_rt = NULL;
> > - }
> > -
> > - if (ro->ro_rt == NULL) {
> > - dst->sin_family = AF_INET;
> > - dst->sin_len = sizeof(*dst);
> > - dst->sin_addr = ip->ip_dst;
> > - ro->ro_tableid = m->m_pkthdr.ph_rtableid;
> > - }
> > + route_validate(ro, ip->ip_dst, m->m_pkthdr.ph_rtableid);
> > + dst = satosin(&ro->ro_dst);
> >
> > if ((IN_MULTICAST(ip->ip_dst.s_addr) ||
> > (ip->ip_dst.s_addr == INADDR_BROADCAST)) &&
> > Index: netinet6/in6.h
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6.h,v
> > diff -u -p -r1.112 in6.h
> > --- netinet6/in6.h 27 Jan 2024 21:13:46 -0000 1.112
> > +++ netinet6/in6.h 29 Jan 2024 22:31:51 -0000
> > @@ -145,10 +145,11 @@ extern const struct in6_addr in6addr_lin
> >
> > #if __BSD_VISIBLE
> > /*
> > - * IPv6 route structure
> > + * IPv6 route structure, keep fields in sync with struct route
> > */
> > struct route_in6 {
> > struct rtentry *ro_rt;
> > + u_long ro_generation;
> > u_long ro_tableid; /* padded to long for alignment */
> > struct sockaddr_in6 ro_dst;
> > };
> >
>
> --
> :wq Claudio
route generation number for route cache