From: Mark Kettenis Subject: Re: sysctl: unlock IP{,V6}CTL_MULTIPATH To: Alexander Bluhm Cc: mvs@openbsd.org, tech@openbsd.org Date: Sun, 22 Jun 2025 14:22:10 +0200 > Date: Sun, 22 Jun 2025 13:38:56 +0200 > From: Alexander Bluhm > > On Sun, Jun 22, 2025 at 01:04:05PM +0200, Mark Kettenis wrote: > > > Date: Sun, 22 Jun 2025 01:29:56 +0300 > > > From: Vitaliy Makkoveev > > > > > > On Sun, Jun 22, 2025 at 12:07:50AM +0200, Alexander Bluhm wrote: > > > > On Sun, Jun 22, 2025 at 12:52:45AM +0300, Vitaliy Makkoveev wrote: > > > > > On Sat, Jun 21, 2025 at 11:30:32PM +0200, Alexander Bluhm wrote: > > > > > > On Sat, Jun 21, 2025 at 10:21:02PM +0300, Vitaliy Makkoveev wrote: > > > > > > > They both are the same and both do `rtgeneration' update. As I > > > > > > > understand the code, we don't need to serialize `ipmultipath' and > > > > > > > `rtgeneration' update with the netstack, however, want to increment > > > > > > > `rtgeneration' before `ipmultipath' update. IIRC, atomic operations will > > > > > > > not be reordered and we don't need any extra barriers here. > > > > > > > > > > > > This is architecture dependent. membar_enter_after_atomic() is not > > > > > > defined everywhere. I think membar_consumer() is needed here. See > > > > > > comments inline. > > > > > > > > > > > > bluhm > > > > > > > > > > > > atomic_store_int(&ipmultipath, newval); > > > > > > membar_producer(); > > > > > > atomic_inc_long(&rtgeneration); > > > > > > > > > > > > This ensures that route cache gets invalid after ipmultipath changes. > > > > > > You need that as rt_hash() result is based on ipmultipath. That > > > > > > way the ipmultipath loads in rt_hash() and route_cache() can be > > > > > > kept independent. No lock needed. > > > > > > > > > > Could we have the side effects while two threads set `ipmultipath' to > > > > > opposing values? I mean > > > > > > > > > > /* ipmultipath is 1 */ > > > > > atomic_store_int(&ipmultipath, 0); /* thread A */ > > > > > atomic_store_int(&ipmultipath, 1); /* thread B */ > > > > > atomic_inc_long(&rtgeneration); /* thread B */ > > > > > atomic_inc_long(&rtgeneration); /* thread A */ > > > > > > > > > > If so, I like to keep patch serialized. > > > > > > > > As usual with concurrent sysctl, last write wins. There is no point > > > > in serializing interger writes of userland threads. Important is > > > > consistent kernel view. And in your example atomic_inc_long(&rtgeneration) > > > > reloads the routing cache at next use. It does not matter which > > > > thread does it. It just has to happen after writing ipmultipath > > > > on the current CPU. > > > > > > > > bluhm > > > > > > > > > > Ok. Updated diff, with barriers and without locks. > > > > Where are the membar_consumer() calls that match those > > membar_producer() calls? > > net/route.c route_cache() > > gen = atomic_load_long(&rtgeneration); > membar_consumer(); > ... > if (src == NULL || !ipmultipath || Thanks, yes, that does indeed look correct. > > > Index: sys/net/pf.c > > > =================================================================== > > > RCS file: /cvs/src/sys/net/pf.c,v > > > retrieving revision 1.1213 > > > diff -u -p -r1.1213 pf.c > > > --- sys/net/pf.c 9 Jun 2025 20:34:08 -0000 1.1213 > > > +++ sys/net/pf.c 21 Jun 2025 22:25:52 -0000 > > > @@ -6501,7 +6501,7 @@ pf_routable(struct pf_addr *addr, sa_fam > > > dst->sin_family = AF_INET; > > > dst->sin_len = sizeof(*dst); > > > dst->sin_addr = addr->v4; > > > - if (ipmultipath) > > > + if (atomic_load_int(&ipmultipath)) > > > check_mpath = 1; > > > break; > > > #ifdef INET6 > > > @@ -6516,7 +6516,7 @@ pf_routable(struct pf_addr *addr, sa_fam > > > dst6->sin6_family = AF_INET6; > > > dst6->sin6_len = sizeof(*dst6); > > > dst6->sin6_addr = addr->v6; > > > - if (ip6_multipath) > > > + if (atomic_load_int(&ip6_multipath)) > > > check_mpath = 1; > > > break; > > > #endif /* INET6 */ > > > Index: sys/net/route.c > > > =================================================================== > > > RCS file: /cvs/src/sys/net/route.c,v > > > retrieving revision 1.444 > > > diff -u -p -r1.444 route.c > > > --- sys/net/route.c 16 Mar 2025 23:45:06 -0000 1.444 > > > +++ sys/net/route.c 21 Jun 2025 22:25:52 -0000 > > > @@ -215,7 +215,7 @@ route_cache(struct route *ro, const stru > > > ro->ro_tableid == rtableid && > > > ro->ro_dstsa.sa_family == AF_INET && > > > ro->ro_dstsin.sin_addr.s_addr == dst->s_addr) { > > > - if (src == NULL || !ipmultipath || > > > + if (src == NULL || !atomic_load_int(&ipmultipath) || > > > !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) || > > > (ro->ro_srcin.s_addr != INADDR_ANY && > > > ro->ro_srcin.s_addr == src->s_addr)) { > > > @@ -272,7 +272,7 @@ route6_cache(struct route *ro, const str > > > ro->ro_tableid == rtableid && > > > ro->ro_dstsa.sa_family == AF_INET6 && > > > IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, dst)) { > > > - if (src == NULL || !ip6_multipath || > > > + if (src == NULL || !atomic_load_int(&ip6_multipath) || > > > !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) || > > > (!IN6_IS_ADDR_UNSPECIFIED(&ro->ro_srcin6) && > > > IN6_ARE_ADDR_EQUAL(&ro->ro_srcin6, src))) { > > > @@ -426,7 +426,7 @@ rt_hash(struct rtentry *rt, const struct > > > { > > > const struct sockaddr_in *sin; > > > > > > - if (!ipmultipath) > > > + if (!atomic_load_int(&ipmultipath)) > > > return (-1); > > > > > > sin = satosin_const(dst); > > > @@ -440,7 +440,7 @@ rt_hash(struct rtentry *rt, const struct > > > { > > > const struct sockaddr_in6 *sin6; > > > > > > - if (!ip6_multipath) > > > + if (!atomic_load_int(&ip6_multipath)) > > > return (-1); > > > > > > sin6 = satosin6_const(dst); > > > Index: sys/netinet/ip_input.c > > > =================================================================== > > > RCS file: /cvs/src/sys/netinet/ip_input.c,v > > > retrieving revision 1.410 > > > diff -u -p -r1.410 ip_input.c > > > --- sys/netinet/ip_input.c 21 Jun 2025 14:21:17 -0000 1.410 > > > +++ sys/netinet/ip_input.c 21 Jun 2025 22:25:52 -0000 > > > @@ -93,7 +93,7 @@ > > > /* values controllable via sysctl */ > > > int ip_forwarding = 0; /* [a] */ > > > int ipmforwarding = 0; > > > -int ipmultipath = 0; > > > +int ipmultipath = 0; /* [a] */ > > > int ip_sendredirects = 1; /* [a] */ > > > int ip_dosourceroute = 0; /* [a] */ > > > int ip_defttl = IPDEFTTL; > > > @@ -1817,13 +1817,15 @@ ip_sysctl(int *name, u_int namelen, void > > > return (EOPNOTSUPP); > > > #endif > > > case IPCTL_MULTIPATH: > > > - NET_LOCK(); > > > - oldval = ipmultipath; > > > + oldval = newval = atomic_load_int(&ipmultipath); > > > error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, > > > - &ipmultipath, 0, 1); > > > - if (oldval != ipmultipath) > > > + &newval, 0, 1); > > > + if (error == 0 && oldval != newval) { > > > + atomic_store_int(&ipmultipath, newval); > > > + membar_producer(); > > > atomic_inc_long(&rtgeneration); > > > - NET_UNLOCK(); > > > + } > > > + > > > return (error); > > > case IPCTL_FORWARDING: > > > case IPCTL_SENDREDIRECTS: > > > Index: sys/netinet6/in6_proto.c > > > =================================================================== > > > RCS file: /cvs/src/sys/netinet6/in6_proto.c,v > > > retrieving revision 1.130 > > > diff -u -p -r1.130 in6_proto.c > > > --- sys/netinet6/in6_proto.c 21 Jun 2025 14:21:17 -0000 1.130 > > > +++ sys/netinet6/in6_proto.c 21 Jun 2025 22:25:52 -0000 > > > @@ -359,7 +359,7 @@ const struct domain inet6domain = { > > > */ > > > int ip6_forwarding = 0; /* [a] no forwarding unless sysctl to enable */ > > > int ip6_mforwarding = 0; /* no multicast forwarding unless ... */ > > > -int ip6_multipath = 0; /* no using multipath routes unless ... */ > > > +int ip6_multipath = 0; /* [a] no using multipath routes unless ... */ > > > int ip6_sendredirects = 1; /* [a] */ > > > int ip6_defhlim = IPV6_DEFHLIM; > > > int ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS; > > > Index: sys/netinet6/ip6_input.c > > > =================================================================== > > > RCS file: /cvs/src/sys/netinet6/ip6_input.c,v > > > retrieving revision 1.274 > > > diff -u -p -r1.274 ip6_input.c > > > --- sys/netinet6/ip6_input.c 21 Jun 2025 14:21:17 -0000 1.274 > > > +++ sys/netinet6/ip6_input.c 21 Jun 2025 22:25:52 -0000 > > > @@ -1566,15 +1566,19 @@ ip6_sysctl(int *name, u_int namelen, voi > > > return (sysctl_niq(name + 1, namelen - 1, > > > oldp, oldlenp, newp, newlen, &ip6intrq)); > > > case IPV6CTL_MULTIPATH: { > > > - NET_LOCK(); > > > - int oldval = ip6_multipath; > > > + int oldval, newval; > > > + > > > + oldval = newval = atomic_load_int(&ip6_multipath); > > > error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, > > > - &ip6_multipath, 0, 1); > > > - if (oldval != ip6_multipath) > > > + &newval, 0, 1); > > > + if (error == 0 && oldval != newval) { > > > + atomic_store_int(&ip6_multipath, newval); > > > + membar_producer(); > > > atomic_inc_long(&rtgeneration); > > > - NET_UNLOCK(); > > > + } > > > + > > > return (error); > > > - } > > > + } > > > case IPV6CTL_FORWARDING: > > > case IPV6CTL_SENDREDIRECTS: > > > return (sysctl_bounded_arr( > > > > > > >