Download raw body.
sysctl: unlock IP{,V6}CTL_MULTIPATH
On Sun, Jun 22, 2025 at 12:07:50AM +0200, Alexander Bluhm wrote:
> On Sun, Jun 22, 2025 at 12:52:45AM +0300, Vitaliy Makkoveev wrote:
> > On Sat, Jun 21, 2025 at 11:30:32PM +0200, Alexander Bluhm wrote:
> > > On Sat, Jun 21, 2025 at 10:21:02PM +0300, Vitaliy Makkoveev wrote:
> > > > They both are the same and both do `rtgeneration' update. As I
> > > > understand the code, we don't need to serialize `ipmultipath' and
> > > > `rtgeneration' update with the netstack, however, want to increment
> > > > `rtgeneration' before `ipmultipath' update. IIRC, atomic operations will
> > > > not be reordered and we don't need any extra barriers here.
> > >
> > > This is architecture dependent. membar_enter_after_atomic() is not
> > > defined everywhere. I think membar_consumer() is needed here. See
> > > comments inline.
> > >
> > > bluhm
> > >
> > > atomic_store_int(&ipmultipath, newval);
> > > membar_producer();
> > > atomic_inc_long(&rtgeneration);
> > >
> > > This ensures that route cache gets invalid after ipmultipath changes.
> > > You need that as rt_hash() result is based on ipmultipath. That
> > > way the ipmultipath loads in rt_hash() and route_cache() can be
> > > kept independent. No lock needed.
> >
> > Could we have the side effects while two threads set `ipmultipath' to
> > opposing values? I mean
> >
> > /* ipmultipath is 1 */
> > atomic_store_int(&ipmultipath, 0); /* thread A */
> > atomic_store_int(&ipmultipath, 1); /* thread B */
> > atomic_inc_long(&rtgeneration); /* thread B */
> > atomic_inc_long(&rtgeneration); /* thread A */
> >
> > If so, I like to keep patch serialized.
>
> As usual with concurrent sysctl, last write wins. There is no point
> in serializing interger writes of userland threads. Important is
> consistent kernel view. And in your example atomic_inc_long(&rtgeneration)
> reloads the routing cache at next use. It does not matter which
> thread does it. It just has to happen after writing ipmultipath
> on the current CPU.
>
> bluhm
>
Ok. Updated diff, with barriers and without locks.
Index: sys/net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1213
diff -u -p -r1.1213 pf.c
--- sys/net/pf.c 9 Jun 2025 20:34:08 -0000 1.1213
+++ sys/net/pf.c 21 Jun 2025 22:25:52 -0000
@@ -6501,7 +6501,7 @@ pf_routable(struct pf_addr *addr, sa_fam
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
dst->sin_addr = addr->v4;
- if (ipmultipath)
+ if (atomic_load_int(&ipmultipath))
check_mpath = 1;
break;
#ifdef INET6
@@ -6516,7 +6516,7 @@ pf_routable(struct pf_addr *addr, sa_fam
dst6->sin6_family = AF_INET6;
dst6->sin6_len = sizeof(*dst6);
dst6->sin6_addr = addr->v6;
- if (ip6_multipath)
+ if (atomic_load_int(&ip6_multipath))
check_mpath = 1;
break;
#endif /* INET6 */
Index: sys/net/route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.444
diff -u -p -r1.444 route.c
--- sys/net/route.c 16 Mar 2025 23:45:06 -0000 1.444
+++ sys/net/route.c 21 Jun 2025 22:25:52 -0000
@@ -215,7 +215,7 @@ route_cache(struct route *ro, const stru
ro->ro_tableid == rtableid &&
ro->ro_dstsa.sa_family == AF_INET &&
ro->ro_dstsin.sin_addr.s_addr == dst->s_addr) {
- if (src == NULL || !ipmultipath ||
+ if (src == NULL || !atomic_load_int(&ipmultipath) ||
!ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
(ro->ro_srcin.s_addr != INADDR_ANY &&
ro->ro_srcin.s_addr == src->s_addr)) {
@@ -272,7 +272,7 @@ route6_cache(struct route *ro, const str
ro->ro_tableid == rtableid &&
ro->ro_dstsa.sa_family == AF_INET6 &&
IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, dst)) {
- if (src == NULL || !ip6_multipath ||
+ if (src == NULL || !atomic_load_int(&ip6_multipath) ||
!ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
(!IN6_IS_ADDR_UNSPECIFIED(&ro->ro_srcin6) &&
IN6_ARE_ADDR_EQUAL(&ro->ro_srcin6, src))) {
@@ -426,7 +426,7 @@ rt_hash(struct rtentry *rt, const struct
{
const struct sockaddr_in *sin;
- if (!ipmultipath)
+ if (!atomic_load_int(&ipmultipath))
return (-1);
sin = satosin_const(dst);
@@ -440,7 +440,7 @@ rt_hash(struct rtentry *rt, const struct
{
const struct sockaddr_in6 *sin6;
- if (!ip6_multipath)
+ if (!atomic_load_int(&ip6_multipath))
return (-1);
sin6 = satosin6_const(dst);
Index: sys/netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.410
diff -u -p -r1.410 ip_input.c
--- sys/netinet/ip_input.c 21 Jun 2025 14:21:17 -0000 1.410
+++ sys/netinet/ip_input.c 21 Jun 2025 22:25:52 -0000
@@ -93,7 +93,7 @@
/* values controllable via sysctl */
int ip_forwarding = 0; /* [a] */
int ipmforwarding = 0;
-int ipmultipath = 0;
+int ipmultipath = 0; /* [a] */
int ip_sendredirects = 1; /* [a] */
int ip_dosourceroute = 0; /* [a] */
int ip_defttl = IPDEFTTL;
@@ -1817,13 +1817,15 @@ ip_sysctl(int *name, u_int namelen, void
return (EOPNOTSUPP);
#endif
case IPCTL_MULTIPATH:
- NET_LOCK();
- oldval = ipmultipath;
+ oldval = newval = atomic_load_int(&ipmultipath);
error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
- &ipmultipath, 0, 1);
- if (oldval != ipmultipath)
+ &newval, 0, 1);
+ if (error == 0 && oldval != newval) {
+ atomic_store_int(&ipmultipath, newval);
+ membar_producer();
atomic_inc_long(&rtgeneration);
- NET_UNLOCK();
+ }
+
return (error);
case IPCTL_FORWARDING:
case IPCTL_SENDREDIRECTS:
Index: sys/netinet6/in6_proto.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6_proto.c,v
retrieving revision 1.130
diff -u -p -r1.130 in6_proto.c
--- sys/netinet6/in6_proto.c 21 Jun 2025 14:21:17 -0000 1.130
+++ sys/netinet6/in6_proto.c 21 Jun 2025 22:25:52 -0000
@@ -359,7 +359,7 @@ const struct domain inet6domain = {
*/
int ip6_forwarding = 0; /* [a] no forwarding unless sysctl to enable */
int ip6_mforwarding = 0; /* no multicast forwarding unless ... */
-int ip6_multipath = 0; /* no using multipath routes unless ... */
+int ip6_multipath = 0; /* [a] no using multipath routes unless ... */
int ip6_sendredirects = 1; /* [a] */
int ip6_defhlim = IPV6_DEFHLIM;
int ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS;
Index: sys/netinet6/ip6_input.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.274
diff -u -p -r1.274 ip6_input.c
--- sys/netinet6/ip6_input.c 21 Jun 2025 14:21:17 -0000 1.274
+++ sys/netinet6/ip6_input.c 21 Jun 2025 22:25:52 -0000
@@ -1566,15 +1566,19 @@ ip6_sysctl(int *name, u_int namelen, voi
return (sysctl_niq(name + 1, namelen - 1,
oldp, oldlenp, newp, newlen, &ip6intrq));
case IPV6CTL_MULTIPATH: {
- NET_LOCK();
- int oldval = ip6_multipath;
+ int oldval, newval;
+
+ oldval = newval = atomic_load_int(&ip6_multipath);
error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
- &ip6_multipath, 0, 1);
- if (oldval != ip6_multipath)
+ &newval, 0, 1);
+ if (error == 0 && oldval != newval) {
+ atomic_store_int(&ip6_multipath, newval);
+ membar_producer();
atomic_inc_long(&rtgeneration);
- NET_UNLOCK();
+ }
+
return (error);
- }
+ }
case IPV6CTL_FORWARDING:
case IPV6CTL_SENDREDIRECTS:
return (sysctl_bounded_arr(
sysctl: unlock IP{,V6}CTL_MULTIPATH