Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: sysctl: unlock IP{,V6}CTL_MULTIPATH
To:
Mark Kettenis <mark.kettenis@xs4all.nl>
Cc:
Vitaliy Makkoveev <mvs@openbsd.org>, tech@openbsd.org
Date:
Sun, 22 Jun 2025 13:38:56 +0200

Download raw body.

Thread
On Sun, Jun 22, 2025 at 01:04:05PM +0200, Mark Kettenis wrote:
> > Date: Sun, 22 Jun 2025 01:29:56 +0300
> > From: Vitaliy Makkoveev <mvs@openbsd.org>
> > 
> > On Sun, Jun 22, 2025 at 12:07:50AM +0200, Alexander Bluhm wrote:
> > > On Sun, Jun 22, 2025 at 12:52:45AM +0300, Vitaliy Makkoveev wrote:
> > > > On Sat, Jun 21, 2025 at 11:30:32PM +0200, Alexander Bluhm wrote:
> > > > > On Sat, Jun 21, 2025 at 10:21:02PM +0300, Vitaliy Makkoveev wrote:
> > > > > > They both are the same and both do `rtgeneration' update. As I
> > > > > > understand the code, we don't need to serialize `ipmultipath' and
> > > > > > `rtgeneration' update with the netstack, however, want to increment
> > > > > > `rtgeneration' before `ipmultipath' update. IIRC, atomic operations will
> > > > > > not be reordered and we don't need any extra barriers here.
> > > > > 
> > > > > This is architecture dependent.  membar_enter_after_atomic() is not
> > > > > defined everywhere.  I think membar_consumer() is needed here.  See
> > > > > comments inline.
> > > > > 
> > > > > bluhm
> > > > > 
> > > > >     atomic_store_int(&ipmultipath, newval);
> > > > >     membar_producer();
> > > > >     atomic_inc_long(&rtgeneration);
> > > > > 
> > > > > This ensures that route cache gets invalid after ipmultipath changes.
> > > > > You need that as rt_hash() result is based on ipmultipath.  That
> > > > > way the ipmultipath loads in rt_hash() and route_cache() can be
> > > > > kept independent.  No lock needed.
> > > > 
> > > > Could we have the side effects while two threads set `ipmultipath' to
> > > > opposing values? I mean
> > > > 
> > > > 	/* ipmultipath is 1 */
> > > > 	atomic_store_int(&ipmultipath, 0); /* thread A */
> > > > 	atomic_store_int(&ipmultipath, 1); /* thread B */
> > > > 	atomic_inc_long(&rtgeneration); /* thread B */
> > > > 	atomic_inc_long(&rtgeneration); /* thread A */
> > > > 
> > > > If so, I like to keep patch serialized.
> > > 
> > > As usual with concurrent sysctl, last write wins.  There is no point
> > > in serializing interger writes of userland threads.  Important is
> > > consistent kernel view.  And in your example atomic_inc_long(&rtgeneration)
> > > reloads the routing cache at next use.  It does not matter which
> > > thread does it.  It just has to happen after writing ipmultipath
> > > on the current CPU.
> > > 
> > > bluhm
> > > 
> > 
> > Ok. Updated diff, with barriers and without locks.
> 
> Where are the membar_consumer() calls that match those
> membar_producer() calls?

net/route.c route_cache()

        gen = atomic_load_long(&rtgeneration);
        membar_consumer();
	...
                if (src == NULL || !ipmultipath ||

> > Index: sys/net/pf.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/pf.c,v
> > retrieving revision 1.1213
> > diff -u -p -r1.1213 pf.c
> > --- sys/net/pf.c	9 Jun 2025 20:34:08 -0000	1.1213
> > +++ sys/net/pf.c	21 Jun 2025 22:25:52 -0000
> > @@ -6501,7 +6501,7 @@ pf_routable(struct pf_addr *addr, sa_fam
> >  		dst->sin_family = AF_INET;
> >  		dst->sin_len = sizeof(*dst);
> >  		dst->sin_addr = addr->v4;
> > -		if (ipmultipath)
> > +		if (atomic_load_int(&ipmultipath))
> >  			check_mpath = 1;
> >  		break;
> >  #ifdef INET6
> > @@ -6516,7 +6516,7 @@ pf_routable(struct pf_addr *addr, sa_fam
> >  		dst6->sin6_family = AF_INET6;
> >  		dst6->sin6_len = sizeof(*dst6);
> >  		dst6->sin6_addr = addr->v6;
> > -		if (ip6_multipath)
> > +		if (atomic_load_int(&ip6_multipath))
> >  			check_mpath = 1;
> >  		break;
> >  #endif /* INET6 */
> > Index: sys/net/route.c
> > ===================================================================
> > RCS file: /cvs/src/sys/net/route.c,v
> > retrieving revision 1.444
> > diff -u -p -r1.444 route.c
> > --- sys/net/route.c	16 Mar 2025 23:45:06 -0000	1.444
> > +++ sys/net/route.c	21 Jun 2025 22:25:52 -0000
> > @@ -215,7 +215,7 @@ route_cache(struct route *ro, const stru
> >  	    ro->ro_tableid == rtableid &&
> >  	    ro->ro_dstsa.sa_family == AF_INET &&
> >  	    ro->ro_dstsin.sin_addr.s_addr == dst->s_addr) {
> > -		if (src == NULL || !ipmultipath ||
> > +		if (src == NULL || !atomic_load_int(&ipmultipath) ||
> >  		    !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
> >  		    (ro->ro_srcin.s_addr != INADDR_ANY &&
> >  		    ro->ro_srcin.s_addr == src->s_addr)) {
> > @@ -272,7 +272,7 @@ route6_cache(struct route *ro, const str
> >  	    ro->ro_tableid == rtableid &&
> >  	    ro->ro_dstsa.sa_family == AF_INET6 &&
> >  	    IN6_ARE_ADDR_EQUAL(&ro->ro_dstsin6.sin6_addr, dst)) {
> > -		if (src == NULL || !ip6_multipath ||
> > +		if (src == NULL || !atomic_load_int(&ip6_multipath) ||
> >  		    !ISSET(ro->ro_rt->rt_flags, RTF_MPATH) ||
> >  		    (!IN6_IS_ADDR_UNSPECIFIED(&ro->ro_srcin6) &&
> >  		    IN6_ARE_ADDR_EQUAL(&ro->ro_srcin6, src))) {
> > @@ -426,7 +426,7 @@ rt_hash(struct rtentry *rt, const struct
> >  	    {
> >  		const struct sockaddr_in *sin;
> >  
> > -		if (!ipmultipath)
> > +		if (!atomic_load_int(&ipmultipath))
> >  			return (-1);
> >  
> >  		sin = satosin_const(dst);
> > @@ -440,7 +440,7 @@ rt_hash(struct rtentry *rt, const struct
> >  	    {
> >  		const struct sockaddr_in6 *sin6;
> >  
> > -		if (!ip6_multipath)
> > +		if (!atomic_load_int(&ip6_multipath))
> >  			return (-1);
> >  
> >  		sin6 = satosin6_const(dst);
> > Index: sys/netinet/ip_input.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet/ip_input.c,v
> > retrieving revision 1.410
> > diff -u -p -r1.410 ip_input.c
> > --- sys/netinet/ip_input.c	21 Jun 2025 14:21:17 -0000	1.410
> > +++ sys/netinet/ip_input.c	21 Jun 2025 22:25:52 -0000
> > @@ -93,7 +93,7 @@
> >  /* values controllable via sysctl */
> >  int	ip_forwarding = 0;			/* [a] */
> >  int	ipmforwarding = 0;
> > -int	ipmultipath = 0;
> > +int	ipmultipath = 0;			/* [a] */
> >  int	ip_sendredirects = 1;			/* [a] */
> >  int	ip_dosourceroute = 0;			/* [a] */
> >  int	ip_defttl = IPDEFTTL;
> > @@ -1817,13 +1817,15 @@ ip_sysctl(int *name, u_int namelen, void
> >  		return (EOPNOTSUPP);
> >  #endif
> >  	case IPCTL_MULTIPATH:
> > -		NET_LOCK();
> > -		oldval = ipmultipath;
> > +		oldval = newval = atomic_load_int(&ipmultipath);
> >  		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
> > -		    &ipmultipath, 0, 1);
> > -		if (oldval != ipmultipath)
> > +		    &newval, 0, 1);
> > +		if (error == 0 && oldval != newval) {
> > +			atomic_store_int(&ipmultipath, newval);
> > +			membar_producer();
> >  			atomic_inc_long(&rtgeneration);
> > -		NET_UNLOCK();
> > +		}
> > +
> >  		return (error);
> >  	case IPCTL_FORWARDING:
> >  	case IPCTL_SENDREDIRECTS:
> > Index: sys/netinet6/in6_proto.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet6/in6_proto.c,v
> > retrieving revision 1.130
> > diff -u -p -r1.130 in6_proto.c
> > --- sys/netinet6/in6_proto.c	21 Jun 2025 14:21:17 -0000	1.130
> > +++ sys/netinet6/in6_proto.c	21 Jun 2025 22:25:52 -0000
> > @@ -359,7 +359,7 @@ const struct domain inet6domain = {
> >   */
> >  int	ip6_forwarding = 0;	/* [a] no forwarding unless sysctl to enable */
> >  int	ip6_mforwarding = 0;	/* no multicast forwarding unless ... */
> > -int	ip6_multipath = 0;	/* no using multipath routes unless ... */
> > +int	ip6_multipath = 0;	/* [a] no using multipath routes unless ... */
> >  int	ip6_sendredirects = 1;	/* [a] */
> >  int	ip6_defhlim = IPV6_DEFHLIM;
> >  int	ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS;
> > Index: sys/netinet6/ip6_input.c
> > ===================================================================
> > RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
> > retrieving revision 1.274
> > diff -u -p -r1.274 ip6_input.c
> > --- sys/netinet6/ip6_input.c	21 Jun 2025 14:21:17 -0000	1.274
> > +++ sys/netinet6/ip6_input.c	21 Jun 2025 22:25:52 -0000
> > @@ -1566,15 +1566,19 @@ ip6_sysctl(int *name, u_int namelen, voi
> >  		return (sysctl_niq(name + 1, namelen - 1,
> >  		    oldp, oldlenp, newp, newlen, &ip6intrq));
> >  	case IPV6CTL_MULTIPATH: {
> > -		NET_LOCK();
> > -		int oldval = ip6_multipath;
> > +		int oldval, newval;
> > +
> > +		oldval = newval = atomic_load_int(&ip6_multipath);
> >  		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
> > -		    &ip6_multipath, 0, 1);
> > -		if (oldval != ip6_multipath)
> > +		    &newval, 0, 1);
> > +		if (error == 0 && oldval != newval) {
> > +			atomic_store_int(&ip6_multipath, newval);
> > +			membar_producer();
> >  			atomic_inc_long(&rtgeneration);
> > -		NET_UNLOCK();
> > +		}
> > +
> >  		return (error);
> > -	    }
> > +	}
> >  	case IPV6CTL_FORWARDING:
> >  	case IPV6CTL_SENDREDIRECTS:
> >  		return (sysctl_bounded_arr(
> > 
> >