Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: sysctl: unlock IP{,V6}CTL_MTUDISCTIMEOUT
To:
Vitaliy Makkoveev <mvs@openbsd.org>
Cc:
tech@openbsd.org
Date:
Wed, 18 Jun 2025 21:59:06 +0200

Download raw body.

Thread
On Wed, Jun 18, 2025 at 10:39:36PM +0300, Vitaliy Makkoveev wrote:
> They are identical, so unlock them both. I want to serialize the new
> value assignment with the following rt_timer_queue_change() because on
> hybrid CPU the thread on faster core could be executed between them.
> I used the new temporary `ip_sysctl_lock' rwlock(9) for serialization.
> Netlock is not required, so do not stop packets processing.

Can you put "extern struct rwlock ip_sysctl_lock" into netinet/ip_var.h?
I don't like extern in C files as then the code may get out of sync.

OK bluhm@

> Index: sys/netinet/ip_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.409
> diff -u -p -r1.409 ip_input.c
> --- sys/netinet/ip_input.c	12 Jun 2025 20:37:59 -0000	1.409
> +++ sys/netinet/ip_input.c	18 Jun 2025 19:17:01 -0000
> @@ -98,7 +98,7 @@ int	ip_sendredirects = 1;			/* [a] */
>  int	ip_dosourceroute = 0;			/* [a] */
>  int	ip_defttl = IPDEFTTL;
>  int	ip_mtudisc = 1;
> -int	ip_mtudisc_timeout = IPMTUDISCTIMEOUT;
> +int	ip_mtudisc_timeout = IPMTUDISCTIMEOUT;	/* [a] */
>  int	ip_directedbcast = 0;			/* [a] */
>  
>  /* Protects `ipq' and `ip_frags'. */
> @@ -1723,11 +1723,15 @@ ip_forward(struct mbuf *m, struct ifnet 
>  }
>  
>  #ifndef SMALL_KERNEL
> +
> +/* Temporary, to avoid sysctl_lock recursion. */
> +struct rwlock ip_sysctl_lock = RWLOCK_INITIALIZER("ipslk");
> +
>  int
>  ip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
>      size_t newlen)
>  {
> -	int oldval, error;
> +	int oldval, newval, error;
>  
>  	/* Almost all sysctl names at this level are terminal. */
>  	if (namelen != 1 && name[0] != IPCTL_IFQUEUE &&
> @@ -1746,12 +1750,16 @@ ip_sysctl(int *name, u_int namelen, void
>  		NET_UNLOCK();
>  		return error;
>  	case IPCTL_MTUDISCTIMEOUT:
> -		NET_LOCK();
> +		oldval = newval = atomic_load_int(&ip_mtudisc_timeout);
>  		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
> -		    &ip_mtudisc_timeout, 0, INT_MAX);
> -		rt_timer_queue_change(&ip_mtudisc_timeout_q,
> -		    ip_mtudisc_timeout);
> -		NET_UNLOCK();
> +		    &newval, 0, INT_MAX);
> +		if (error == 0 && oldval != newval) {
> +			rw_enter_write(&ip_sysctl_lock);
> +			atomic_store_int(&ip_mtudisc_timeout, newval);
> +			rt_timer_queue_change(&ip_mtudisc_timeout_q, newval);
> +			rw_exit_write(&ip_sysctl_lock);
> +		}
> +
>  		return (error);
>  #ifdef IPSEC
>  	case IPCTL_ENCDEBUG:
> Index: sys/netinet/ipsec_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ipsec_input.c,v
> retrieving revision 1.218
> diff -u -p -r1.218 ipsec_input.c
> --- sys/netinet/ipsec_input.c	16 Jun 2025 07:11:58 -0000	1.218
> +++ sys/netinet/ipsec_input.c	18 Jun 2025 19:17:01 -0000
> @@ -928,7 +928,8 @@ ipsec_set_mtu(struct tdb *tdbp, u_int32_
>  
>  		/* Store adjusted MTU in tdb */
>  		tdbp->tdb_mtu = mtu;
> -		tdbp->tdb_mtutimeout = gettime() + ip_mtudisc_timeout;
> +		tdbp->tdb_mtutimeout = gettime() +
> +		    atomic_load_int(&ip_mtudisc_timeout);
>  		DPRINTF("spi %08x mtu %d adjust %ld",
>  		    ntohl(tdbp->tdb_spi), tdbp->tdb_mtu, adjust);
>  	}
> Index: sys/netinet/ipsec_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ipsec_output.c,v
> retrieving revision 1.103
> diff -u -p -r1.103 ipsec_output.c
> --- sys/netinet/ipsec_output.c	16 Jun 2025 07:11:58 -0000	1.103
> +++ sys/netinet/ipsec_output.c	18 Jun 2025 19:17:01 -0000
> @@ -650,7 +650,8 @@ ipsec_adjust_mtu(struct mbuf *m, u_int32
>  
>  		mtu -= adjust;
>  		tdbp->tdb_mtu = mtu;
> -		tdbp->tdb_mtutimeout = gettime() + ip_mtudisc_timeout;
> +		tdbp->tdb_mtutimeout = gettime() +
> +		    atomic_load_int(&ip_mtudisc_timeout);
>  		DPRINTF("spi %08x mtu %d adjust %ld mbuf %p",
>  		    ntohl(tdbp->tdb_spi), tdbp->tdb_mtu, adjust, m);
>  		tdb_unref(tdbp);
> Index: sys/netinet6/in6_proto.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/in6_proto.c,v
> retrieving revision 1.129
> diff -u -p -r1.129 in6_proto.c
> --- sys/netinet6/in6_proto.c	16 Jun 2025 07:11:58 -0000	1.129
> +++ sys/netinet6/in6_proto.c	18 Jun 2025 19:17:01 -0000
> @@ -350,6 +350,11 @@ const struct domain inet6domain = {
>  };
>  
>  /*
> + * Locks used to protect global variables in this file:
> + *	a	atomic operations
> + */
> +
> +/*
>   * Internet configuration info
>   */
>  int	ip6_forwarding = 0;	/* [a] no forwarding unless sysctl to enable */
> @@ -384,4 +389,4 @@ u_long	rip6_recvspace = RIPV6RCVQ;
>  /* ICMPV6 parameters */
>  int	icmp6_redirtimeout = 10 * 60;	/* 10 minutes */
>  int	icmp6errppslim = 100;		/* 100pps */
> -int	ip6_mtudisc_timeout = IPMTUDISCTIMEOUT;
> +int	ip6_mtudisc_timeout = IPMTUDISCTIMEOUT;	/* [a] */
> Index: sys/netinet6/ip6_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
> retrieving revision 1.273
> diff -u -p -r1.273 ip6_input.c
> --- sys/netinet6/ip6_input.c	12 Jun 2025 20:37:59 -0000	1.273
> +++ sys/netinet6/ip6_input.c	18 Jun 2025 19:17:01 -0000
> @@ -1547,14 +1547,22 @@ ip6_sysctl(int *name, u_int namelen, voi
>  	case IPV6CTL_MRTMFC:
>  		return (EOPNOTSUPP);
>  #endif
> -	case IPV6CTL_MTUDISCTIMEOUT:
> -		NET_LOCK();
> +	case IPV6CTL_MTUDISCTIMEOUT: {
> +		extern struct rwlock ip_sysctl_lock;
> +		int oldval, newval;
> +
> +		oldval = newval = atomic_load_int(&ip6_mtudisc_timeout);
>  		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
> -		    &ip6_mtudisc_timeout, 0, INT_MAX);
> -		rt_timer_queue_change(&icmp6_mtudisc_timeout_q,
> -		    ip6_mtudisc_timeout);
> -		NET_UNLOCK();
> +		    &newval, 0, INT_MAX);
> +		if (error == 0 && oldval != newval) {
> +			rw_enter_write(&ip_sysctl_lock);
> +			atomic_store_int(&ip6_mtudisc_timeout, newval);
> +			rt_timer_queue_change(&icmp6_mtudisc_timeout_q, newval);
> +			rw_exit_write(&ip_sysctl_lock);
> +		}
> +
>  		return (error);
> +	}
>  	case IPV6CTL_IFQUEUE:
>  		return (sysctl_niq(name + 1, namelen - 1,
>  		    oldp, oldlenp, newp, newlen, &ip6intrq));