Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
Re: sysctl: relax netlock around ESPCTL_UDPENCAP_ENABLE and ESPCTL_UDPENCAP_PORT
To:
Vitaliy Makkoveev <mvs@openbsd.org>
Cc:
tech@openbsd.org
Date:
Fri, 13 Jun 2025 15:03:55 +0200

Download raw body.

Thread
On Fri, Jun 13, 2025 at 10:10:59AM +0300, Vitaliy Makkoveev wrote:
> On Sat, Jun 07, 2025 at 04:25:58AM +0300, Vitaliy Makkoveev wrote:
> > Both `udpencap_enable' and `udpencap_port' are inetgers with the
> > read-write access via sysctl(2) and read-only access from the stack.
> > However, the recursive calls of ipsp_process_packet(),
> > ipsp_process_done() and (*xf_output)() make it complicated to be
> > atomically loads.
> > 
> > So, keep the sysctl modification protected by netlock, but move the
> > sysctl read-only access and copying out of netlock.
> > 
> 
> The previous diff makes the whole esp_sysctl() path mp-safe, so move it
> out of locking.
> 
> Note, ipsec(4) completely excluded from small kernel, this diff will not
> affect it.
> 
> ok?

I would prefer to remove the netlock from these integer varibles.

They are used in multiple places in the stack, but they are
independent.  So atomic operations within each function should be
enough.

pfkeyv2_dosend() deals with userland
in_baddynamic() is used when binding other sockets
ipsp_process_done() is encrypting and encapsulating IPsec
udp_input() is matching encrypted packets before decrypting
udp_ctlinput() deals with ICMP packets containiung UDP with IPsec

Neither udpencap_enable nor udpencap_port are evaluated twice for
the same packet.  pfkeyv2_dosend() has them in disjuct swtich cases.
I think atomic_load_int() is the way to go.

bluhm

> Index: sys/netinet/in_proto.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/in_proto.c,v
> retrieving revision 1.123
> diff -u -p -r1.123 in_proto.c
> --- sys/netinet/in_proto.c	20 May 2025 18:41:06 -0000	1.123
> +++ sys/netinet/in_proto.c	13 Jun 2025 06:54:19 -0000
> @@ -296,7 +296,7 @@ const struct protosw inetsw[] = {
>    .pr_type	= SOCK_RAW,
>    .pr_domain	= &inetdomain,
>    .pr_protocol	= IPPROTO_ESP,
> -  .pr_flags	= PR_ATOMIC|PR_ADDR,
> +  .pr_flags	= PR_ATOMIC|PR_ADDR|PR_MPSYSCTL,
>    .pr_input	= esp46_input,
>    .pr_ctlinput	= esp4_ctlinput,
>    .pr_ctloutput	= rip_ctloutput,
> Index: sys/netinet/ipsec_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ipsec_input.c,v
> retrieving revision 1.217
> diff -u -p -r1.217 ipsec_input.c
> --- sys/netinet/ipsec_input.c	3 Jun 2025 14:49:05 -0000	1.217
> +++ sys/netinet/ipsec_input.c	13 Jun 2025 06:54:20 -0000
> @@ -126,10 +126,6 @@ const struct sysctl_bounded_args espctl_
>  	{ESPCTL_ENABLE, &esp_enable, 0, 1},
>  };
>  
> -const struct sysctl_bounded_args espctl_vars_locked[] = {
> -	{ESPCTL_UDPENCAP_ENABLE, &udpencap_enable, 0, 1},
> -	{ESPCTL_UDPENCAP_PORT, &udpencap_port, 0, 65535},
> -};
>  const struct sysctl_bounded_args ahctl_vars[] = {
>  	{AHCTL_ENABLE, &ah_enable, 0, 1},
>  };
> @@ -718,8 +714,6 @@ int
>  esp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
>      size_t newlen)
>  {
> -	int error;
> -
>  	/* All sysctl names at this level are terminal. */
>  	if (namelen != 1)
>  		return (ENOTDIR);
> @@ -727,16 +721,33 @@ esp_sysctl(int *name, u_int namelen, voi
>  	switch (name[0]) {
>  	case ESPCTL_STATS:
>  		return (esp_sysctl_espstat(oldp, oldlenp, newp));
> -	case ESPCTL_ENABLE:
> -		error = sysctl_bounded_arr(espctl_vars, nitems(espctl_vars),
> -		    name, namelen, oldp, oldlenp, newp, newlen);
> -	default:
> -		NET_LOCK();
> -		error = sysctl_bounded_arr(espctl_vars_locked,
> -		    nitems(espctl_vars_locked),
> -		    name, namelen, oldp, oldlenp, newp, newlen);
> -		NET_UNLOCK();
> +	case ESPCTL_UDPENCAP_ENABLE:
> +	case ESPCTL_UDPENCAP_PORT: {
> +		int *var, upper_bound, oval, nval, error;
> +
> +		if (name[0] == ESPCTL_UDPENCAP_ENABLE) {
> +			var = &udpencap_enable;
> +			upper_bound = 1;
> +		} else {
> +			var = &udpencap_port;
> +			upper_bound = 65535;
> +		}
> +
> +		oval = nval = atomic_load_int(var);
> +		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
> +		    &nval, 0, upper_bound);
> +
> +		if (error == 0 && oval != nval) {
> +			NET_LOCK();
> +			atomic_store_int(var, nval);
> +			NET_UNLOCK();
> +		}
> +
>  		return (error);
> +	}
> +	default:
> +		return (sysctl_bounded_arr(espctl_vars, nitems(espctl_vars),
> +		    name, namelen, oldp, oldlenp, newp, newlen));
>  	}
>  }
>  
> Index: sys/netinet/ipsec_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ipsec_output.c,v
> retrieving revision 1.102
> diff -u -p -r1.102 ipsec_output.c
> --- sys/netinet/ipsec_output.c	3 Jun 2025 14:49:05 -0000	1.102
> +++ sys/netinet/ipsec_output.c	13 Jun 2025 06:54:20 -0000
> @@ -51,6 +51,11 @@
>  #include <crypto/cryptodev.h>
>  #include <crypto/xform.h>
>  
> +/*
> + * Locks used to protect data:
> + *	N	net lock
> + */
> + 
>  #ifdef ENCDEBUG
>  #define DPRINTF(fmt, args...)						\
>  	do {								\
> @@ -62,8 +67,8 @@
>  	do { } while (0)
>  #endif
>  
> -int	udpencap_enable = 1;	/* enabled by default */
> -int	udpencap_port = 4500;	/* triggers decapsulation */
> +int	udpencap_enable = 1;	/* [N] enabled by default */
> +int	udpencap_port = 4500;	/* [N] triggers decapsulation */
>  
>  /*
>   * Loop over a tdb chain, taking into consideration protocol tunneling. The
> Index: sys/netinet6/in6_proto.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/in6_proto.c,v
> retrieving revision 1.127
> diff -u -p -r1.127 in6_proto.c
> --- sys/netinet6/in6_proto.c	20 May 2025 18:41:06 -0000	1.127
> +++ sys/netinet6/in6_proto.c	13 Jun 2025 06:54:20 -0000
> @@ -215,7 +215,7 @@ const struct protosw inet6sw[] = {
>    .pr_type	= SOCK_RAW,
>    .pr_domain	= &inet6domain,
>    .pr_protocol	= IPPROTO_ESP,
> -  .pr_flags	= PR_ATOMIC|PR_ADDR,
> +  .pr_flags	= PR_ATOMIC|PR_ADDR|PR_MPSYSCTL,
>    .pr_input	= esp46_input,
>    .pr_ctloutput	= rip6_ctloutput,
>    .pr_usrreqs	= &rip6_usrreqs,