Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
Re: sysctl: relax netlock around ESPCTL_UDPENCAP_ENABLE and ESPCTL_UDPENCAP_PORT
To:
Alexander Bluhm <bluhm@openbsd.org>
Cc:
tech@openbsd.org
Date:
Sat, 14 Jun 2025 19:23:31 +0300

Download raw body.

Thread
On Fri, Jun 13, 2025 at 04:48:17PM +0200, Alexander Bluhm wrote:
> On Fri, Jun 13, 2025 at 04:33:32PM +0300, Vitaliy Makkoveev wrote:
> > On Fri, Jun 13, 2025 at 03:03:55PM +0200, Alexander Bluhm wrote:
> > > On Fri, Jun 13, 2025 at 10:10:59AM +0300, Vitaliy Makkoveev wrote:
> > > > On Sat, Jun 07, 2025 at 04:25:58AM +0300, Vitaliy Makkoveev wrote:
> > > > > Both `udpencap_enable' and `udpencap_port' are inetgers with the
> > > > > read-write access via sysctl(2) and read-only access from the stack.
> > > > > However, the recursive calls of ipsp_process_packet(),
> > > > > ipsp_process_done() and (*xf_output)() make it complicated to be
> > > > > atomically loads.
> > > > > 
> > > > > So, keep the sysctl modification protected by netlock, but move the
> > > > > sysctl read-only access and copying out of netlock.
> > > > > 
> > > > 
> > > > The previous diff makes the whole esp_sysctl() path mp-safe, so move it
> > > > out of locking.
> > > > 
> > > > Note, ipsec(4) completely excluded from small kernel, this diff will not
> > > > affect it.
> > > > 
> > > > ok?
> > > 
> > > I would prefer to remove the netlock from these integer varibles.
> > > 
> > > They are used in multiple places in the stack, but they are
> > > independent.  So atomic operations within each function should be
> > > enough.
> > > 
> > > pfkeyv2_dosend() deals with userland
> > > in_baddynamic() is used when binding other sockets
> > > ipsp_process_done() is encrypting and encapsulating IPsec
> > > udp_input() is matching encrypted packets before decrypting
> > > udp_ctlinput() deals with ICMP packets containiung UDP with IPsec
> > > 
> > > Neither udpencap_enable nor udpencap_port are evaluated twice for
> > > the same packet.  pfkeyv2_dosend() has them in disjuct swtich cases.
> > > I think atomic_load_int() is the way to go.
> > > 
> > 
> > We have the recursion. ipsp_process_done() calls ipsp_process_packet()
> > which calls ipsp_process_done(). Should we keep cached values or not?
> 
> The recursion happens only with bundled TDB.  This is an obscure
> feature anyway.  I have no idea what happens when you encapsulate
> a packet with UDP and then process it with a second TDB.  Note that
> ipsecctl(8) supports bundling and iked(8) configures UDP encap.  I
> don't know whether we have a userland tool than can do both.
> 
> The worst thing that can happen with udpencap_enable and udpencap_port
> is that the packet is dropped or a double UDP encapsulation has
> different ports.  There is nothing to worry about.
> 
> Just add atomic loads and a local copy within the function.
> 
> bluhm
> 

Updated to use atomic_load_int(). `udpencap_enable' and `udpencap_port'
mostly used together, so unlock them both.

Index: sys/net/pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.268
diff -u -p -r1.268 pfkeyv2.c
--- sys/net/pfkeyv2.c	3 Jun 2025 14:49:05 -0000	1.268
+++ sys/net/pfkeyv2.c	14 Jun 2025 15:37:52 -0000
@@ -1287,7 +1287,7 @@ pfkeyv2_dosend(struct socket *so, void *
 #ifdef IPSEC
 		/* UDP encap has to be enabled and is only supported for ESP */
 		if (headers[SADB_X_EXT_UDPENCAP] &&
-		    (!udpencap_enable ||
+		    (!atomic_load_int(&udpencap_enable) ||
 		    smsg->sadb_msg_satype != SADB_SATYPE_ESP)) {
 			rval = EINVAL;
 			goto ret;
@@ -1462,7 +1462,7 @@ pfkeyv2_dosend(struct socket *so, void *
 #ifdef IPSEC
 		/* UDP encap has to be enabled and is only supported for ESP */
 		if (headers[SADB_X_EXT_UDPENCAP] &&
-		    (!udpencap_enable ||
+		    (!atomic_load_int(&udpencap_enable) ||
 		    smsg->sadb_msg_satype != SADB_SATYPE_ESP)) {
 			rval = EINVAL;
 			goto ret;
Index: sys/netinet/in_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.315
diff -u -p -r1.315 in_pcb.c
--- sys/netinet/in_pcb.c	3 Jun 2025 16:51:26 -0000	1.315
+++ sys/netinet/in_pcb.c	14 Jun 2025 15:37:52 -0000
@@ -202,7 +202,7 @@ in_baddynamic(u_int16_t port, u_int16_t 
 	case IPPROTO_UDP:
 #ifdef IPSEC
 		/* Cannot preset this as it is a sysctl */
-		if (port == udpencap_port)
+		if (port == atomic_load_int(&udpencap_port))
 			return (1);
 #endif
 		return (DP_ISSET(baddynamicports.udp, port));
Index: sys/netinet/in_proto.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_proto.c,v
retrieving revision 1.124
diff -u -p -r1.124 in_proto.c
--- sys/netinet/in_proto.c	12 Jun 2025 20:37:59 -0000	1.124
+++ sys/netinet/in_proto.c	14 Jun 2025 15:37:52 -0000
@@ -310,7 +310,7 @@ const struct protosw inetsw[] = {
   .pr_type	= SOCK_RAW,
   .pr_domain	= &inetdomain,
   .pr_protocol	= IPPROTO_ESP,
-  .pr_flags	= PR_ATOMIC|PR_ADDR,
+  .pr_flags	= PR_ATOMIC|PR_ADDR|PR_MPSYSCTL,
   .pr_input	= esp46_input,
   .pr_ctlinput	= esp4_ctlinput,
   .pr_ctloutput	= rip_ctloutput,
Index: sys/netinet/ipsec_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ipsec_input.c,v
retrieving revision 1.217
diff -u -p -r1.217 ipsec_input.c
--- sys/netinet/ipsec_input.c	3 Jun 2025 14:49:05 -0000	1.217
+++ sys/netinet/ipsec_input.c	14 Jun 2025 15:37:52 -0000
@@ -124,12 +124,10 @@ int ipcomp_enable = 0;		/* [a] */
 
 const struct sysctl_bounded_args espctl_vars[] = {
 	{ESPCTL_ENABLE, &esp_enable, 0, 1},
-};
-
-const struct sysctl_bounded_args espctl_vars_locked[] = {
 	{ESPCTL_UDPENCAP_ENABLE, &udpencap_enable, 0, 1},
 	{ESPCTL_UDPENCAP_PORT, &udpencap_port, 0, 65535},
 };
+
 const struct sysctl_bounded_args ahctl_vars[] = {
 	{AHCTL_ENABLE, &ah_enable, 0, 1},
 };
@@ -718,8 +716,6 @@ int
 esp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
     size_t newlen)
 {
-	int error;
-
 	/* All sysctl names at this level are terminal. */
 	if (namelen != 1)
 		return (ENOTDIR);
@@ -727,16 +723,9 @@ esp_sysctl(int *name, u_int namelen, voi
 	switch (name[0]) {
 	case ESPCTL_STATS:
 		return (esp_sysctl_espstat(oldp, oldlenp, newp));
-	case ESPCTL_ENABLE:
-		error = sysctl_bounded_arr(espctl_vars, nitems(espctl_vars),
-		    name, namelen, oldp, oldlenp, newp, newlen);
 	default:
-		NET_LOCK();
-		error = sysctl_bounded_arr(espctl_vars_locked,
-		    nitems(espctl_vars_locked),
-		    name, namelen, oldp, oldlenp, newp, newlen);
-		NET_UNLOCK();
-		return (error);
+		return (sysctl_bounded_arr(espctl_vars, nitems(espctl_vars),
+		    name, namelen, oldp, oldlenp, newp, newlen));
 	}
 }
 
Index: sys/netinet/ipsec_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ipsec_output.c,v
retrieving revision 1.102
diff -u -p -r1.102 ipsec_output.c
--- sys/netinet/ipsec_output.c	3 Jun 2025 14:49:05 -0000	1.102
+++ sys/netinet/ipsec_output.c	14 Jun 2025 15:37:52 -0000
@@ -51,6 +51,11 @@
 #include <crypto/cryptodev.h>
 #include <crypto/xform.h>
 
+/*
+ * Locks used to protect data:
+ *	a	atomic
+ */
+
 #ifdef ENCDEBUG
 #define DPRINTF(fmt, args...)						\
 	do {								\
@@ -62,8 +67,8 @@
 	do { } while (0)
 #endif
 
-int	udpencap_enable = 1;	/* enabled by default */
-int	udpencap_port = 4500;	/* triggers decapsulation */
+int	udpencap_enable = 1;	/* [a] enabled by default */
+int	udpencap_port = 4500;	/* [a] triggers decapsulation */
 
 /*
  * Loop over a tdb chain, taking into consideration protocol tunneling. The
@@ -417,8 +422,10 @@ ipsp_process_done(struct mbuf *m, struct
 		struct mbuf *mi;
 		struct udphdr *uh;
 		int iphlen;
+		int udpencap_port_local = atomic_load_int(&udpencap_port);
 
-		if (!udpencap_enable || !udpencap_port) {
+		if (!atomic_load_int(&udpencap_enable) ||
+		    !udpencap_port_local) {
 			error = ENXIO;
 			goto drop;
 		}
@@ -445,7 +452,7 @@ ipsp_process_done(struct mbuf *m, struct
 			goto drop;
 		}
 		uh = (struct udphdr *)(mtod(mi, caddr_t) + roff);
-		uh->uh_sport = uh->uh_dport = htons(udpencap_port);
+		uh->uh_sport = uh->uh_dport = htons(udpencap_port_local);
 		if (tdb->tdb_udpencap_port)
 			uh->uh_dport = tdb->tdb_udpencap_port;
 
Index: sys/netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.342
diff -u -p -r1.342 udp_usrreq.c
--- sys/netinet/udp_usrreq.c	12 Jun 2025 20:37:59 -0000	1.342
+++ sys/netinet/udp_usrreq.c	14 Jun 2025 15:37:52 -0000
@@ -212,6 +212,9 @@ udp_input(struct mbuf **mp, int *offp, i
 	} srcsa, dstsa;
 	struct ip6_hdr *ip6 = NULL;
 	u_int32_t ipsecflowinfo = 0;
+#ifdef IPSEC
+	int udpencap_port_local = atomic_load_int(&udpencap_port);
+#endif /* IPSEC */
 
 	udpstat_inc(udps_ipackets);
 
@@ -305,11 +308,12 @@ udp_input(struct mbuf **mp, int *offp, i
 	CLR(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT);
 
 #ifdef IPSEC
-	if (udpencap_enable && udpencap_port && atomic_load_int(&esp_enable) &&
+	if (atomic_load_int(&udpencap_enable) && udpencap_port_local &&
+	    atomic_load_int(&esp_enable) &&
 #if NPF > 0
 	    !(m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) &&
 #endif
-	    uh->uh_dport == htons(udpencap_port)) {
+	    uh->uh_dport == htons(udpencap_port_local)) {
 		u_int32_t spi;
 		int skip = iphlen + sizeof(struct udphdr);
 
@@ -909,12 +913,16 @@ udp_ctlinput(int cmd, struct sockaddr *s
 	if (ip) {
 		struct inpcb *inp;
 		struct socket *so = NULL;
+#ifdef IPSEC
+		int udpencap_port_local = atomic_load_int(&udpencap_port);
+#endif
 
 		uhp = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 #ifdef IPSEC
 		/* PMTU discovery for udpencap */
-		if (cmd == PRC_MSGSIZE && ip_mtudisc && udpencap_enable &&
-		    udpencap_port && uhp->uh_sport == htons(udpencap_port)) {
+		if (cmd == PRC_MSGSIZE && ip_mtudisc &&
+		    atomic_load_int(&udpencap_enable) && udpencap_port_local &&
+		    uhp->uh_sport == udpencap_port_local) {
 			udpencap_ctlinput(cmd, sa, rdomain, v);
 			return;
 		}
Index: sys/netinet6/in6_proto.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6_proto.c,v
retrieving revision 1.128
diff -u -p -r1.128 in6_proto.c
--- sys/netinet6/in6_proto.c	12 Jun 2025 20:37:59 -0000	1.128
+++ sys/netinet6/in6_proto.c	14 Jun 2025 15:37:52 -0000
@@ -223,7 +223,7 @@ const struct protosw inet6sw[] = {
   .pr_type	= SOCK_RAW,
   .pr_domain	= &inet6domain,
   .pr_protocol	= IPPROTO_ESP,
-  .pr_flags	= PR_ATOMIC|PR_ADDR,
+  .pr_flags	= PR_ATOMIC|PR_ADDR|PR_MPSYSCTL,
   .pr_input	= esp46_input,
   .pr_ctloutput	= rip6_ctloutput,
   .pr_usrreqs	= &rip6_usrreqs,