Download raw body.
sysctl: relax netlock around ESPCTL_UDPENCAP_ENABLE and ESPCTL_UDPENCAP_PORT
sysctl: relax netlock around ESPCTL_UDPENCAP_ENABLE and ESPCTL_UDPENCAP_PORT
sysctl: relax netlock around ESPCTL_UDPENCAP_ENABLE and ESPCTL_UDPENCAP_PORT
On Fri, Jun 13, 2025 at 04:48:17PM +0200, Alexander Bluhm wrote:
> On Fri, Jun 13, 2025 at 04:33:32PM +0300, Vitaliy Makkoveev wrote:
> > On Fri, Jun 13, 2025 at 03:03:55PM +0200, Alexander Bluhm wrote:
> > > On Fri, Jun 13, 2025 at 10:10:59AM +0300, Vitaliy Makkoveev wrote:
> > > > On Sat, Jun 07, 2025 at 04:25:58AM +0300, Vitaliy Makkoveev wrote:
> > > > > Both `udpencap_enable' and `udpencap_port' are inetgers with the
> > > > > read-write access via sysctl(2) and read-only access from the stack.
> > > > > However, the recursive calls of ipsp_process_packet(),
> > > > > ipsp_process_done() and (*xf_output)() make it complicated to be
> > > > > atomically loads.
> > > > >
> > > > > So, keep the sysctl modification protected by netlock, but move the
> > > > > sysctl read-only access and copying out of netlock.
> > > > >
> > > >
> > > > The previous diff makes the whole esp_sysctl() path mp-safe, so move it
> > > > out of locking.
> > > >
> > > > Note, ipsec(4) completely excluded from small kernel, this diff will not
> > > > affect it.
> > > >
> > > > ok?
> > >
> > > I would prefer to remove the netlock from these integer varibles.
> > >
> > > They are used in multiple places in the stack, but they are
> > > independent. So atomic operations within each function should be
> > > enough.
> > >
> > > pfkeyv2_dosend() deals with userland
> > > in_baddynamic() is used when binding other sockets
> > > ipsp_process_done() is encrypting and encapsulating IPsec
> > > udp_input() is matching encrypted packets before decrypting
> > > udp_ctlinput() deals with ICMP packets containiung UDP with IPsec
> > >
> > > Neither udpencap_enable nor udpencap_port are evaluated twice for
> > > the same packet. pfkeyv2_dosend() has them in disjuct swtich cases.
> > > I think atomic_load_int() is the way to go.
> > >
> >
> > We have the recursion. ipsp_process_done() calls ipsp_process_packet()
> > which calls ipsp_process_done(). Should we keep cached values or not?
>
> The recursion happens only with bundled TDB. This is an obscure
> feature anyway. I have no idea what happens when you encapsulate
> a packet with UDP and then process it with a second TDB. Note that
> ipsecctl(8) supports bundling and iked(8) configures UDP encap. I
> don't know whether we have a userland tool than can do both.
>
> The worst thing that can happen with udpencap_enable and udpencap_port
> is that the packet is dropped or a double UDP encapsulation has
> different ports. There is nothing to worry about.
>
> Just add atomic loads and a local copy within the function.
>
> bluhm
>
Updated to use atomic_load_int(). `udpencap_enable' and `udpencap_port'
mostly used together, so unlock them both.
Index: sys/net/pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.268
diff -u -p -r1.268 pfkeyv2.c
--- sys/net/pfkeyv2.c 3 Jun 2025 14:49:05 -0000 1.268
+++ sys/net/pfkeyv2.c 14 Jun 2025 15:37:52 -0000
@@ -1287,7 +1287,7 @@ pfkeyv2_dosend(struct socket *so, void *
#ifdef IPSEC
/* UDP encap has to be enabled and is only supported for ESP */
if (headers[SADB_X_EXT_UDPENCAP] &&
- (!udpencap_enable ||
+ (!atomic_load_int(&udpencap_enable) ||
smsg->sadb_msg_satype != SADB_SATYPE_ESP)) {
rval = EINVAL;
goto ret;
@@ -1462,7 +1462,7 @@ pfkeyv2_dosend(struct socket *so, void *
#ifdef IPSEC
/* UDP encap has to be enabled and is only supported for ESP */
if (headers[SADB_X_EXT_UDPENCAP] &&
- (!udpencap_enable ||
+ (!atomic_load_int(&udpencap_enable) ||
smsg->sadb_msg_satype != SADB_SATYPE_ESP)) {
rval = EINVAL;
goto ret;
Index: sys/netinet/in_pcb.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.315
diff -u -p -r1.315 in_pcb.c
--- sys/netinet/in_pcb.c 3 Jun 2025 16:51:26 -0000 1.315
+++ sys/netinet/in_pcb.c 14 Jun 2025 15:37:52 -0000
@@ -202,7 +202,7 @@ in_baddynamic(u_int16_t port, u_int16_t
case IPPROTO_UDP:
#ifdef IPSEC
/* Cannot preset this as it is a sysctl */
- if (port == udpencap_port)
+ if (port == atomic_load_int(&udpencap_port))
return (1);
#endif
return (DP_ISSET(baddynamicports.udp, port));
Index: sys/netinet/in_proto.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_proto.c,v
retrieving revision 1.124
diff -u -p -r1.124 in_proto.c
--- sys/netinet/in_proto.c 12 Jun 2025 20:37:59 -0000 1.124
+++ sys/netinet/in_proto.c 14 Jun 2025 15:37:52 -0000
@@ -310,7 +310,7 @@ const struct protosw inetsw[] = {
.pr_type = SOCK_RAW,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_ESP,
- .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_MPSYSCTL,
.pr_input = esp46_input,
.pr_ctlinput = esp4_ctlinput,
.pr_ctloutput = rip_ctloutput,
Index: sys/netinet/ipsec_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ipsec_input.c,v
retrieving revision 1.217
diff -u -p -r1.217 ipsec_input.c
--- sys/netinet/ipsec_input.c 3 Jun 2025 14:49:05 -0000 1.217
+++ sys/netinet/ipsec_input.c 14 Jun 2025 15:37:52 -0000
@@ -124,12 +124,10 @@ int ipcomp_enable = 0; /* [a] */
const struct sysctl_bounded_args espctl_vars[] = {
{ESPCTL_ENABLE, &esp_enable, 0, 1},
-};
-
-const struct sysctl_bounded_args espctl_vars_locked[] = {
{ESPCTL_UDPENCAP_ENABLE, &udpencap_enable, 0, 1},
{ESPCTL_UDPENCAP_PORT, &udpencap_port, 0, 65535},
};
+
const struct sysctl_bounded_args ahctl_vars[] = {
{AHCTL_ENABLE, &ah_enable, 0, 1},
};
@@ -718,8 +716,6 @@ int
esp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
size_t newlen)
{
- int error;
-
/* All sysctl names at this level are terminal. */
if (namelen != 1)
return (ENOTDIR);
@@ -727,16 +723,9 @@ esp_sysctl(int *name, u_int namelen, voi
switch (name[0]) {
case ESPCTL_STATS:
return (esp_sysctl_espstat(oldp, oldlenp, newp));
- case ESPCTL_ENABLE:
- error = sysctl_bounded_arr(espctl_vars, nitems(espctl_vars),
- name, namelen, oldp, oldlenp, newp, newlen);
default:
- NET_LOCK();
- error = sysctl_bounded_arr(espctl_vars_locked,
- nitems(espctl_vars_locked),
- name, namelen, oldp, oldlenp, newp, newlen);
- NET_UNLOCK();
- return (error);
+ return (sysctl_bounded_arr(espctl_vars, nitems(espctl_vars),
+ name, namelen, oldp, oldlenp, newp, newlen));
}
}
Index: sys/netinet/ipsec_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ipsec_output.c,v
retrieving revision 1.102
diff -u -p -r1.102 ipsec_output.c
--- sys/netinet/ipsec_output.c 3 Jun 2025 14:49:05 -0000 1.102
+++ sys/netinet/ipsec_output.c 14 Jun 2025 15:37:52 -0000
@@ -51,6 +51,11 @@
#include <crypto/cryptodev.h>
#include <crypto/xform.h>
+/*
+ * Locks used to protect data:
+ * a atomic
+ */
+
#ifdef ENCDEBUG
#define DPRINTF(fmt, args...) \
do { \
@@ -62,8 +67,8 @@
do { } while (0)
#endif
-int udpencap_enable = 1; /* enabled by default */
-int udpencap_port = 4500; /* triggers decapsulation */
+int udpencap_enable = 1; /* [a] enabled by default */
+int udpencap_port = 4500; /* [a] triggers decapsulation */
/*
* Loop over a tdb chain, taking into consideration protocol tunneling. The
@@ -417,8 +422,10 @@ ipsp_process_done(struct mbuf *m, struct
struct mbuf *mi;
struct udphdr *uh;
int iphlen;
+ int udpencap_port_local = atomic_load_int(&udpencap_port);
- if (!udpencap_enable || !udpencap_port) {
+ if (!atomic_load_int(&udpencap_enable) ||
+ !udpencap_port_local) {
error = ENXIO;
goto drop;
}
@@ -445,7 +452,7 @@ ipsp_process_done(struct mbuf *m, struct
goto drop;
}
uh = (struct udphdr *)(mtod(mi, caddr_t) + roff);
- uh->uh_sport = uh->uh_dport = htons(udpencap_port);
+ uh->uh_sport = uh->uh_dport = htons(udpencap_port_local);
if (tdb->tdb_udpencap_port)
uh->uh_dport = tdb->tdb_udpencap_port;
Index: sys/netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.342
diff -u -p -r1.342 udp_usrreq.c
--- sys/netinet/udp_usrreq.c 12 Jun 2025 20:37:59 -0000 1.342
+++ sys/netinet/udp_usrreq.c 14 Jun 2025 15:37:52 -0000
@@ -212,6 +212,9 @@ udp_input(struct mbuf **mp, int *offp, i
} srcsa, dstsa;
struct ip6_hdr *ip6 = NULL;
u_int32_t ipsecflowinfo = 0;
+#ifdef IPSEC
+ int udpencap_port_local = atomic_load_int(&udpencap_port);
+#endif /* IPSEC */
udpstat_inc(udps_ipackets);
@@ -305,11 +308,12 @@ udp_input(struct mbuf **mp, int *offp, i
CLR(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT);
#ifdef IPSEC
- if (udpencap_enable && udpencap_port && atomic_load_int(&esp_enable) &&
+ if (atomic_load_int(&udpencap_enable) && udpencap_port_local &&
+ atomic_load_int(&esp_enable) &&
#if NPF > 0
!(m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) &&
#endif
- uh->uh_dport == htons(udpencap_port)) {
+ uh->uh_dport == htons(udpencap_port_local)) {
u_int32_t spi;
int skip = iphlen + sizeof(struct udphdr);
@@ -909,12 +913,16 @@ udp_ctlinput(int cmd, struct sockaddr *s
if (ip) {
struct inpcb *inp;
struct socket *so = NULL;
+#ifdef IPSEC
+ int udpencap_port_local = atomic_load_int(&udpencap_port);
+#endif
uhp = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
#ifdef IPSEC
/* PMTU discovery for udpencap */
- if (cmd == PRC_MSGSIZE && ip_mtudisc && udpencap_enable &&
- udpencap_port && uhp->uh_sport == htons(udpencap_port)) {
+ if (cmd == PRC_MSGSIZE && ip_mtudisc &&
+ atomic_load_int(&udpencap_enable) && udpencap_port_local &&
+ uhp->uh_sport == udpencap_port_local) {
udpencap_ctlinput(cmd, sa, rdomain, v);
return;
}
Index: sys/netinet6/in6_proto.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6_proto.c,v
retrieving revision 1.128
diff -u -p -r1.128 in6_proto.c
--- sys/netinet6/in6_proto.c 12 Jun 2025 20:37:59 -0000 1.128
+++ sys/netinet6/in6_proto.c 14 Jun 2025 15:37:52 -0000
@@ -223,7 +223,7 @@ const struct protosw inet6sw[] = {
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_ESP,
- .pr_flags = PR_ATOMIC|PR_ADDR,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_MPSYSCTL,
.pr_input = esp46_input,
.pr_ctloutput = rip6_ctloutput,
.pr_usrreqs = &rip6_usrreqs,
sysctl: relax netlock around ESPCTL_UDPENCAP_ENABLE and ESPCTL_UDPENCAP_PORT
sysctl: relax netlock around ESPCTL_UDPENCAP_ENABLE and ESPCTL_UDPENCAP_PORT
sysctl: relax netlock around ESPCTL_UDPENCAP_ENABLE and ESPCTL_UDPENCAP_PORT