From: Vitaliy Makkoveev Subject: Re: sysctl: relax netlock around ESPCTL_UDPENCAP_ENABLE and ESPCTL_UDPENCAP_PORT To: Alexander Bluhm Cc: tech@openbsd.org Date: Sat, 14 Jun 2025 19:23:31 +0300 On Fri, Jun 13, 2025 at 04:48:17PM +0200, Alexander Bluhm wrote: > On Fri, Jun 13, 2025 at 04:33:32PM +0300, Vitaliy Makkoveev wrote: > > On Fri, Jun 13, 2025 at 03:03:55PM +0200, Alexander Bluhm wrote: > > > On Fri, Jun 13, 2025 at 10:10:59AM +0300, Vitaliy Makkoveev wrote: > > > > On Sat, Jun 07, 2025 at 04:25:58AM +0300, Vitaliy Makkoveev wrote: > > > > > Both `udpencap_enable' and `udpencap_port' are inetgers with the > > > > > read-write access via sysctl(2) and read-only access from the stack. > > > > > However, the recursive calls of ipsp_process_packet(), > > > > > ipsp_process_done() and (*xf_output)() make it complicated to be > > > > > atomically loads. > > > > > > > > > > So, keep the sysctl modification protected by netlock, but move the > > > > > sysctl read-only access and copying out of netlock. > > > > > > > > > > > > > The previous diff makes the whole esp_sysctl() path mp-safe, so move it > > > > out of locking. > > > > > > > > Note, ipsec(4) completely excluded from small kernel, this diff will not > > > > affect it. > > > > > > > > ok? > > > > > > I would prefer to remove the netlock from these integer varibles. > > > > > > They are used in multiple places in the stack, but they are > > > independent. So atomic operations within each function should be > > > enough. > > > > > > pfkeyv2_dosend() deals with userland > > > in_baddynamic() is used when binding other sockets > > > ipsp_process_done() is encrypting and encapsulating IPsec > > > udp_input() is matching encrypted packets before decrypting > > > udp_ctlinput() deals with ICMP packets containiung UDP with IPsec > > > > > > Neither udpencap_enable nor udpencap_port are evaluated twice for > > > the same packet. pfkeyv2_dosend() has them in disjuct swtich cases. > > > I think atomic_load_int() is the way to go. > > > > > > > We have the recursion. ipsp_process_done() calls ipsp_process_packet() > > which calls ipsp_process_done(). Should we keep cached values or not? > > The recursion happens only with bundled TDB. This is an obscure > feature anyway. I have no idea what happens when you encapsulate > a packet with UDP and then process it with a second TDB. Note that > ipsecctl(8) supports bundling and iked(8) configures UDP encap. I > don't know whether we have a userland tool than can do both. > > The worst thing that can happen with udpencap_enable and udpencap_port > is that the packet is dropped or a double UDP encapsulation has > different ports. There is nothing to worry about. > > Just add atomic loads and a local copy within the function. > > bluhm > Updated to use atomic_load_int(). `udpencap_enable' and `udpencap_port' mostly used together, so unlock them both. Index: sys/net/pfkeyv2.c =================================================================== RCS file: /cvs/src/sys/net/pfkeyv2.c,v retrieving revision 1.268 diff -u -p -r1.268 pfkeyv2.c --- sys/net/pfkeyv2.c 3 Jun 2025 14:49:05 -0000 1.268 +++ sys/net/pfkeyv2.c 14 Jun 2025 15:37:52 -0000 @@ -1287,7 +1287,7 @@ pfkeyv2_dosend(struct socket *so, void * #ifdef IPSEC /* UDP encap has to be enabled and is only supported for ESP */ if (headers[SADB_X_EXT_UDPENCAP] && - (!udpencap_enable || + (!atomic_load_int(&udpencap_enable) || smsg->sadb_msg_satype != SADB_SATYPE_ESP)) { rval = EINVAL; goto ret; @@ -1462,7 +1462,7 @@ pfkeyv2_dosend(struct socket *so, void * #ifdef IPSEC /* UDP encap has to be enabled and is only supported for ESP */ if (headers[SADB_X_EXT_UDPENCAP] && - (!udpencap_enable || + (!atomic_load_int(&udpencap_enable) || smsg->sadb_msg_satype != SADB_SATYPE_ESP)) { rval = EINVAL; goto ret; Index: sys/netinet/in_pcb.c =================================================================== RCS file: /cvs/src/sys/netinet/in_pcb.c,v retrieving revision 1.315 diff -u -p -r1.315 in_pcb.c --- sys/netinet/in_pcb.c 3 Jun 2025 16:51:26 -0000 1.315 +++ sys/netinet/in_pcb.c 14 Jun 2025 15:37:52 -0000 @@ -202,7 +202,7 @@ in_baddynamic(u_int16_t port, u_int16_t case IPPROTO_UDP: #ifdef IPSEC /* Cannot preset this as it is a sysctl */ - if (port == udpencap_port) + if (port == atomic_load_int(&udpencap_port)) return (1); #endif return (DP_ISSET(baddynamicports.udp, port)); Index: sys/netinet/in_proto.c =================================================================== RCS file: /cvs/src/sys/netinet/in_proto.c,v retrieving revision 1.124 diff -u -p -r1.124 in_proto.c --- sys/netinet/in_proto.c 12 Jun 2025 20:37:59 -0000 1.124 +++ sys/netinet/in_proto.c 14 Jun 2025 15:37:52 -0000 @@ -310,7 +310,7 @@ const struct protosw inetsw[] = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_ESP, - .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_MPSYSCTL, .pr_input = esp46_input, .pr_ctlinput = esp4_ctlinput, .pr_ctloutput = rip_ctloutput, Index: sys/netinet/ipsec_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ipsec_input.c,v retrieving revision 1.217 diff -u -p -r1.217 ipsec_input.c --- sys/netinet/ipsec_input.c 3 Jun 2025 14:49:05 -0000 1.217 +++ sys/netinet/ipsec_input.c 14 Jun 2025 15:37:52 -0000 @@ -124,12 +124,10 @@ int ipcomp_enable = 0; /* [a] */ const struct sysctl_bounded_args espctl_vars[] = { {ESPCTL_ENABLE, &esp_enable, 0, 1}, -}; - -const struct sysctl_bounded_args espctl_vars_locked[] = { {ESPCTL_UDPENCAP_ENABLE, &udpencap_enable, 0, 1}, {ESPCTL_UDPENCAP_PORT, &udpencap_port, 0, 65535}, }; + const struct sysctl_bounded_args ahctl_vars[] = { {AHCTL_ENABLE, &ah_enable, 0, 1}, }; @@ -718,8 +716,6 @@ int esp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int error; - /* All sysctl names at this level are terminal. */ if (namelen != 1) return (ENOTDIR); @@ -727,16 +723,9 @@ esp_sysctl(int *name, u_int namelen, voi switch (name[0]) { case ESPCTL_STATS: return (esp_sysctl_espstat(oldp, oldlenp, newp)); - case ESPCTL_ENABLE: - error = sysctl_bounded_arr(espctl_vars, nitems(espctl_vars), - name, namelen, oldp, oldlenp, newp, newlen); default: - NET_LOCK(); - error = sysctl_bounded_arr(espctl_vars_locked, - nitems(espctl_vars_locked), - name, namelen, oldp, oldlenp, newp, newlen); - NET_UNLOCK(); - return (error); + return (sysctl_bounded_arr(espctl_vars, nitems(espctl_vars), + name, namelen, oldp, oldlenp, newp, newlen)); } } Index: sys/netinet/ipsec_output.c =================================================================== RCS file: /cvs/src/sys/netinet/ipsec_output.c,v retrieving revision 1.102 diff -u -p -r1.102 ipsec_output.c --- sys/netinet/ipsec_output.c 3 Jun 2025 14:49:05 -0000 1.102 +++ sys/netinet/ipsec_output.c 14 Jun 2025 15:37:52 -0000 @@ -51,6 +51,11 @@ #include #include +/* + * Locks used to protect data: + * a atomic + */ + #ifdef ENCDEBUG #define DPRINTF(fmt, args...) \ do { \ @@ -62,8 +67,8 @@ do { } while (0) #endif -int udpencap_enable = 1; /* enabled by default */ -int udpencap_port = 4500; /* triggers decapsulation */ +int udpencap_enable = 1; /* [a] enabled by default */ +int udpencap_port = 4500; /* [a] triggers decapsulation */ /* * Loop over a tdb chain, taking into consideration protocol tunneling. The @@ -417,8 +422,10 @@ ipsp_process_done(struct mbuf *m, struct struct mbuf *mi; struct udphdr *uh; int iphlen; + int udpencap_port_local = atomic_load_int(&udpencap_port); - if (!udpencap_enable || !udpencap_port) { + if (!atomic_load_int(&udpencap_enable) || + !udpencap_port_local) { error = ENXIO; goto drop; } @@ -445,7 +452,7 @@ ipsp_process_done(struct mbuf *m, struct goto drop; } uh = (struct udphdr *)(mtod(mi, caddr_t) + roff); - uh->uh_sport = uh->uh_dport = htons(udpencap_port); + uh->uh_sport = uh->uh_dport = htons(udpencap_port_local); if (tdb->tdb_udpencap_port) uh->uh_dport = tdb->tdb_udpencap_port; Index: sys/netinet/udp_usrreq.c =================================================================== RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v retrieving revision 1.342 diff -u -p -r1.342 udp_usrreq.c --- sys/netinet/udp_usrreq.c 12 Jun 2025 20:37:59 -0000 1.342 +++ sys/netinet/udp_usrreq.c 14 Jun 2025 15:37:52 -0000 @@ -212,6 +212,9 @@ udp_input(struct mbuf **mp, int *offp, i } srcsa, dstsa; struct ip6_hdr *ip6 = NULL; u_int32_t ipsecflowinfo = 0; +#ifdef IPSEC + int udpencap_port_local = atomic_load_int(&udpencap_port); +#endif /* IPSEC */ udpstat_inc(udps_ipackets); @@ -305,11 +308,12 @@ udp_input(struct mbuf **mp, int *offp, i CLR(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT); #ifdef IPSEC - if (udpencap_enable && udpencap_port && atomic_load_int(&esp_enable) && + if (atomic_load_int(&udpencap_enable) && udpencap_port_local && + atomic_load_int(&esp_enable) && #if NPF > 0 !(m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) && #endif - uh->uh_dport == htons(udpencap_port)) { + uh->uh_dport == htons(udpencap_port_local)) { u_int32_t spi; int skip = iphlen + sizeof(struct udphdr); @@ -909,12 +913,16 @@ udp_ctlinput(int cmd, struct sockaddr *s if (ip) { struct inpcb *inp; struct socket *so = NULL; +#ifdef IPSEC + int udpencap_port_local = atomic_load_int(&udpencap_port); +#endif uhp = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2)); #ifdef IPSEC /* PMTU discovery for udpencap */ - if (cmd == PRC_MSGSIZE && ip_mtudisc && udpencap_enable && - udpencap_port && uhp->uh_sport == htons(udpencap_port)) { + if (cmd == PRC_MSGSIZE && ip_mtudisc && + atomic_load_int(&udpencap_enable) && udpencap_port_local && + uhp->uh_sport == udpencap_port_local) { udpencap_ctlinput(cmd, sa, rdomain, v); return; } Index: sys/netinet6/in6_proto.c =================================================================== RCS file: /cvs/src/sys/netinet6/in6_proto.c,v retrieving revision 1.128 diff -u -p -r1.128 in6_proto.c --- sys/netinet6/in6_proto.c 12 Jun 2025 20:37:59 -0000 1.128 +++ sys/netinet6/in6_proto.c 14 Jun 2025 15:37:52 -0000 @@ -223,7 +223,7 @@ const struct protosw inet6sw[] = { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_ESP, - .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_MPSYSCTL, .pr_input = esp46_input, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs,