Index | Thread | Search

From:
Vitaliy Makkoveev <mvs@openbsd.org>
Subject:
sysctl: unlock IPCTL_MTUDISC
To:
Alexander Bluhm <bluhm@openbsd.org>, tech@openbsd.org
Date:
Mon, 23 Jun 2025 15:06:23 +0300

Download raw body.

Thread
`ip_mtudisc' is atomically accessed boolean, so allow only 0 and 1
values to set. Also, while `ip_mtudisc' is 0, the rt_timer_queue_flush()
will be triggered all the times even if we do read access. There is no
reason for that, so flush the queue only if this thread successfully
assigned 0 value. The rt_timer_queue_flush() requires to be serialized
with the netlock.


Index: sys/net/if_bridge.c
===================================================================
RCS file: /cvs/src/sys/net/if_bridge.c,v
diff -u -p -r1.374 if_bridge.c
--- sys/net/if_bridge.c	2 Mar 2025 21:28:31 -0000	1.374
+++ sys/net/if_bridge.c	23 Jun 2025 11:47:14 -0000
@@ -1640,7 +1640,8 @@ bridge_ipsec(struct ifnet *ifp, struct e
 
 			ip = mtod(m, struct ip *);
 			if ((af == AF_INET) &&
-			    ip_mtudisc && (ip->ip_off & htons(IP_DF)) &&
+			    atomic_load_int(&ip_mtudisc) &&
+			    (ip->ip_off & htons(IP_DF)) &&
 			    tdb->tdb_mtu && ntohs(ip->ip_len) > tdb->tdb_mtu &&
 			    tdb->tdb_mtutimeout > gettime()) {
 				bridge_send_icmp_err(ifp, eh, m,
Index: sys/net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
diff -u -p -r1.1214 pf.c
--- sys/net/pf.c	23 Jun 2025 09:16:32 -0000	1.1214
+++ sys/net/pf.c	23 Jun 2025 11:47:14 -0000
@@ -3326,7 +3326,7 @@ pf_build_tcp(const struct pf_rule *r, sa
 		h->ip_hl = sizeof(*h) >> 2;
 		h->ip_tos = IPTOS_LOWDELAY;
 		h->ip_len = htons(len);
-		h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
+		h->ip_off = htons(atomic_load_int(&ip_mtudisc) ? IP_DF : 0);
 		h->ip_ttl = ttl ? ttl : ip_defttl;
 		h->ip_sum = 0;
 		h->ip_src.s_addr = saddr->v4.s_addr;
Index: sys/netinet/ip_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_input.c,v
diff -u -p -r1.411 ip_input.c
--- sys/netinet/ip_input.c	23 Jun 2025 09:16:32 -0000	1.411
+++ sys/netinet/ip_input.c	23 Jun 2025 11:47:14 -0000
@@ -97,7 +97,7 @@ int	ipmultipath = 0;			/* [a] */
 int	ip_sendredirects = 1;			/* [a] */
 int	ip_dosourceroute = 0;			/* [a] */
 int	ip_defttl = IPDEFTTL;
-int	ip_mtudisc = 1;
+int	ip_mtudisc = 1;				/* [a] */
 int	ip_mtudisc_timeout = IPMTUDISCTIMEOUT;	/* [a] */
 int	ip_directedbcast = 0;			/* [a] */
 
@@ -1743,12 +1743,18 @@ ip_sysctl(int *name, u_int namelen, void
 		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
 		    &ip_dosourceroute));
 	case IPCTL_MTUDISC:
-		NET_LOCK();
-		error = sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtudisc);
-		if (ip_mtudisc == 0)
+		oldval = newval = atomic_load_int(&ip_mtudisc);
+		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
+		    &newval, 0, 1);
+		if (error == 0 && oldval != newval &&
+		    oldval == atomic_cas_uint(&ip_mtudisc, oldval, newval) &&
+		    newval == 0) {
+			NET_LOCK();
 			rt_timer_queue_flush(&ip_mtudisc_timeout_q);
-		NET_UNLOCK();
-		return error;
+			NET_UNLOCK();
+		}
+
+		return (error);
 	case IPCTL_MTUDISCTIMEOUT:
 		oldval = newval = atomic_load_int(&ip_mtudisc_timeout);
 		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
Index: sys/netinet/ip_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_output.c,v
diff -u -p -r1.409 ip_output.c
--- sys/netinet/ip_output.c	14 May 2025 14:32:15 -0000	1.409
+++ sys/netinet/ip_output.c	23 Jun 2025 11:47:14 -0000
@@ -445,7 +445,7 @@ reroute:
 	 */
 	if (ip->ip_off & htons(IP_DF)) {
 #ifdef IPSEC
-		if (ip_mtudisc)
+		if (atomic_load_int(&ip_mtudisc))
 			ipsec_adjust_mtu(m, ifp->if_mtu);
 #endif
 		error = EMSGSIZE;
@@ -583,7 +583,8 @@ ip_output_ipsec_send(struct tdb *tdb, st
 	struct ip *ip;
 	struct in_addr dst;
 	u_int len;
-	int error, tso = 0;
+	int tso = 0, ip_mtudisc_local = atomic_load_int(&ip_mtudisc);
+	int error;
 
 #if NPF > 0
 	/*
@@ -616,7 +617,7 @@ ip_output_ipsec_send(struct tdb *tdb, st
 
 	/* Check if we are allowed to fragment */
 	dst = ip->ip_dst;
-	if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu &&
+	if (ip_mtudisc_local && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu &&
 	    len > tdb->tdb_mtu && tdb->tdb_mtutimeout > gettime()) {
 		ip_output_ipsec_pmtu_update(tdb, ro, dst, rtableid);
 		ipsec_adjust_mtu(m, tdb->tdb_mtu);
@@ -624,7 +625,7 @@ ip_output_ipsec_send(struct tdb *tdb, st
 		return EMSGSIZE;
 	}
 	/* propagate IP_DF for v4-over-v6 */
-	if (ip_mtudisc && ip->ip_off & htons(IP_DF))
+	if (ip_mtudisc_local && ip->ip_off & htons(IP_DF))
 		SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
 
 	/*
@@ -661,7 +662,7 @@ ip_output_ipsec_send(struct tdb *tdb, st
 	}
 	if (!error && tso)
 		tcpstat_inc(tcps_outswtso);
-	if (ip_mtudisc && error == EMSGSIZE)
+	if (ip_mtudisc_local && error == EMSGSIZE)
 		ip_output_ipsec_pmtu_update(tdb, ro, dst, rtableid);
 	return error;
 }
Index: sys/netinet/ipsec_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/ipsec_input.c,v
diff -u -p -r1.219 ipsec_input.c
--- sys/netinet/ipsec_input.c	21 Jun 2025 14:21:17 -0000	1.219
+++ sys/netinet/ipsec_input.c	23 Jun 2025 11:47:14 -0000
@@ -941,7 +941,8 @@ ipsec_common_ctlinput(u_int rdomain, int
 {
 	struct ip *ip = v;
 
-	if (cmd == PRC_MSGSIZE && ip && ip_mtudisc && ip->ip_v == 4) {
+	if (cmd == PRC_MSGSIZE && ip && atomic_load_int(&ip_mtudisc) &&
+	    ip->ip_v == 4) {
 		struct tdb *tdbp;
 		struct sockaddr_in dst;
 		struct icmp *icp;
Index: sys/netinet/tcp_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
diff -u -p -r1.453 tcp_input.c
--- sys/netinet/tcp_input.c	18 Jun 2025 16:15:46 -0000	1.453
+++ sys/netinet/tcp_input.c	23 Jun 2025 11:47:14 -0000
@@ -2952,7 +2952,7 @@ tcp_mss(struct tcpcb *tp, int offer)
 	} else if (ifp->if_flags & IFF_LOOPBACK) {
 		mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
 	} else if (tp->pf == AF_INET) {
-		if (ip_mtudisc)
+		if (atomic_load_int(&ip_mtudisc))
 			mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
 	}
 #ifdef INET6
@@ -4284,7 +4284,7 @@ syn_cache_respond(struct syn_cache *sc, 
 			ip->ip_tos = inp->inp_ip.ip_tos;
 
 		error = ip_output(m, sc->sc_ipopts, &sc->sc_route,
-		    (ip_mtudisc ? IP_MTUDISC : 0),  NULL,
+		    (atomic_load_int(&ip_mtudisc) ? IP_MTUDISC : 0),  NULL,
 		    inp ? &inp->inp_seclevel : NULL, 0);
 		break;
 #ifdef INET6
Index: sys/netinet/tcp_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_output.c,v
diff -u -p -r1.154 tcp_output.c
--- sys/netinet/tcp_output.c	21 Apr 2025 09:54:53 -0000	1.154
+++ sys/netinet/tcp_output.c	23 Jun 2025 11:47:14 -0000
@@ -1095,7 +1095,7 @@ send:
 		}
 		error = ip_output(m, tp->t_inpcb->inp_options,
 		    &tp->t_inpcb->inp_route,
-		    (ip_mtudisc ? IP_MTUDISC : 0), NULL,
+		    (atomic_load_int(&ip_mtudisc) ? IP_MTUDISC : 0), NULL,
 		    &tp->t_inpcb->inp_seclevel, 0);
 		break;
 #ifdef INET6
Index: sys/netinet/tcp_subr.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_subr.c,v
diff -u -p -r1.212 tcp_subr.c
--- sys/netinet/tcp_subr.c	8 Jun 2025 17:06:19 -0000	1.212
+++ sys/netinet/tcp_subr.c	23 Jun 2025 11:47:14 -0000
@@ -412,7 +412,7 @@ tcp_respond(struct tcpcb *tp, caddr_t te
 		ip->ip_tos = 0;
 		ip_output(m, NULL,
 		    tp ? &tp->t_inpcb->inp_route : NULL,
-		    ip_mtudisc ? IP_MTUDISC : 0, NULL,
+		    atomic_load_int(&ip_mtudisc) ? IP_MTUDISC : 0, NULL,
 		    tp ? &tp->t_inpcb->inp_seclevel : NULL, 0);
 		break;
 	}
@@ -744,7 +744,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s
 		return;
 	else if (PRC_IS_REDIRECT(cmd))
 		notify = in_pcbrtchange, ip = NULL;
-	else if (cmd == PRC_MSGSIZE && ip_mtudisc && ip) {
+	else if (cmd == PRC_MSGSIZE && atomic_load_int(&ip_mtudisc) && ip) {
 		struct inpcb *inp;
 		struct socket *so = NULL;
 		struct tcpcb *tp = NULL;
Index: sys/netinet/tcp_timer.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_timer.c,v
diff -u -p -r1.85 tcp_timer.c
--- sys/netinet/tcp_timer.c	8 Jun 2025 17:06:19 -0000	1.85
+++ sys/netinet/tcp_timer.c	23 Jun 2025 11:47:14 -0000
@@ -277,7 +277,7 @@ tcp_timer_rexmt(void *arg)
 	 * lots more sophisticated searching to find the right
 	 * value here...
 	 */
-	if (ip_mtudisc &&
+	if (atomic_load_int(&ip_mtudisc) &&
 	    TCPS_HAVEESTABLISHED(tp->t_state) &&
 	    tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) {
 		struct rtentry *rt = NULL;
Index: sys/netinet/udp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
diff -u -p -r1.344 udp_usrreq.c
--- sys/netinet/udp_usrreq.c	18 Jun 2025 16:15:46 -0000	1.344
+++ sys/netinet/udp_usrreq.c	23 Jun 2025 11:47:14 -0000
@@ -919,7 +919,7 @@ udp_ctlinput(int cmd, struct sockaddr *s
 		uhp = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 #ifdef IPSEC
 		/* PMTU discovery for udpencap */
-		if (cmd == PRC_MSGSIZE && ip_mtudisc &&
+		if (cmd == PRC_MSGSIZE && atomic_load_int(&ip_mtudisc) &&
 		    atomic_load_int(&udpencap_enable) && udpencap_port_local &&
 		    uhp->uh_sport == udpencap_port_local) {
 			udpencap_ctlinput(cmd, sa, rdomain, v);
Index: sys/netinet6/ip6_output.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
diff -u -p -r1.299 ip6_output.c
--- sys/netinet6/ip6_output.c	14 May 2025 14:32:15 -0000	1.299
+++ sys/netinet6/ip6_output.c	23 Jun 2025 11:47:14 -0000
@@ -693,7 +693,7 @@ reroute:
 
 	if (dontfrag && tlen > ifp->if_mtu) {		/* case 2-b */
 #ifdef IPSEC
-		if (ip_mtudisc)
+		if (atomic_load_int(&ip_mtudisc))
 			ipsec_adjust_mtu(m, mtu);
 #endif
 		error = EMSGSIZE;
@@ -2836,7 +2836,8 @@ ip6_output_ipsec_send(struct tdb *tdb, s
 	struct ip6_hdr *ip6;
 	struct in6_addr dst;
 	u_int len;
-	int error, ifidx, tso = 0;
+	int ifidx, tso = 0, ip_mtudisc_local =  atomic_load_int(&ip_mtudisc);
+	int error;
 
 #if NPF > 0
 	/*
@@ -2870,7 +2871,7 @@ ip6_output_ipsec_send(struct tdb *tdb, s
 	/* Check if we are allowed to fragment */
 	dst = ip6->ip6_dst;
 	ifidx = m->m_pkthdr.ph_ifidx;
-	if (ip_mtudisc && tdb->tdb_mtu &&
+	if (ip_mtudisc_local && tdb->tdb_mtu &&
 	    len > tdb->tdb_mtu && tdb->tdb_mtutimeout > gettime()) {
 		int transportmode;
 
@@ -2889,7 +2890,7 @@ ip6_output_ipsec_send(struct tdb *tdb, s
 		return EMSGSIZE;
 	}
 	/* propagate don't fragment for v6-over-v6 */
-	if (ip_mtudisc)
+	if (ip_mtudisc_local)
 		SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
 
 	/*
@@ -2926,7 +2927,7 @@ ip6_output_ipsec_send(struct tdb *tdb, s
 	}
 	if (!error && tso)
 		tcpstat_inc(tcps_outswtso);
-	if (ip_mtudisc && error == EMSGSIZE)
+	if (ip_mtudisc_local && error == EMSGSIZE)
 		ip6_output_ipsec_pmtu_update(tdb, ro, &dst, ifidx, rtableid, 0);
 	return error;
 }