Download raw body.
sysctl: unlock IPCTL_MTUDISC
On Mon, Jun 23, 2025 at 03:06:23PM +0300, Vitaliy Makkoveev wrote:
> `ip_mtudisc' is atomically accessed boolean, so allow only 0 and 1
> values to set. Also, while `ip_mtudisc' is 0, the rt_timer_queue_flush()
> will be triggered all the times even if we do read access. There is no
> reason for that, so flush the queue only if this thread successfully
> assigned 0 value. The rt_timer_queue_flush() requires to be serialized
> with the netlock.
I would guess shared netlock would be enough for rt_timer_queue_flush().
I think rtdeletemsg() needs it. But there are also custom callbacks.
Better safe than sorry and use exclusive netlock. Routing code is
fragile enough.
OK bluhm@
> Index: sys/net/if_bridge.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_bridge.c,v
> diff -u -p -r1.374 if_bridge.c
> --- sys/net/if_bridge.c 2 Mar 2025 21:28:31 -0000 1.374
> +++ sys/net/if_bridge.c 23 Jun 2025 11:47:14 -0000
> @@ -1640,7 +1640,8 @@ bridge_ipsec(struct ifnet *ifp, struct e
>
> ip = mtod(m, struct ip *);
> if ((af == AF_INET) &&
> - ip_mtudisc && (ip->ip_off & htons(IP_DF)) &&
> + atomic_load_int(&ip_mtudisc) &&
> + (ip->ip_off & htons(IP_DF)) &&
> tdb->tdb_mtu && ntohs(ip->ip_len) > tdb->tdb_mtu &&
> tdb->tdb_mtutimeout > gettime()) {
> bridge_send_icmp_err(ifp, eh, m,
> Index: sys/net/pf.c
> ===================================================================
> RCS file: /cvs/src/sys/net/pf.c,v
> diff -u -p -r1.1214 pf.c
> --- sys/net/pf.c 23 Jun 2025 09:16:32 -0000 1.1214
> +++ sys/net/pf.c 23 Jun 2025 11:47:14 -0000
> @@ -3326,7 +3326,7 @@ pf_build_tcp(const struct pf_rule *r, sa
> h->ip_hl = sizeof(*h) >> 2;
> h->ip_tos = IPTOS_LOWDELAY;
> h->ip_len = htons(len);
> - h->ip_off = htons(ip_mtudisc ? IP_DF : 0);
> + h->ip_off = htons(atomic_load_int(&ip_mtudisc) ? IP_DF : 0);
> h->ip_ttl = ttl ? ttl : ip_defttl;
> h->ip_sum = 0;
> h->ip_src.s_addr = saddr->v4.s_addr;
> Index: sys/netinet/ip_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> diff -u -p -r1.411 ip_input.c
> --- sys/netinet/ip_input.c 23 Jun 2025 09:16:32 -0000 1.411
> +++ sys/netinet/ip_input.c 23 Jun 2025 11:47:14 -0000
> @@ -97,7 +97,7 @@ int ipmultipath = 0; /* [a] */
> int ip_sendredirects = 1; /* [a] */
> int ip_dosourceroute = 0; /* [a] */
> int ip_defttl = IPDEFTTL;
> -int ip_mtudisc = 1;
> +int ip_mtudisc = 1; /* [a] */
> int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; /* [a] */
> int ip_directedbcast = 0; /* [a] */
>
> @@ -1743,12 +1743,18 @@ ip_sysctl(int *name, u_int namelen, void
> return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
> &ip_dosourceroute));
> case IPCTL_MTUDISC:
> - NET_LOCK();
> - error = sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtudisc);
> - if (ip_mtudisc == 0)
> + oldval = newval = atomic_load_int(&ip_mtudisc);
> + error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
> + &newval, 0, 1);
> + if (error == 0 && oldval != newval &&
> + oldval == atomic_cas_uint(&ip_mtudisc, oldval, newval) &&
> + newval == 0) {
> + NET_LOCK();
> rt_timer_queue_flush(&ip_mtudisc_timeout_q);
> - NET_UNLOCK();
> - return error;
> + NET_UNLOCK();
> + }
> +
> + return (error);
> case IPCTL_MTUDISCTIMEOUT:
> oldval = newval = atomic_load_int(&ip_mtudisc_timeout);
> error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
> Index: sys/netinet/ip_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ip_output.c,v
> diff -u -p -r1.409 ip_output.c
> --- sys/netinet/ip_output.c 14 May 2025 14:32:15 -0000 1.409
> +++ sys/netinet/ip_output.c 23 Jun 2025 11:47:14 -0000
> @@ -445,7 +445,7 @@ reroute:
> */
> if (ip->ip_off & htons(IP_DF)) {
> #ifdef IPSEC
> - if (ip_mtudisc)
> + if (atomic_load_int(&ip_mtudisc))
> ipsec_adjust_mtu(m, ifp->if_mtu);
> #endif
> error = EMSGSIZE;
> @@ -583,7 +583,8 @@ ip_output_ipsec_send(struct tdb *tdb, st
> struct ip *ip;
> struct in_addr dst;
> u_int len;
> - int error, tso = 0;
> + int tso = 0, ip_mtudisc_local = atomic_load_int(&ip_mtudisc);
> + int error;
>
> #if NPF > 0
> /*
> @@ -616,7 +617,7 @@ ip_output_ipsec_send(struct tdb *tdb, st
>
> /* Check if we are allowed to fragment */
> dst = ip->ip_dst;
> - if (ip_mtudisc && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu &&
> + if (ip_mtudisc_local && (ip->ip_off & htons(IP_DF)) && tdb->tdb_mtu &&
> len > tdb->tdb_mtu && tdb->tdb_mtutimeout > gettime()) {
> ip_output_ipsec_pmtu_update(tdb, ro, dst, rtableid);
> ipsec_adjust_mtu(m, tdb->tdb_mtu);
> @@ -624,7 +625,7 @@ ip_output_ipsec_send(struct tdb *tdb, st
> return EMSGSIZE;
> }
> /* propagate IP_DF for v4-over-v6 */
> - if (ip_mtudisc && ip->ip_off & htons(IP_DF))
> + if (ip_mtudisc_local && ip->ip_off & htons(IP_DF))
> SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
>
> /*
> @@ -661,7 +662,7 @@ ip_output_ipsec_send(struct tdb *tdb, st
> }
> if (!error && tso)
> tcpstat_inc(tcps_outswtso);
> - if (ip_mtudisc && error == EMSGSIZE)
> + if (ip_mtudisc_local && error == EMSGSIZE)
> ip_output_ipsec_pmtu_update(tdb, ro, dst, rtableid);
> return error;
> }
> Index: sys/netinet/ipsec_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/ipsec_input.c,v
> diff -u -p -r1.219 ipsec_input.c
> --- sys/netinet/ipsec_input.c 21 Jun 2025 14:21:17 -0000 1.219
> +++ sys/netinet/ipsec_input.c 23 Jun 2025 11:47:14 -0000
> @@ -941,7 +941,8 @@ ipsec_common_ctlinput(u_int rdomain, int
> {
> struct ip *ip = v;
>
> - if (cmd == PRC_MSGSIZE && ip && ip_mtudisc && ip->ip_v == 4) {
> + if (cmd == PRC_MSGSIZE && ip && atomic_load_int(&ip_mtudisc) &&
> + ip->ip_v == 4) {
> struct tdb *tdbp;
> struct sockaddr_in dst;
> struct icmp *icp;
> Index: sys/netinet/tcp_input.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_input.c,v
> diff -u -p -r1.453 tcp_input.c
> --- sys/netinet/tcp_input.c 18 Jun 2025 16:15:46 -0000 1.453
> +++ sys/netinet/tcp_input.c 23 Jun 2025 11:47:14 -0000
> @@ -2952,7 +2952,7 @@ tcp_mss(struct tcpcb *tp, int offer)
> } else if (ifp->if_flags & IFF_LOOPBACK) {
> mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
> } else if (tp->pf == AF_INET) {
> - if (ip_mtudisc)
> + if (atomic_load_int(&ip_mtudisc))
> mss = ifp->if_mtu - iphlen - sizeof(struct tcphdr);
> }
> #ifdef INET6
> @@ -4284,7 +4284,7 @@ syn_cache_respond(struct syn_cache *sc,
> ip->ip_tos = inp->inp_ip.ip_tos;
>
> error = ip_output(m, sc->sc_ipopts, &sc->sc_route,
> - (ip_mtudisc ? IP_MTUDISC : 0), NULL,
> + (atomic_load_int(&ip_mtudisc) ? IP_MTUDISC : 0), NULL,
> inp ? &inp->inp_seclevel : NULL, 0);
> break;
> #ifdef INET6
> Index: sys/netinet/tcp_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_output.c,v
> diff -u -p -r1.154 tcp_output.c
> --- sys/netinet/tcp_output.c 21 Apr 2025 09:54:53 -0000 1.154
> +++ sys/netinet/tcp_output.c 23 Jun 2025 11:47:14 -0000
> @@ -1095,7 +1095,7 @@ send:
> }
> error = ip_output(m, tp->t_inpcb->inp_options,
> &tp->t_inpcb->inp_route,
> - (ip_mtudisc ? IP_MTUDISC : 0), NULL,
> + (atomic_load_int(&ip_mtudisc) ? IP_MTUDISC : 0), NULL,
> &tp->t_inpcb->inp_seclevel, 0);
> break;
> #ifdef INET6
> Index: sys/netinet/tcp_subr.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_subr.c,v
> diff -u -p -r1.212 tcp_subr.c
> --- sys/netinet/tcp_subr.c 8 Jun 2025 17:06:19 -0000 1.212
> +++ sys/netinet/tcp_subr.c 23 Jun 2025 11:47:14 -0000
> @@ -412,7 +412,7 @@ tcp_respond(struct tcpcb *tp, caddr_t te
> ip->ip_tos = 0;
> ip_output(m, NULL,
> tp ? &tp->t_inpcb->inp_route : NULL,
> - ip_mtudisc ? IP_MTUDISC : 0, NULL,
> + atomic_load_int(&ip_mtudisc) ? IP_MTUDISC : 0, NULL,
> tp ? &tp->t_inpcb->inp_seclevel : NULL, 0);
> break;
> }
> @@ -744,7 +744,7 @@ tcp_ctlinput(int cmd, struct sockaddr *s
> return;
> else if (PRC_IS_REDIRECT(cmd))
> notify = in_pcbrtchange, ip = NULL;
> - else if (cmd == PRC_MSGSIZE && ip_mtudisc && ip) {
> + else if (cmd == PRC_MSGSIZE && atomic_load_int(&ip_mtudisc) && ip) {
> struct inpcb *inp;
> struct socket *so = NULL;
> struct tcpcb *tp = NULL;
> Index: sys/netinet/tcp_timer.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_timer.c,v
> diff -u -p -r1.85 tcp_timer.c
> --- sys/netinet/tcp_timer.c 8 Jun 2025 17:06:19 -0000 1.85
> +++ sys/netinet/tcp_timer.c 23 Jun 2025 11:47:14 -0000
> @@ -277,7 +277,7 @@ tcp_timer_rexmt(void *arg)
> * lots more sophisticated searching to find the right
> * value here...
> */
> - if (ip_mtudisc &&
> + if (atomic_load_int(&ip_mtudisc) &&
> TCPS_HAVEESTABLISHED(tp->t_state) &&
> tp->t_rxtshift > TCP_MAXRXTSHIFT / 6) {
> struct rtentry *rt = NULL;
> Index: sys/netinet/udp_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
> diff -u -p -r1.344 udp_usrreq.c
> --- sys/netinet/udp_usrreq.c 18 Jun 2025 16:15:46 -0000 1.344
> +++ sys/netinet/udp_usrreq.c 23 Jun 2025 11:47:14 -0000
> @@ -919,7 +919,7 @@ udp_ctlinput(int cmd, struct sockaddr *s
> uhp = (struct udphdr *)((caddr_t)ip + (ip->ip_hl << 2));
> #ifdef IPSEC
> /* PMTU discovery for udpencap */
> - if (cmd == PRC_MSGSIZE && ip_mtudisc &&
> + if (cmd == PRC_MSGSIZE && atomic_load_int(&ip_mtudisc) &&
> atomic_load_int(&udpencap_enable) && udpencap_port_local &&
> uhp->uh_sport == udpencap_port_local) {
> udpencap_ctlinput(cmd, sa, rdomain, v);
> Index: sys/netinet6/ip6_output.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
> diff -u -p -r1.299 ip6_output.c
> --- sys/netinet6/ip6_output.c 14 May 2025 14:32:15 -0000 1.299
> +++ sys/netinet6/ip6_output.c 23 Jun 2025 11:47:14 -0000
> @@ -693,7 +693,7 @@ reroute:
>
> if (dontfrag && tlen > ifp->if_mtu) { /* case 2-b */
> #ifdef IPSEC
> - if (ip_mtudisc)
> + if (atomic_load_int(&ip_mtudisc))
> ipsec_adjust_mtu(m, mtu);
> #endif
> error = EMSGSIZE;
> @@ -2836,7 +2836,8 @@ ip6_output_ipsec_send(struct tdb *tdb, s
> struct ip6_hdr *ip6;
> struct in6_addr dst;
> u_int len;
> - int error, ifidx, tso = 0;
> + int ifidx, tso = 0, ip_mtudisc_local = atomic_load_int(&ip_mtudisc);
> + int error;
>
> #if NPF > 0
> /*
> @@ -2870,7 +2871,7 @@ ip6_output_ipsec_send(struct tdb *tdb, s
> /* Check if we are allowed to fragment */
> dst = ip6->ip6_dst;
> ifidx = m->m_pkthdr.ph_ifidx;
> - if (ip_mtudisc && tdb->tdb_mtu &&
> + if (ip_mtudisc_local && tdb->tdb_mtu &&
> len > tdb->tdb_mtu && tdb->tdb_mtutimeout > gettime()) {
> int transportmode;
>
> @@ -2889,7 +2890,7 @@ ip6_output_ipsec_send(struct tdb *tdb, s
> return EMSGSIZE;
> }
> /* propagate don't fragment for v6-over-v6 */
> - if (ip_mtudisc)
> + if (ip_mtudisc_local)
> SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
>
> /*
> @@ -2926,7 +2927,7 @@ ip6_output_ipsec_send(struct tdb *tdb, s
> }
> if (!error && tso)
> tcpstat_inc(tcps_outswtso);
> - if (ip_mtudisc && error == EMSGSIZE)
> + if (ip_mtudisc_local && error == EMSGSIZE)
> ip6_output_ipsec_pmtu_update(tdb, ro, &dst, ifidx, rtableid, 0);
> return error;
> }
sysctl: unlock IPCTL_MTUDISC