From: Vitaliy Makkoveev Subject: sysctl: unlock IP{,V6}CTL_MTUDISCTIMEOUT To: Alexander Bluhm , tech@openbsd.org Date: Wed, 18 Jun 2025 22:39:36 +0300 They are identical, so unlock them both. I want to serialize the new value assignment with the following rt_timer_queue_change() because on hybrid CPU the thread on faster core could be executed between them. I used the new temporary `ip_sysctl_lock' rwlock(9) for serialization. Netlock is not required, so do not stop packets processing. Index: sys/netinet/ip_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_input.c,v retrieving revision 1.409 diff -u -p -r1.409 ip_input.c --- sys/netinet/ip_input.c 12 Jun 2025 20:37:59 -0000 1.409 +++ sys/netinet/ip_input.c 18 Jun 2025 19:17:01 -0000 @@ -98,7 +98,7 @@ int ip_sendredirects = 1; /* [a] */ int ip_dosourceroute = 0; /* [a] */ int ip_defttl = IPDEFTTL; int ip_mtudisc = 1; -int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; +int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; /* [a] */ int ip_directedbcast = 0; /* [a] */ /* Protects `ipq' and `ip_frags'. */ @@ -1723,11 +1723,15 @@ ip_forward(struct mbuf *m, struct ifnet } #ifndef SMALL_KERNEL + +/* Temporary, to avoid sysctl_lock recursion. */ +struct rwlock ip_sysctl_lock = RWLOCK_INITIALIZER("ipslk"); + int ip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int oldval, error; + int oldval, newval, error; /* Almost all sysctl names at this level are terminal. */ if (namelen != 1 && name[0] != IPCTL_IFQUEUE && @@ -1746,12 +1750,16 @@ ip_sysctl(int *name, u_int namelen, void NET_UNLOCK(); return error; case IPCTL_MTUDISCTIMEOUT: - NET_LOCK(); + oldval = newval = atomic_load_int(&ip_mtudisc_timeout); error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, - &ip_mtudisc_timeout, 0, INT_MAX); - rt_timer_queue_change(&ip_mtudisc_timeout_q, - ip_mtudisc_timeout); - NET_UNLOCK(); + &newval, 0, INT_MAX); + if (error == 0 && oldval != newval) { + rw_enter_write(&ip_sysctl_lock); + atomic_store_int(&ip_mtudisc_timeout, newval); + rt_timer_queue_change(&ip_mtudisc_timeout_q, newval); + rw_exit_write(&ip_sysctl_lock); + } + return (error); #ifdef IPSEC case IPCTL_ENCDEBUG: Index: sys/netinet/ipsec_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ipsec_input.c,v retrieving revision 1.218 diff -u -p -r1.218 ipsec_input.c --- sys/netinet/ipsec_input.c 16 Jun 2025 07:11:58 -0000 1.218 +++ sys/netinet/ipsec_input.c 18 Jun 2025 19:17:01 -0000 @@ -928,7 +928,8 @@ ipsec_set_mtu(struct tdb *tdbp, u_int32_ /* Store adjusted MTU in tdb */ tdbp->tdb_mtu = mtu; - tdbp->tdb_mtutimeout = gettime() + ip_mtudisc_timeout; + tdbp->tdb_mtutimeout = gettime() + + atomic_load_int(&ip_mtudisc_timeout); DPRINTF("spi %08x mtu %d adjust %ld", ntohl(tdbp->tdb_spi), tdbp->tdb_mtu, adjust); } Index: sys/netinet/ipsec_output.c =================================================================== RCS file: /cvs/src/sys/netinet/ipsec_output.c,v retrieving revision 1.103 diff -u -p -r1.103 ipsec_output.c --- sys/netinet/ipsec_output.c 16 Jun 2025 07:11:58 -0000 1.103 +++ sys/netinet/ipsec_output.c 18 Jun 2025 19:17:01 -0000 @@ -650,7 +650,8 @@ ipsec_adjust_mtu(struct mbuf *m, u_int32 mtu -= adjust; tdbp->tdb_mtu = mtu; - tdbp->tdb_mtutimeout = gettime() + ip_mtudisc_timeout; + tdbp->tdb_mtutimeout = gettime() + + atomic_load_int(&ip_mtudisc_timeout); DPRINTF("spi %08x mtu %d adjust %ld mbuf %p", ntohl(tdbp->tdb_spi), tdbp->tdb_mtu, adjust, m); tdb_unref(tdbp); Index: sys/netinet6/in6_proto.c =================================================================== RCS file: /cvs/src/sys/netinet6/in6_proto.c,v retrieving revision 1.129 diff -u -p -r1.129 in6_proto.c --- sys/netinet6/in6_proto.c 16 Jun 2025 07:11:58 -0000 1.129 +++ sys/netinet6/in6_proto.c 18 Jun 2025 19:17:01 -0000 @@ -350,6 +350,11 @@ const struct domain inet6domain = { }; /* + * Locks used to protect global variables in this file: + * a atomic operations + */ + +/* * Internet configuration info */ int ip6_forwarding = 0; /* [a] no forwarding unless sysctl to enable */ @@ -384,4 +389,4 @@ u_long rip6_recvspace = RIPV6RCVQ; /* ICMPV6 parameters */ int icmp6_redirtimeout = 10 * 60; /* 10 minutes */ int icmp6errppslim = 100; /* 100pps */ -int ip6_mtudisc_timeout = IPMTUDISCTIMEOUT; +int ip6_mtudisc_timeout = IPMTUDISCTIMEOUT; /* [a] */ Index: sys/netinet6/ip6_input.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_input.c,v retrieving revision 1.273 diff -u -p -r1.273 ip6_input.c --- sys/netinet6/ip6_input.c 12 Jun 2025 20:37:59 -0000 1.273 +++ sys/netinet6/ip6_input.c 18 Jun 2025 19:17:01 -0000 @@ -1547,14 +1547,22 @@ ip6_sysctl(int *name, u_int namelen, voi case IPV6CTL_MRTMFC: return (EOPNOTSUPP); #endif - case IPV6CTL_MTUDISCTIMEOUT: - NET_LOCK(); + case IPV6CTL_MTUDISCTIMEOUT: { + extern struct rwlock ip_sysctl_lock; + int oldval, newval; + + oldval = newval = atomic_load_int(&ip6_mtudisc_timeout); error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, - &ip6_mtudisc_timeout, 0, INT_MAX); - rt_timer_queue_change(&icmp6_mtudisc_timeout_q, - ip6_mtudisc_timeout); - NET_UNLOCK(); + &newval, 0, INT_MAX); + if (error == 0 && oldval != newval) { + rw_enter_write(&ip_sysctl_lock); + atomic_store_int(&ip6_mtudisc_timeout, newval); + rt_timer_queue_change(&icmp6_mtudisc_timeout_q, newval); + rw_exit_write(&ip_sysctl_lock); + } + return (error); + } case IPV6CTL_IFQUEUE: return (sysctl_niq(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, &ip6intrq));