From: Alexander Bluhm Subject: Re: sysctl: unlock IP{,V6}CTL_MTUDISCTIMEOUT To: Vitaliy Makkoveev Cc: tech@openbsd.org Date: Wed, 18 Jun 2025 21:59:06 +0200 On Wed, Jun 18, 2025 at 10:39:36PM +0300, Vitaliy Makkoveev wrote: > They are identical, so unlock them both. I want to serialize the new > value assignment with the following rt_timer_queue_change() because on > hybrid CPU the thread on faster core could be executed between them. > I used the new temporary `ip_sysctl_lock' rwlock(9) for serialization. > Netlock is not required, so do not stop packets processing. Can you put "extern struct rwlock ip_sysctl_lock" into netinet/ip_var.h? I don't like extern in C files as then the code may get out of sync. OK bluhm@ > Index: sys/netinet/ip_input.c > =================================================================== > RCS file: /cvs/src/sys/netinet/ip_input.c,v > retrieving revision 1.409 > diff -u -p -r1.409 ip_input.c > --- sys/netinet/ip_input.c 12 Jun 2025 20:37:59 -0000 1.409 > +++ sys/netinet/ip_input.c 18 Jun 2025 19:17:01 -0000 > @@ -98,7 +98,7 @@ int ip_sendredirects = 1; /* [a] */ > int ip_dosourceroute = 0; /* [a] */ > int ip_defttl = IPDEFTTL; > int ip_mtudisc = 1; > -int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; > +int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; /* [a] */ > int ip_directedbcast = 0; /* [a] */ > > /* Protects `ipq' and `ip_frags'. */ > @@ -1723,11 +1723,15 @@ ip_forward(struct mbuf *m, struct ifnet > } > > #ifndef SMALL_KERNEL > + > +/* Temporary, to avoid sysctl_lock recursion. */ > +struct rwlock ip_sysctl_lock = RWLOCK_INITIALIZER("ipslk"); > + > int > ip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, > size_t newlen) > { > - int oldval, error; > + int oldval, newval, error; > > /* Almost all sysctl names at this level are terminal. */ > if (namelen != 1 && name[0] != IPCTL_IFQUEUE && > @@ -1746,12 +1750,16 @@ ip_sysctl(int *name, u_int namelen, void > NET_UNLOCK(); > return error; > case IPCTL_MTUDISCTIMEOUT: > - NET_LOCK(); > + oldval = newval = atomic_load_int(&ip_mtudisc_timeout); > error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, > - &ip_mtudisc_timeout, 0, INT_MAX); > - rt_timer_queue_change(&ip_mtudisc_timeout_q, > - ip_mtudisc_timeout); > - NET_UNLOCK(); > + &newval, 0, INT_MAX); > + if (error == 0 && oldval != newval) { > + rw_enter_write(&ip_sysctl_lock); > + atomic_store_int(&ip_mtudisc_timeout, newval); > + rt_timer_queue_change(&ip_mtudisc_timeout_q, newval); > + rw_exit_write(&ip_sysctl_lock); > + } > + > return (error); > #ifdef IPSEC > case IPCTL_ENCDEBUG: > Index: sys/netinet/ipsec_input.c > =================================================================== > RCS file: /cvs/src/sys/netinet/ipsec_input.c,v > retrieving revision 1.218 > diff -u -p -r1.218 ipsec_input.c > --- sys/netinet/ipsec_input.c 16 Jun 2025 07:11:58 -0000 1.218 > +++ sys/netinet/ipsec_input.c 18 Jun 2025 19:17:01 -0000 > @@ -928,7 +928,8 @@ ipsec_set_mtu(struct tdb *tdbp, u_int32_ > > /* Store adjusted MTU in tdb */ > tdbp->tdb_mtu = mtu; > - tdbp->tdb_mtutimeout = gettime() + ip_mtudisc_timeout; > + tdbp->tdb_mtutimeout = gettime() + > + atomic_load_int(&ip_mtudisc_timeout); > DPRINTF("spi %08x mtu %d adjust %ld", > ntohl(tdbp->tdb_spi), tdbp->tdb_mtu, adjust); > } > Index: sys/netinet/ipsec_output.c > =================================================================== > RCS file: /cvs/src/sys/netinet/ipsec_output.c,v > retrieving revision 1.103 > diff -u -p -r1.103 ipsec_output.c > --- sys/netinet/ipsec_output.c 16 Jun 2025 07:11:58 -0000 1.103 > +++ sys/netinet/ipsec_output.c 18 Jun 2025 19:17:01 -0000 > @@ -650,7 +650,8 @@ ipsec_adjust_mtu(struct mbuf *m, u_int32 > > mtu -= adjust; > tdbp->tdb_mtu = mtu; > - tdbp->tdb_mtutimeout = gettime() + ip_mtudisc_timeout; > + tdbp->tdb_mtutimeout = gettime() + > + atomic_load_int(&ip_mtudisc_timeout); > DPRINTF("spi %08x mtu %d adjust %ld mbuf %p", > ntohl(tdbp->tdb_spi), tdbp->tdb_mtu, adjust, m); > tdb_unref(tdbp); > Index: sys/netinet6/in6_proto.c > =================================================================== > RCS file: /cvs/src/sys/netinet6/in6_proto.c,v > retrieving revision 1.129 > diff -u -p -r1.129 in6_proto.c > --- sys/netinet6/in6_proto.c 16 Jun 2025 07:11:58 -0000 1.129 > +++ sys/netinet6/in6_proto.c 18 Jun 2025 19:17:01 -0000 > @@ -350,6 +350,11 @@ const struct domain inet6domain = { > }; > > /* > + * Locks used to protect global variables in this file: > + * a atomic operations > + */ > + > +/* > * Internet configuration info > */ > int ip6_forwarding = 0; /* [a] no forwarding unless sysctl to enable */ > @@ -384,4 +389,4 @@ u_long rip6_recvspace = RIPV6RCVQ; > /* ICMPV6 parameters */ > int icmp6_redirtimeout = 10 * 60; /* 10 minutes */ > int icmp6errppslim = 100; /* 100pps */ > -int ip6_mtudisc_timeout = IPMTUDISCTIMEOUT; > +int ip6_mtudisc_timeout = IPMTUDISCTIMEOUT; /* [a] */ > Index: sys/netinet6/ip6_input.c > =================================================================== > RCS file: /cvs/src/sys/netinet6/ip6_input.c,v > retrieving revision 1.273 > diff -u -p -r1.273 ip6_input.c > --- sys/netinet6/ip6_input.c 12 Jun 2025 20:37:59 -0000 1.273 > +++ sys/netinet6/ip6_input.c 18 Jun 2025 19:17:01 -0000 > @@ -1547,14 +1547,22 @@ ip6_sysctl(int *name, u_int namelen, voi > case IPV6CTL_MRTMFC: > return (EOPNOTSUPP); > #endif > - case IPV6CTL_MTUDISCTIMEOUT: > - NET_LOCK(); > + case IPV6CTL_MTUDISCTIMEOUT: { > + extern struct rwlock ip_sysctl_lock; > + int oldval, newval; > + > + oldval = newval = atomic_load_int(&ip6_mtudisc_timeout); > error = sysctl_int_bounded(oldp, oldlenp, newp, newlen, > - &ip6_mtudisc_timeout, 0, INT_MAX); > - rt_timer_queue_change(&icmp6_mtudisc_timeout_q, > - ip6_mtudisc_timeout); > - NET_UNLOCK(); > + &newval, 0, INT_MAX); > + if (error == 0 && oldval != newval) { > + rw_enter_write(&ip_sysctl_lock); > + atomic_store_int(&ip6_mtudisc_timeout, newval); > + rt_timer_queue_change(&icmp6_mtudisc_timeout_q, newval); > + rw_exit_write(&ip_sysctl_lock); > + } > + > return (error); > + } > case IPV6CTL_IFQUEUE: > return (sysctl_niq(name + 1, namelen - 1, > oldp, oldlenp, newp, newlen, &ip6intrq));