From: Alexander Bluhm Subject: Re: tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT To: Vitaliy Makkoveev Cc: tech@openbsd.org Date: Tue, 20 May 2025 21:02:50 +0900 On Tue, May 20, 2025 at 02:23:05AM +0300, Vitaliy Makkoveev wrote: > On Mon, May 19, 2025 at 11:10:48AM +0900, Alexander Bluhm wrote: > > On Tue, May 13, 2025 at 01:45:36PM +0300, Vitaliy Makkoveev wrote: > > > On Sat, May 10, 2025 at 08:00:27AM +0300, Vitaliy Makkoveev wrote: > > > > They are identical, so unlock them both. The temporary `tcp_sysctl_lock' > > > > and the pool_sethardlimit() serialization are useless because we are > > > > serialized with sysctl_lock rwlock(9). However I want to do this > > > > serialization for consistency reason. I will replace `tcp_sysctl_lock' > > > > with `sysctl_lock' after tcp_sysctl() unlocked. > > > > I am not sure why the serialized with sysctl_lock is actually needed. > > Are you preventing that two userland threads call sysctl() and > > modify pool_sethardlimit() in parallel? Or is it about the oval > > that should be consistent for these threads? > > > > What concerns me a bit is that pool_sethardlimit() checks (n < > > pp->pr_nout) while another thread working with pools may modify it. > > Also pool_sethardlimit() is setting a bunch of pr_hardlimit values > > without lock. > > > > Should we add pl_enter(pp, &pp->pr_lock) in pool_sethardlimit()? > > It is not the hot path, would prevent modifying the limits in > > parallel and prevents using inconsistent limits while setting them. > > > > Then some of your serialization code in tcp_sysctl() can go away. > > > > This should be the no problem, because the "pp->pr_nout < > pp->pr_hardlimit" is the normal case and the code handles it. However, > I'm fine to do this check and modification locked. > > Please note, the `pr_hardlimit_warning', `pr_hardlimit_ratecap' and > `pr_hardlimit_warning_last' are not used. I want to delete them, with > another diff. > > > > + if (error == 0 && oval != nval) { > > > + extern struct rwlock sysctl_lock; > > > + > > > + error = rw_enter(&sysctl_lock, RW_WRITE | RW_INTR); > > > + if (error) > > > + return (error); > > > + if (nval != atomic_load_int(var)) { > > > + error = pool_sethardlimit(pool, nval, > > > + NULL, 0); > > > + if (error == 0) > > > + atomic_store_int(var, nval); > > > + } > > > + rw_exit(&sysctl_lock); > > > } > > I want to keep the `sysctl_lock' as is. I can't imagine something worse > than pool_sethardlimit() dry-run, but I prefer to keep this serialized > even with the pool_sethardlimit() serialized within. This is not the > fast path, the interruptible `sysctl_lock' doesn't make it worse, but we > 100% sure there is no race effects here. I don't care much about this additional sysctl_lock. OK bluhm@ > Index: sys/kern/subr_pool.c > =================================================================== > RCS file: /cvs/src/sys/kern/subr_pool.c,v > retrieving revision 1.237 > diff -u -p -r1.237 subr_pool.c > --- sys/kern/subr_pool.c 4 Jan 2025 09:26:01 -0000 1.237 > +++ sys/kern/subr_pool.c 19 May 2025 23:22:01 -0000 > @@ -1096,6 +1096,8 @@ pool_sethardlimit(struct pool *pp, u_int > { > int error = 0; > > + pl_enter(pp, &pp->pr_lock); > + > if (n < pp->pr_nout) { > error = EINVAL; > goto done; > @@ -1106,8 +1108,9 @@ pool_sethardlimit(struct pool *pp, u_int > pp->pr_hardlimit_ratecap.tv_sec = ratecap; > pp->pr_hardlimit_warning_last.tv_sec = 0; > pp->pr_hardlimit_warning_last.tv_usec = 0; > - > done: > + pl_leave(pp, &pp->pr_lock); > + > return (error); > } > > Index: sys/netinet/in_proto.c > =================================================================== > RCS file: /cvs/src/sys/netinet/in_proto.c,v > retrieving revision 1.122 > diff -u -p -r1.122 in_proto.c > --- sys/netinet/in_proto.c 26 Apr 2025 13:58:08 -0000 1.122 > +++ sys/netinet/in_proto.c 19 May 2025 23:22:01 -0000 > @@ -198,7 +198,7 @@ const struct protosw inetsw[] = { > .pr_domain = &inetdomain, > .pr_protocol = IPPROTO_TCP, > .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS|PR_SPLICE| > - PR_MPINPUT, > + PR_MPINPUT|PR_MPSYSCTL, > .pr_input = tcp_input, > .pr_ctlinput = tcp_ctlinput, > .pr_ctloutput = tcp_ctloutput, > Index: sys/netinet/tcp_usrreq.c > =================================================================== > RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v > retrieving revision 1.246 > diff -u -p -r1.246 tcp_usrreq.c > --- sys/netinet/tcp_usrreq.c 13 May 2025 09:17:41 -0000 1.246 > +++ sys/netinet/tcp_usrreq.c 19 May 2025 23:22:01 -0000 > @@ -1460,29 +1460,38 @@ tcp_sysctl(int *name, u_int namelen, voi > return tcp_ident(oldp, oldlenp, newp, newlen, 1); > > case TCPCTL_REASS_LIMIT: > - NET_LOCK(); > - nval = tcp_reass_limit; > - error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); > - if (!error && nval != tcp_reass_limit) { > - error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0); > - if (!error) > - tcp_reass_limit = nval; > + case TCPCTL_SACKHOLE_LIMIT: { > + struct pool *pool; > + int *var; > + > + if (name[0] == TCPCTL_REASS_LIMIT) { > + pool = &tcpqe_pool; > + var = &tcp_reass_limit; > + } else { > + pool = &sackhl_pool; > + var = &tcp_sackhole_limit; > } > - NET_UNLOCK(); > - return (error); > > - case TCPCTL_SACKHOLE_LIMIT: > - NET_LOCK(); > - nval = tcp_sackhole_limit; > + oval = nval = atomic_load_int(var); > error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); > - if (!error && nval != tcp_sackhole_limit) { > - error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0); > - if (!error) > - tcp_sackhole_limit = nval; > + > + if (error == 0 && oval != nval) { > + extern struct rwlock sysctl_lock; > + > + error = rw_enter(&sysctl_lock, RW_WRITE | RW_INTR); > + if (error) > + return (error); > + if (nval != atomic_load_int(var)) { > + error = pool_sethardlimit(pool, nval, > + NULL, 0); > + if (error == 0) > + atomic_store_int(var, nval); > + } > + rw_exit(&sysctl_lock); > } > - NET_UNLOCK(); > - return (error); > > + return (error); > + } > case TCPCTL_STATS: > return (tcp_sysctl_tcpstat(oldp, oldlenp, newp)); > > Index: sys/netinet6/in6_proto.c > =================================================================== > RCS file: /cvs/src/sys/netinet6/in6_proto.c,v > retrieving revision 1.125 > diff -u -p -r1.125 in6_proto.c > --- sys/netinet6/in6_proto.c 26 Apr 2025 13:58:08 -0000 1.125 > +++ sys/netinet6/in6_proto.c 19 May 2025 23:22:01 -0000 > @@ -148,7 +148,7 @@ const struct protosw inet6sw[] = { > .pr_domain = &inet6domain, > .pr_protocol = IPPROTO_TCP, > .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS|PR_SPLICE| > - PR_MPINPUT, > + PR_MPINPUT|PR_MPSYSCTL, > .pr_input = tcp_input, > .pr_ctlinput = tcp6_ctlinput, > .pr_ctloutput = tcp_ctloutput,