Download raw body.
tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT
tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT
tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT
On Mon, May 19, 2025 at 11:10:48AM +0900, Alexander Bluhm wrote:
> On Tue, May 13, 2025 at 01:45:36PM +0300, Vitaliy Makkoveev wrote:
> > On Sat, May 10, 2025 at 08:00:27AM +0300, Vitaliy Makkoveev wrote:
> > > They are identical, so unlock them both. The temporary `tcp_sysctl_lock'
> > > and the pool_sethardlimit() serialization are useless because we are
> > > serialized with sysctl_lock rwlock(9). However I want to do this
> > > serialization for consistency reason. I will replace `tcp_sysctl_lock'
> > > with `sysctl_lock' after tcp_sysctl() unlocked.
>
> I am not sure why the serialized with sysctl_lock is actually needed.
> Are you preventing that two userland threads call sysctl() and
> modify pool_sethardlimit() in parallel? Or is it about the oval
> that should be consistent for these threads?
>
> What concerns me a bit is that pool_sethardlimit() checks (n <
> pp->pr_nout) while another thread working with pools may modify it.
> Also pool_sethardlimit() is setting a bunch of pr_hardlimit values
> without lock.
>
> Should we add pl_enter(pp, &pp->pr_lock) in pool_sethardlimit()?
> It is not the hot path, would prevent modifying the limits in
> parallel and prevents using inconsistent limits while setting them.
>
> Then some of your serialization code in tcp_sysctl() can go away.
>
This should be the no problem, because the "pp->pr_nout <
pp->pr_hardlimit" is the normal case and the code handles it. However,
I'm fine to do this check and modification locked.
Please note, the `pr_hardlimit_warning', `pr_hardlimit_ratecap' and
`pr_hardlimit_warning_last' are not used. I want to delete them, with
another diff.
> > + if (error == 0 && oval != nval) {
> > + extern struct rwlock sysctl_lock;
> > +
> > + error = rw_enter(&sysctl_lock, RW_WRITE | RW_INTR);
> > + if (error)
> > + return (error);
> > + if (nval != atomic_load_int(var)) {
> > + error = pool_sethardlimit(pool, nval,
> > + NULL, 0);
> > + if (error == 0)
> > + atomic_store_int(var, nval);
> > + }
> > + rw_exit(&sysctl_lock);
> > }
I want to keep the `sysctl_lock' as is. I can't imagine something worse
than pool_sethardlimit() dry-run, but I prefer to keep this serialized
even with the pool_sethardlimit() serialized within. This is not the
fast path, the interruptible `sysctl_lock' doesn't make it worse, but we
100% sure there is no race effects here.
Index: sys/kern/subr_pool.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.237
diff -u -p -r1.237 subr_pool.c
--- sys/kern/subr_pool.c 4 Jan 2025 09:26:01 -0000 1.237
+++ sys/kern/subr_pool.c 19 May 2025 23:22:01 -0000
@@ -1096,6 +1096,8 @@ pool_sethardlimit(struct pool *pp, u_int
{
int error = 0;
+ pl_enter(pp, &pp->pr_lock);
+
if (n < pp->pr_nout) {
error = EINVAL;
goto done;
@@ -1106,8 +1108,9 @@ pool_sethardlimit(struct pool *pp, u_int
pp->pr_hardlimit_ratecap.tv_sec = ratecap;
pp->pr_hardlimit_warning_last.tv_sec = 0;
pp->pr_hardlimit_warning_last.tv_usec = 0;
-
done:
+ pl_leave(pp, &pp->pr_lock);
+
return (error);
}
Index: sys/netinet/in_proto.c
===================================================================
RCS file: /cvs/src/sys/netinet/in_proto.c,v
retrieving revision 1.122
diff -u -p -r1.122 in_proto.c
--- sys/netinet/in_proto.c 26 Apr 2025 13:58:08 -0000 1.122
+++ sys/netinet/in_proto.c 19 May 2025 23:22:01 -0000
@@ -198,7 +198,7 @@ const struct protosw inetsw[] = {
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_TCP,
.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS|PR_SPLICE|
- PR_MPINPUT,
+ PR_MPINPUT|PR_MPSYSCTL,
.pr_input = tcp_input,
.pr_ctlinput = tcp_ctlinput,
.pr_ctloutput = tcp_ctloutput,
Index: sys/netinet/tcp_usrreq.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.246
diff -u -p -r1.246 tcp_usrreq.c
--- sys/netinet/tcp_usrreq.c 13 May 2025 09:17:41 -0000 1.246
+++ sys/netinet/tcp_usrreq.c 19 May 2025 23:22:01 -0000
@@ -1460,29 +1460,38 @@ tcp_sysctl(int *name, u_int namelen, voi
return tcp_ident(oldp, oldlenp, newp, newlen, 1);
case TCPCTL_REASS_LIMIT:
- NET_LOCK();
- nval = tcp_reass_limit;
- error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
- if (!error && nval != tcp_reass_limit) {
- error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0);
- if (!error)
- tcp_reass_limit = nval;
+ case TCPCTL_SACKHOLE_LIMIT: {
+ struct pool *pool;
+ int *var;
+
+ if (name[0] == TCPCTL_REASS_LIMIT) {
+ pool = &tcpqe_pool;
+ var = &tcp_reass_limit;
+ } else {
+ pool = &sackhl_pool;
+ var = &tcp_sackhole_limit;
}
- NET_UNLOCK();
- return (error);
- case TCPCTL_SACKHOLE_LIMIT:
- NET_LOCK();
- nval = tcp_sackhole_limit;
+ oval = nval = atomic_load_int(var);
error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
- if (!error && nval != tcp_sackhole_limit) {
- error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0);
- if (!error)
- tcp_sackhole_limit = nval;
+
+ if (error == 0 && oval != nval) {
+ extern struct rwlock sysctl_lock;
+
+ error = rw_enter(&sysctl_lock, RW_WRITE | RW_INTR);
+ if (error)
+ return (error);
+ if (nval != atomic_load_int(var)) {
+ error = pool_sethardlimit(pool, nval,
+ NULL, 0);
+ if (error == 0)
+ atomic_store_int(var, nval);
+ }
+ rw_exit(&sysctl_lock);
}
- NET_UNLOCK();
- return (error);
+ return (error);
+ }
case TCPCTL_STATS:
return (tcp_sysctl_tcpstat(oldp, oldlenp, newp));
Index: sys/netinet6/in6_proto.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6_proto.c,v
retrieving revision 1.125
diff -u -p -r1.125 in6_proto.c
--- sys/netinet6/in6_proto.c 26 Apr 2025 13:58:08 -0000 1.125
+++ sys/netinet6/in6_proto.c 19 May 2025 23:22:01 -0000
@@ -148,7 +148,7 @@ const struct protosw inet6sw[] = {
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_TCP,
.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS|PR_SPLICE|
- PR_MPINPUT,
+ PR_MPINPUT|PR_MPSYSCTL,
.pr_input = tcp_input,
.pr_ctlinput = tcp6_ctlinput,
.pr_ctloutput = tcp_ctloutput,
tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT
tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT
tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT