Download raw body.
tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT
tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT
tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT
On Tue, May 20, 2025 at 02:23:05AM +0300, Vitaliy Makkoveev wrote:
> On Mon, May 19, 2025 at 11:10:48AM +0900, Alexander Bluhm wrote:
> > On Tue, May 13, 2025 at 01:45:36PM +0300, Vitaliy Makkoveev wrote:
> > > On Sat, May 10, 2025 at 08:00:27AM +0300, Vitaliy Makkoveev wrote:
> > > > They are identical, so unlock them both. The temporary `tcp_sysctl_lock'
> > > > and the pool_sethardlimit() serialization are useless because we are
> > > > serialized with sysctl_lock rwlock(9). However I want to do this
> > > > serialization for consistency reason. I will replace `tcp_sysctl_lock'
> > > > with `sysctl_lock' after tcp_sysctl() unlocked.
> >
> > I am not sure why the serialized with sysctl_lock is actually needed.
> > Are you preventing that two userland threads call sysctl() and
> > modify pool_sethardlimit() in parallel? Or is it about the oval
> > that should be consistent for these threads?
> >
> > What concerns me a bit is that pool_sethardlimit() checks (n <
> > pp->pr_nout) while another thread working with pools may modify it.
> > Also pool_sethardlimit() is setting a bunch of pr_hardlimit values
> > without lock.
> >
> > Should we add pl_enter(pp, &pp->pr_lock) in pool_sethardlimit()?
> > It is not the hot path, would prevent modifying the limits in
> > parallel and prevents using inconsistent limits while setting them.
> >
> > Then some of your serialization code in tcp_sysctl() can go away.
> >
>
> This should be the no problem, because the "pp->pr_nout <
> pp->pr_hardlimit" is the normal case and the code handles it. However,
> I'm fine to do this check and modification locked.
>
> Please note, the `pr_hardlimit_warning', `pr_hardlimit_ratecap' and
> `pr_hardlimit_warning_last' are not used. I want to delete them, with
> another diff.
>
> > > + if (error == 0 && oval != nval) {
> > > + extern struct rwlock sysctl_lock;
> > > +
> > > + error = rw_enter(&sysctl_lock, RW_WRITE | RW_INTR);
> > > + if (error)
> > > + return (error);
> > > + if (nval != atomic_load_int(var)) {
> > > + error = pool_sethardlimit(pool, nval,
> > > + NULL, 0);
> > > + if (error == 0)
> > > + atomic_store_int(var, nval);
> > > + }
> > > + rw_exit(&sysctl_lock);
> > > }
>
> I want to keep the `sysctl_lock' as is. I can't imagine something worse
> than pool_sethardlimit() dry-run, but I prefer to keep this serialized
> even with the pool_sethardlimit() serialized within. This is not the
> fast path, the interruptible `sysctl_lock' doesn't make it worse, but we
> 100% sure there is no race effects here.
I don't care much about this additional sysctl_lock.
OK bluhm@
> Index: sys/kern/subr_pool.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/subr_pool.c,v
> retrieving revision 1.237
> diff -u -p -r1.237 subr_pool.c
> --- sys/kern/subr_pool.c 4 Jan 2025 09:26:01 -0000 1.237
> +++ sys/kern/subr_pool.c 19 May 2025 23:22:01 -0000
> @@ -1096,6 +1096,8 @@ pool_sethardlimit(struct pool *pp, u_int
> {
> int error = 0;
>
> + pl_enter(pp, &pp->pr_lock);
> +
> if (n < pp->pr_nout) {
> error = EINVAL;
> goto done;
> @@ -1106,8 +1108,9 @@ pool_sethardlimit(struct pool *pp, u_int
> pp->pr_hardlimit_ratecap.tv_sec = ratecap;
> pp->pr_hardlimit_warning_last.tv_sec = 0;
> pp->pr_hardlimit_warning_last.tv_usec = 0;
> -
> done:
> + pl_leave(pp, &pp->pr_lock);
> +
> return (error);
> }
>
> Index: sys/netinet/in_proto.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/in_proto.c,v
> retrieving revision 1.122
> diff -u -p -r1.122 in_proto.c
> --- sys/netinet/in_proto.c 26 Apr 2025 13:58:08 -0000 1.122
> +++ sys/netinet/in_proto.c 19 May 2025 23:22:01 -0000
> @@ -198,7 +198,7 @@ const struct protosw inetsw[] = {
> .pr_domain = &inetdomain,
> .pr_protocol = IPPROTO_TCP,
> .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS|PR_SPLICE|
> - PR_MPINPUT,
> + PR_MPINPUT|PR_MPSYSCTL,
> .pr_input = tcp_input,
> .pr_ctlinput = tcp_ctlinput,
> .pr_ctloutput = tcp_ctloutput,
> Index: sys/netinet/tcp_usrreq.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
> retrieving revision 1.246
> diff -u -p -r1.246 tcp_usrreq.c
> --- sys/netinet/tcp_usrreq.c 13 May 2025 09:17:41 -0000 1.246
> +++ sys/netinet/tcp_usrreq.c 19 May 2025 23:22:01 -0000
> @@ -1460,29 +1460,38 @@ tcp_sysctl(int *name, u_int namelen, voi
> return tcp_ident(oldp, oldlenp, newp, newlen, 1);
>
> case TCPCTL_REASS_LIMIT:
> - NET_LOCK();
> - nval = tcp_reass_limit;
> - error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
> - if (!error && nval != tcp_reass_limit) {
> - error = pool_sethardlimit(&tcpqe_pool, nval, NULL, 0);
> - if (!error)
> - tcp_reass_limit = nval;
> + case TCPCTL_SACKHOLE_LIMIT: {
> + struct pool *pool;
> + int *var;
> +
> + if (name[0] == TCPCTL_REASS_LIMIT) {
> + pool = &tcpqe_pool;
> + var = &tcp_reass_limit;
> + } else {
> + pool = &sackhl_pool;
> + var = &tcp_sackhole_limit;
> }
> - NET_UNLOCK();
> - return (error);
>
> - case TCPCTL_SACKHOLE_LIMIT:
> - NET_LOCK();
> - nval = tcp_sackhole_limit;
> + oval = nval = atomic_load_int(var);
> error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
> - if (!error && nval != tcp_sackhole_limit) {
> - error = pool_sethardlimit(&sackhl_pool, nval, NULL, 0);
> - if (!error)
> - tcp_sackhole_limit = nval;
> +
> + if (error == 0 && oval != nval) {
> + extern struct rwlock sysctl_lock;
> +
> + error = rw_enter(&sysctl_lock, RW_WRITE | RW_INTR);
> + if (error)
> + return (error);
> + if (nval != atomic_load_int(var)) {
> + error = pool_sethardlimit(pool, nval,
> + NULL, 0);
> + if (error == 0)
> + atomic_store_int(var, nval);
> + }
> + rw_exit(&sysctl_lock);
> }
> - NET_UNLOCK();
> - return (error);
>
> + return (error);
> + }
> case TCPCTL_STATS:
> return (tcp_sysctl_tcpstat(oldp, oldlenp, newp));
>
> Index: sys/netinet6/in6_proto.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet6/in6_proto.c,v
> retrieving revision 1.125
> diff -u -p -r1.125 in6_proto.c
> --- sys/netinet6/in6_proto.c 26 Apr 2025 13:58:08 -0000 1.125
> +++ sys/netinet6/in6_proto.c 19 May 2025 23:22:01 -0000
> @@ -148,7 +148,7 @@ const struct protosw inet6sw[] = {
> .pr_domain = &inet6domain,
> .pr_protocol = IPPROTO_TCP,
> .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_ABRTACPTDIS|PR_SPLICE|
> - PR_MPINPUT,
> + PR_MPINPUT|PR_MPSYSCTL,
> .pr_input = tcp_input,
> .pr_ctlinput = tcp6_ctlinput,
> .pr_ctloutput = tcp_ctloutput,
tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT
tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT
tcp, sysctl: unlock TCPCTL_REASS_LIMIT and TCPCTL_SACKHOLE_LIMIT