From: Vitaliy Makkoveev Subject: Unlock the KERN_MAXCLUSTERS case of kern_sysctl() To: tech@openbsd.org Date: Tue, 5 Aug 2025 12:59:38 +0300 Again. Previously it was proposed to drop the `mbuf_mem_limit' and to do calculations in run-time. I see no reason for that. The only `nmbclust' user is the DIOCSETLIMIT case of pfioctl(), which only checks passed fragments reference count against it and sets the hard limit on corresponding pf(4) pool. Not on mbuf(9) pool. It also does not touch the `mbuf_mem_limit' calculated within nmbclust_update(). Meanwhile the kern_sysctl() thread only do pool_wakeup() on mbuf(9) and mbuf(9) clusters pools, but does not touch the pf(4) pools. So I can say the usage paths of `nmbclust' and `buf_mem_limit' are the different path and we don't need to serialize the load of `nmbclust' and the `mbuf_mem_limit' update. However, the update within the sysctl(2) path should be serialized. ok? Index: sys/conf/param.c =================================================================== RCS file: /cvs/src/sys/conf/param.c,v diff -u -p -r1.52 param.c --- sys/conf/param.c 20 Aug 2024 13:29:25 -0000 1.52 +++ sys/conf/param.c 5 Aug 2025 09:22:57 -0000 @@ -75,7 +75,7 @@ int initialvnodes = NVNODE; int maxprocess = NPROCESS; /* [a] */ int maxthread = 2 * NPROCESS; /* [a] */ int maxfiles = 5 * (NPROCESS + MAXUSERS) + 80; /* [a] */ -long nmbclust = NMBCLUSTERS; +long nmbclust = NMBCLUSTERS; /* [a] */ #ifndef BUFCACHEPERCENT #define BUFCACHEPERCENT 20 Index: sys/kern/kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v diff -u -p -r1.481 kern_sysctl.c --- sys/kern/kern_sysctl.c 24 Jul 2025 19:42:41 -0000 1.481 +++ sys/kern/kern_sysctl.c 5 Aug 2025 09:22:58 -0000 @@ -610,6 +610,20 @@ kern_sysctl(int *name, u_int namelen, vo microboottime(&bt); return (sysctl_rdstruct(oldp, oldlenp, newp, &bt, sizeof bt)); } + case KERN_MAXCLUSTERS: { + int oldval, newval; + + oldval = newval = atomic_load_long(&nmbclust); + error = sysctl_int(oldp, oldlenp, newp, newlen, &newval); + + if (error == 0 && oldval != newval) { + rw_enter_write(&sysctl_lock); + error = nmbclust_update(newval); + rw_exit_write(&sysctl_lock); + } + + return (error); + } case KERN_MBSTAT: { uint64_t counters[mbs_ncounters]; struct mbstat mbs; @@ -765,13 +779,6 @@ kern_sysctl_locked(int *name, u_int name stackgap_random = stackgap; return (0); } - case KERN_MAXCLUSTERS: { - int val = nmbclust; - error = sysctl_int(oldp, oldlenp, newp, newlen, &val); - if (error == 0 && val != nmbclust) - error = nmbclust_update(val); - return (error); - } case KERN_CACHEPCT: { u_int64_t dmapages; int opct, pgs; Index: sys/kern/uipc_mbuf.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v diff -u -p -r1.301 uipc_mbuf.c --- sys/kern/uipc_mbuf.c 17 Jul 2025 17:30:47 -0000 1.301 +++ sys/kern/uipc_mbuf.c 5 Aug 2025 09:22:58 -0000 @@ -217,8 +217,8 @@ nmbclust_update(long newval) if (newval <= 0 || newval > LONG_MAX / MCLBYTES) return ERANGE; /* update the global mbuf memory limit */ - nmbclust = newval; - atomic_store_long(&mbuf_mem_limit, nmbclust * MCLBYTES); + atomic_store_long(&nmbclust, newval); + atomic_store_long(&mbuf_mem_limit, newval * MCLBYTES); pool_wakeup(&mbpool); for (i = 0; i < nitems(mclsizes); i++) Index: sys/net/pf_ioctl.c =================================================================== RCS file: /cvs/src/sys/net/pf_ioctl.c,v diff -u -p -r1.422 pf_ioctl.c --- sys/net/pf_ioctl.c 7 Jul 2025 02:28:50 -0000 1.422 +++ sys/net/pf_ioctl.c 5 Aug 2025 09:22:58 -0000 @@ -2129,7 +2129,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a goto fail; } /* Fragments reference mbuf clusters. */ - if (pl->index == PF_LIMIT_FRAGS && pl->limit > nmbclust) { + if (pl->index == PF_LIMIT_FRAGS && + pl->limit > atomic_load_long(&nmbclust)) { error = EINVAL; PF_UNLOCK(); goto fail;