From: Vitaliy Makkoveev Subject: Re: Unlock sysctl kern.maxclusters To: Alexander Bluhm Cc: tech@openbsd.org Date: Tue, 3 Jun 2025 13:25:15 +0300 On Tue, Jun 03, 2025 at 11:51:11AM +0200, Alexander Bluhm wrote: > Hi, > > Sysctl KERN_MAXCLUSTERS only needs a mutex to keep nmbclust and > mbuf_mem_limit in sync. All other read access happens atomically. > > ok? > > bluhm > > Index: kern/kern_sysctl.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v > diff -u -p -r1.469 kern_sysctl.c > --- kern/kern_sysctl.c 1 Jun 2025 03:43:48 -0000 1.469 > +++ kern/kern_sysctl.c 2 Jun 2025 18:27:49 -0000 > @@ -553,6 +553,15 @@ kern_sysctl(int *name, u_int namelen, vo > microboottime(&bt); > return (sysctl_rdstruct(oldp, oldlenp, newp, &bt, sizeof bt)); > } > + case KERN_MAXCLUSTERS: { > + int oval, nval; > + > + oval = nval = atomic_load_long(&nmbclust); > + error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); > + if (error == 0 && oval != nval) > + error = nmbclust_update(nval); > + return (error); > + } > case KERN_MBSTAT: { > uint64_t counters[MBSTAT_COUNT]; > struct mbstat mbs; > @@ -758,13 +767,6 @@ kern_sysctl_locked(int *name, u_int name > return (EINVAL); > stackgap_random = stackgap; > return (0); > - case KERN_MAXCLUSTERS: { > - int val = nmbclust; > - error = sysctl_int(oldp, oldlenp, newp, newlen, &val); > - if (error == 0 && val != nmbclust) > - error = nmbclust_update(val); > - return (error); > - } > case KERN_CACHEPCT: { > u_int64_t dmapages; > int opct, pgs; > Index: kern/uipc_mbuf.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_mbuf.c,v > diff -u -p -r1.296 uipc_mbuf.c > --- kern/uipc_mbuf.c 1 Jan 2025 13:44:22 -0000 1.296 > +++ kern/uipc_mbuf.c 2 Jun 2025 18:27:49 -0000 > @@ -129,6 +129,9 @@ struct mutex m_extref_mtx = MUTEX_INITIA > void m_extfree(struct mbuf *); > void m_zero(struct mbuf *); > > 
+/* keep nmbclust and mbuf_mem_limit in sync */ > +struct mutex mbuf_nmbclust_mtx = MUTEX_INITIALIZER(IPL_NET); > + > unsigned long mbuf_mem_limit; /* [a] how much memory can be allocated */ > unsigned long mbuf_mem_alloc; /* [a] how much memory has been allocated */ > > @@ -218,8 +221,10 @@ nmbclust_update(long newval) > if (newval <= 0 || newval > LONG_MAX / MCLBYTES) > return ERANGE; > /* update the global mbuf memory limit */ > - nmbclust = newval; > - atomic_store_long(&mbuf_mem_limit, nmbclust * MCLBYTES); > + mtx_enter(&mbuf_nmbclust_mtx); > + atomic_store_long(&nmbclust, newval); > + atomic_store_long(&mbuf_mem_limit, newval * MCLBYTES); > + mtx_leave(&mbuf_nmbclust_mtx); > > pool_wakeup(&mbpool); > for (i = 0; i < nitems(mclsizes); i++) > Index: net/pf_ioctl.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf_ioctl.c,v > diff -u -p -r1.421 pf_ioctl.c > --- net/pf_ioctl.c 22 May 2025 06:34:03 -0000 1.421 > +++ net/pf_ioctl.c 2 Jun 2025 18:27:49 -0000 > @@ -2134,7 +2134,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a > goto fail; > } > /* Fragments reference mbuf clusters. */ > - if (pl->index == PF_LIMIT_FRAGS && pl->limit > nmbclust) { > + if (pl->index == PF_LIMIT_FRAGS && pl->limit > > + (long)atomic_load_long(&nmbclust)) { > error = EINVAL; > PF_UNLOCK(); > goto fail; > I made a similar diff some time ago. Compared to your version, it relies on the existing `sysctl_lock' rwlock(9) instead of a new mutex(9). Since this is the only nmbclust_update() call, the new mutex(9) seems to be overkill. Could you use the existing `sysctl_lock' in your diff, like below? 
Index: sys/kern/kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v retrieving revision 1.466 diff -u -p -r1.466 kern_sysctl.c --- sys/kern/kern_sysctl.c 4 May 2025 13:42:07 -0000 1.466 +++ sys/kern/kern_sysctl.c 4 May 2025 14:30:36 -0000 @@ -551,6 +551,22 @@ kern_sysctl(int *name, u_int namelen, vo microboottime(&bt); return (sysctl_rdstruct(oldp, oldlenp, newp, &bt, sizeof bt)); } + case KERN_MAXCLUSTERS: { + int oldval, newval; + + oldval = newval = atomic_load_long(&nmbclust); + + error = sysctl_int(oldp, oldlenp, newp, newlen, &newval); + if (error == 0 && oldval != newval) { + error = rw_enter(&sysctl_lock, RW_WRITE|RW_INTR); + if (error) + return (error); + if (newval != atomic_load_long(&nmbclust)) + error = nmbclust_update(newval); + rw_exit(&sysctl_lock); + } + return (error); + } case KERN_MBSTAT: { uint64_t counters[MBSTAT_COUNT]; struct mbstat mbs; @@ -726,13 +742,6 @@ kern_sysctl_locked(int *name, u_int name return (EINVAL); stackgap_random = stackgap; return (0); - case KERN_MAXCLUSTERS: { - int val = nmbclust; - error = sysctl_int(oldp, oldlenp, newp, newlen, &val); - if (error == 0 && val != nmbclust) - error = nmbclust_update(val); - return (error); - } case KERN_CACHEPCT: { u_int64_t dmapages; int opct, pgs;