From: Mark Kettenis Subject: Re: Unlock sysctl kern.maxclusters To: Alexander Bluhm Cc: tech@openbsd.org Date: Tue, 03 Jun 2025 15:07:31 +0200 > Date: Tue, 3 Jun 2025 11:51:11 +0200 > From: Alexander Bluhm > > Hi, > > Sysctl KERN_MAXCLUSTERS only needs a mutex to keep nmbclust and > mbuf_mem_limit in sync. All other read access happens atomically. > > ok? This is questionable. Either it doesn't matter that these variables are in sync, or it does. And if it does matter I'd expect to see the mutex used to access the variables. Now what I think you're trying to prevent here is two processes doing sysctl kern.maxclusters=xxx racing each other, producing inconsistent values of nmbclust and mbuf_mem_limit. There is a much better way to achieve that though: get rid of one of the variables and calculate it from the other one when you need it. I'd probably get rid of mbuf_mem_limit in this case. > Index: kern/kern_sysctl.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v > diff -u -p -r1.469 kern_sysctl.c > --- kern/kern_sysctl.c 1 Jun 2025 03:43:48 -0000 1.469 > +++ kern/kern_sysctl.c 2 Jun 2025 18:27:49 -0000 > @@ -553,6 +553,15 @@ kern_sysctl(int *name, u_int namelen, vo > microboottime(&bt); > return (sysctl_rdstruct(oldp, oldlenp, newp, &bt, sizeof bt)); > } > + case KERN_MAXCLUSTERS: { > + int oval, nval; > + > + oval = nval = atomic_load_long(&nmbclust); > + error = sysctl_int(oldp, oldlenp, newp, newlen, &nval); > + if (error == 0 && oval != nval) > + error = nmbclust_update(nval); > + return (error); > + } > case KERN_MBSTAT: { > uint64_t counters[MBSTAT_COUNT]; > struct mbstat mbs; > @@ -758,13 +767,6 @@ kern_sysctl_locked(int *name, u_int name > return (EINVAL); > stackgap_random = stackgap; > return (0); > - case KERN_MAXCLUSTERS: { > - int val = nmbclust; > - error = sysctl_int(oldp, oldlenp, newp, newlen, &val); > - if (error == 0 && val != nmbclust) > - error = 
nmbclust_update(val); > - return (error); > - } > case KERN_CACHEPCT: { > u_int64_t dmapages; > int opct, pgs; > Index: kern/uipc_mbuf.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_mbuf.c,v > diff -u -p -r1.296 uipc_mbuf.c > --- kern/uipc_mbuf.c 1 Jan 2025 13:44:22 -0000 1.296 > +++ kern/uipc_mbuf.c 2 Jun 2025 18:27:49 -0000 > @@ -129,6 +129,9 @@ struct mutex m_extref_mtx = MUTEX_INITIA > void m_extfree(struct mbuf *); > void m_zero(struct mbuf *); > > +/* keep nmbclust and mbuf_mem_limit in sync */ > +struct mutex mbuf_nmbclust_mtx = MUTEX_INITIALIZER(IPL_NET); > + > unsigned long mbuf_mem_limit; /* [a] how much memory can be allocated */ > unsigned long mbuf_mem_alloc; /* [a] how much memory has been allocated */ > > @@ -218,8 +221,10 @@ nmbclust_update(long newval) > if (newval <= 0 || newval > LONG_MAX / MCLBYTES) > return ERANGE; > /* update the global mbuf memory limit */ > - nmbclust = newval; > - atomic_store_long(&mbuf_mem_limit, nmbclust * MCLBYTES); > + mtx_enter(&mbuf_nmbclust_mtx); > + atomic_store_long(&nmbclust, newval); > + atomic_store_long(&mbuf_mem_limit, newval * MCLBYTES); > + mtx_leave(&mbuf_nmbclust_mtx); > > pool_wakeup(&mbpool); > for (i = 0; i < nitems(mclsizes); i++) > Index: net/pf_ioctl.c > =================================================================== > RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf_ioctl.c,v > diff -u -p -r1.421 pf_ioctl.c > --- net/pf_ioctl.c 22 May 2025 06:34:03 -0000 1.421 > +++ net/pf_ioctl.c 2 Jun 2025 18:27:49 -0000 > @@ -2134,7 +2134,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a > goto fail; > } > /* Fragments reference mbuf clusters. */ > - if (pl->index == PF_LIMIT_FRAGS && pl->limit > nmbclust) { > + if (pl->index == PF_LIMIT_FRAGS && pl->limit > > + (long)atomic_load_long(&nmbclust)) { > error = EINVAL; > PF_UNLOCK(); > goto fail; > >