Download raw body.
Unlock sysctl kern.maxclusters
On Tue, Jun 03, 2025 at 03:07:31PM +0200, Mark Kettenis wrote:
> > Date: Tue, 3 Jun 2025 11:51:11 +0200
> > From: Alexander Bluhm <bluhm@openbsd.org>
> >
> > Hi,
> >
> > Sysctl KERN_MAXCLUSTERS only needs a mutex to keep nmbclust and
> > mbuf_mem_limit in sync. All other read access happens atomically.
> >
> > ok?
>
> This is questionable. Either it doesn't matter that these variables
> are in sync, or it doesn't. And if it does matter I'd expect to see
> the mutex used to access the variables.
>
> Now what I think you're trying to prevent here is two processes doing
> sysctl kern.maxclusters=xxx racing eachother producing resulting
> inconsistent values of nmbclust and mbuf_mem_limit. There is a much
> better way to achieve that though: get rid of one of the variables and
> calculate it from the other one when you need it. I'd probably get
> rid of mbuf_mem_limit in this case.
>
`nmbclust' used only in sysctl path and only for `mbuf_mem_limit'
calculation. Why the `nmbclust' and `mbuf_mem_limit' serialization is
not enough? Why do we need to move "nmbclust * MCLBYTES" to the hot path
and use it instead of pre-calculated `mbuf_mem_limit'?
The only thing I don't like is the non serialized pool_wakeup() calls,
but mostly for hypothetical pl_enter() for the pool_runqueue() dry runs.
That's why I propose to use existing `sysctl_lock' rwlock to serialize
the whole nmbclust_update(). But how often do we modify kern.maxclusters
in the most common case? Once per /etc/sysctl.conf or never?
>
> > Index: kern/kern_sysctl.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_sysctl.c,v
> > diff -u -p -r1.469 kern_sysctl.c
> > --- kern/kern_sysctl.c 1 Jun 2025 03:43:48 -0000 1.469
> > +++ kern/kern_sysctl.c 2 Jun 2025 18:27:49 -0000
> > @@ -553,6 +553,15 @@ kern_sysctl(int *name, u_int namelen, vo
> > microboottime(&bt);
> > return (sysctl_rdstruct(oldp, oldlenp, newp, &bt, sizeof bt));
> > }
> > + case KERN_MAXCLUSTERS: {
> > + int oval, nval;
> > +
> > + oval = nval = atomic_load_long(&nmbclust);
> > + error = sysctl_int(oldp, oldlenp, newp, newlen, &nval);
> > + if (error == 0 && oval != nval)
> > + error = nmbclust_update(nval);
> > + return (error);
> > + }
> > case KERN_MBSTAT: {
> > uint64_t counters[MBSTAT_COUNT];
> > struct mbstat mbs;
> > @@ -758,13 +767,6 @@ kern_sysctl_locked(int *name, u_int name
> > return (EINVAL);
> > stackgap_random = stackgap;
> > return (0);
> > - case KERN_MAXCLUSTERS: {
> > - int val = nmbclust;
> > - error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
> > - if (error == 0 && val != nmbclust)
> > - error = nmbclust_update(val);
> > - return (error);
> > - }
> > case KERN_CACHEPCT: {
> > u_int64_t dmapages;
> > int opct, pgs;
> > Index: kern/uipc_mbuf.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_mbuf.c,v
> > diff -u -p -r1.296 uipc_mbuf.c
> > --- kern/uipc_mbuf.c 1 Jan 2025 13:44:22 -0000 1.296
> > +++ kern/uipc_mbuf.c 2 Jun 2025 18:27:49 -0000
> > @@ -129,6 +129,9 @@ struct mutex m_extref_mtx = MUTEX_INITIA
> > void m_extfree(struct mbuf *);
> > void m_zero(struct mbuf *);
> >
> > +/* keep nmbclust and mbuf_mem_limit in sync */
> > +struct mutex mbuf_nmbclust_mtx = MUTEX_INITIALIZER(IPL_NET);
> > +
> > unsigned long mbuf_mem_limit; /* [a] how much memory can be allocated */
> > unsigned long mbuf_mem_alloc; /* [a] how much memory has been allocated */
> >
> > @@ -218,8 +221,10 @@ nmbclust_update(long newval)
> > if (newval <= 0 || newval > LONG_MAX / MCLBYTES)
> > return ERANGE;
> > /* update the global mbuf memory limit */
> > - nmbclust = newval;
> > - atomic_store_long(&mbuf_mem_limit, nmbclust * MCLBYTES);
> > + mtx_enter(&mbuf_nmbclust_mtx);
> > + atomic_store_long(&nmbclust, newval);
> > + atomic_store_long(&mbuf_mem_limit, newval * MCLBYTES);
> > + mtx_leave(&mbuf_nmbclust_mtx);
> >
> > pool_wakeup(&mbpool);
> > for (i = 0; i < nitems(mclsizes); i++)
> > Index: net/pf_ioctl.c
> > ===================================================================
> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf_ioctl.c,v
> > diff -u -p -r1.421 pf_ioctl.c
> > --- net/pf_ioctl.c 22 May 2025 06:34:03 -0000 1.421
> > +++ net/pf_ioctl.c 2 Jun 2025 18:27:49 -0000
> > @@ -2134,7 +2134,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
> > goto fail;
> > }
> > /* Fragments reference mbuf clusters. */
> > - if (pl->index == PF_LIMIT_FRAGS && pl->limit > nmbclust) {
> > + if (pl->index == PF_LIMIT_FRAGS && pl->limit >
> > + (long)atomic_load_long(&nmbclust)) {
> > error = EINVAL;
> > PF_UNLOCK();
> > goto fail;
> >
> >
>
Unlock sysctl kern.maxclusters