Download raw body.
sysctl: unlock KERN_CONSBUF and KERN_MSGBUF
On Tue, May 06, 2025 at 06:56:00PM +0200, Alexander Bluhm wrote:
> > + return (0);
>
> if (newp)
> return (EPERM);
> if (oldp == NULL)
> return (0);
> if ((hlen + mp->msg_bufs) > *oldlenp)
> return (ENOMEM);
>
> I would prefer this order as it checks oldp only once.
>
I also don't like this double check. The error path is not expected by
userland in normal work, so the error order is not critical. Reworked.
> > +
> > + memset(&ump, 0, sizeof(ump));
> > + ump.msg_magic = mp->msg_magic;
> > + ump.msg_bufs = mp->msg_bufs;
> > + mtx_enter(&log_mtx);
> > + ump.msg_bufx = mp->msg_bufx;
> > + ump.msg_bufr = mp->msg_bufr;
> > + ump.msg_bufd = mp->msg_bufd;
> > + mtx_leave(&log_mtx);
>
> Could you put the memset() and all assignments into the mutex block?
> Then the compiler could optmize away the memset of fields that are
> set immediately afterwards. Although our clang is dumb and does
> double stores, maybe other compiler behave smarter.
>
I doubt the old gcc is such smart :) The whole `ump' filling under mutex
could make sense, because it makes code easier to read, but putting
memset() of local data makes code worse.
Also, I doubt that filling of solid memory chunk is slower that filling
memory chunk with holes.
> > +
> > + /* copy header... */
> > + if ((error = copyout(&ump, oldp, hlen)))
> > + return (error);
> > + /* ...and the data. */
> > + error = copyout(mp->msg_bufc, oldp + hlen, mp->msg_bufs);
> > +
> > + return (error);
> > + }
> > + case KERN_CONSBUFSIZE:
> > + case KERN_MSGBUFSIZE: {
> > + struct msgbuf *mp = (name[0] == KERN_MSGBUFSIZE) ?
> > + msgbufp : consbufp;
> > +
>
> This case does not look very MP-safe. Although userland should be
> only interested in msg_bufs and this is an atomic long read, we
> provide the whole structure without protection.
>
> Could we merge that into the previous cases? Then we don't have
> to duplicate the copying with mutex.
>
> size_t bufsize =
> (name[0] == KERN_CONSBUF || name[0] == KERN_MSGBUF) ?
> mp->msg_bufs : 0;
> if ((hlen + bufsize) > *oldlenp)
> ...
> error = copyout(mp->msg_bufc, oldp + hlen, bufsize);
>
I don't understand you. The type of long `msg_bufs' is immutable, we set
it only once during bootstrap for both `msgbufp' and `consbufp'.
KERN_*BUFSIZE cases don't want the whole header, they want only one
integer containing buffer size. We pass only the `msg_bufs' value to the
sysctl_rdint(), not the address and not the whole structure. There is
absolutely no differences with unlocked `msg_magic' loads.
Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.466
diff -u -p -r1.466 kern_sysctl.c
--- sys/kern/kern_sysctl.c 4 May 2025 13:42:07 -0000 1.466
+++ sys/kern/kern_sysctl.c 6 May 2025 17:25:13 -0000
@@ -574,10 +574,52 @@ kern_sysctl(int *name, u_int namelen, vo
return (sysctl_rdstruct(oldp, oldlenp, newp,
&mbs, sizeof(mbs)));
}
- case KERN_MSGBUFSIZE:
- case KERN_CONSBUFSIZE: {
- struct msgbuf *mp;
- mp = (name[0] == KERN_MSGBUFSIZE) ? msgbufp : consbufp;
+ case KERN_CONSBUF:
+ if ((error = suser(p)))
+ return (error);
+ /* FALLTHROUGH */
+ case KERN_MSGBUF: {
+ extern struct mutex log_mtx;
+ const size_t hlen = offsetof(struct msgbuf, msg_bufc);
+ struct msgbuf ump, *mp = (name[0] == KERN_MSGBUF) ?
+ msgbufp : consbufp;
+
+ /*
+ * deal with cases where the message buffer has
+ * become corrupted.
+ */
+ if (!mp || mp->msg_magic != MSG_MAGIC)
+ return (ENXIO);
+ if (newp)
+ return (EPERM);
+ if (oldp) {
+ if ((hlen + mp->msg_bufs) > *oldlenp)
+ return (ENOMEM);
+ } else
+ return (0);
+
+ memset(&ump, 0, sizeof(ump));
+ mtx_enter(&log_mtx);
+ ump.msg_magic = mp->msg_magic;
+ ump.msg_bufs = mp->msg_bufs;
+ ump.msg_bufx = mp->msg_bufx;
+ ump.msg_bufr = mp->msg_bufr;
+ ump.msg_bufd = mp->msg_bufd;
+ mtx_leave(&log_mtx);
+
+ /* copy header... */
+ if ((error = copyout(&ump, oldp, hlen)))
+ return (error);
+ /* ...and the data. */
+ error = copyout(mp->msg_bufc, oldp + hlen, mp->msg_bufs);
+
+ return (error);
+ }
+ case KERN_CONSBUFSIZE:
+ case KERN_MSGBUFSIZE: {
+ struct msgbuf *mp = (name[0] == KERN_MSGBUFSIZE) ?
+ msgbufp : consbufp;
+
/*
* deal with cases where the message buffer has
* become corrupted.
@@ -668,22 +710,6 @@ kern_sysctl_locked(int *name, u_int name
if (newp && !error)
domainnamelen = newlen;
return (error);
- case KERN_CONSBUF:
- if ((error = suser(p)))
- return (error);
- /* FALLTHROUGH */
- case KERN_MSGBUF: {
- struct msgbuf *mp;
- mp = (name[0] == KERN_MSGBUF) ? msgbufp : consbufp;
- /*
- * deal with cases where the message buffer has
- * become corrupted.
- */
- if (!mp || mp->msg_magic != MSG_MAGIC)
- return (ENXIO);
- return (sysctl_rdstruct(oldp, oldlenp, newp, mp,
- mp->msg_bufs + offsetof(struct msgbuf, msg_bufc)));
- }
case KERN_CPTIME:
{
CPU_INFO_ITERATOR cii;
sysctl: unlock KERN_CONSBUF and KERN_MSGBUF