From: Vitaliy Makkoveev Subject: Re: sysctl: unlock KERN_CONSBUF and KERN_MSGBUF To: Alexander Bluhm Cc: tech@openbsd.org Date: Tue, 6 May 2025 20:46:11 +0300 On Tue, May 06, 2025 at 06:56:00PM +0200, Alexander Bluhm wrote: > > + return (0); > > if (newp) > return (EPERM); > if (oldp == NULL) > return (0); > if ((hlen + mp->msg_bufs) > *oldlenp) > return (ENOMEM); > > I would prefer this order as it checks oldp only once. > I also don't like this double check. The error path is not expected by userland in normal work, so the error order is not critical. Reworked. > > + > > + memset(&ump, 0, sizeof(ump)); > > + ump.msg_magic = mp->msg_magic; > > + ump.msg_bufs = mp->msg_bufs; > > + mtx_enter(&log_mtx); > > + ump.msg_bufx = mp->msg_bufx; > > + ump.msg_bufr = mp->msg_bufr; > > + ump.msg_bufd = mp->msg_bufd; > > + mtx_leave(&log_mtx); > > Could you put the memset() and all assignments into the mutex block? > Then the compiler could optmize away the memset of fields that are > set immediately afterwards. Although our clang is dumb and does > double stores, maybe other compiler behave smarter. > I doubt the old gcc is such smart :) The whole `ump' filling under mutex could make sense, because it makes code easier to read, but putting memset() of local data makes code worse. Also, I doubt that filling of solid memory chunk is slower that filling memory chunk with holes. > > + > > + /* copy header... */ > > + if ((error = copyout(&ump, oldp, hlen))) > > + return (error); > > + /* ...and the data. */ > > + error = copyout(mp->msg_bufc, oldp + hlen, mp->msg_bufs); > > + > > + return (error); > > + } > > + case KERN_CONSBUFSIZE: > > + case KERN_MSGBUFSIZE: { > > + struct msgbuf *mp = (name[0] == KERN_MSGBUFSIZE) ? > > + msgbufp : consbufp; > > + > > This case does not look very MP-safe. Although userland should be > only interested in msg_bufs and this is an atomic long read, we > provide the whole structure without protection. > > Could we merge that into the previous cases? Then we don't have > to duplicate the copying with mutex. > > size_t bufsize = > (name[0] == KERN_CONSBUF || name[0] == KERN_MSGBUF) ? > mp->msg_bufs : 0; > if ((hlen + bufsize) > *oldlenp) > ... > error = copyout(mp->msg_bufc, oldp + hlen, bufsize); > I don't understand you. The type of long `msg_bufs' is immutable, we set it only once during bootstrap for both `msgbufp' and `consbufp'. KERN_*BUFSIZE cases don't want the whole header, they want only one integer containing buffer size. We pass only the `msg_bufs' value to the sysctl_rdint(), not the address and not the whole structure. There is absolutely no differences with unlocked `msg_magic' loads. Index: sys/kern/kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v retrieving revision 1.466 diff -u -p -r1.466 kern_sysctl.c --- sys/kern/kern_sysctl.c 4 May 2025 13:42:07 -0000 1.466 +++ sys/kern/kern_sysctl.c 6 May 2025 17:25:13 -0000 @@ -574,10 +574,52 @@ kern_sysctl(int *name, u_int namelen, vo return (sysctl_rdstruct(oldp, oldlenp, newp, &mbs, sizeof(mbs))); } - case KERN_MSGBUFSIZE: - case KERN_CONSBUFSIZE: { - struct msgbuf *mp; - mp = (name[0] == KERN_MSGBUFSIZE) ? msgbufp : consbufp; + case KERN_CONSBUF: + if ((error = suser(p))) + return (error); + /* FALLTHROUGH */ + case KERN_MSGBUF: { + extern struct mutex log_mtx; + const size_t hlen = offsetof(struct msgbuf, msg_bufc); + struct msgbuf ump, *mp = (name[0] == KERN_MSGBUF) ? + msgbufp : consbufp; + + /* + * deal with cases where the message buffer has + * become corrupted. + */ + if (!mp || mp->msg_magic != MSG_MAGIC) + return (ENXIO); + if (newp) + return (EPERM); + if (oldp) { + if ((hlen + mp->msg_bufs) > *oldlenp) + return (ENOMEM); + } else + return (0); + + memset(&ump, 0, sizeof(ump)); + mtx_enter(&log_mtx); + ump.msg_magic = mp->msg_magic; + ump.msg_bufs = mp->msg_bufs; + ump.msg_bufx = mp->msg_bufx; + ump.msg_bufr = mp->msg_bufr; + ump.msg_bufd = mp->msg_bufd; + mtx_leave(&log_mtx); + + /* copy header... */ + if ((error = copyout(&ump, oldp, hlen))) + return (error); + /* ...and the data. */ + error = copyout(mp->msg_bufc, oldp + hlen, mp->msg_bufs); + + return (error); + } + case KERN_CONSBUFSIZE: + case KERN_MSGBUFSIZE: { + struct msgbuf *mp = (name[0] == KERN_MSGBUFSIZE) ? + msgbufp : consbufp; + /* * deal with cases where the message buffer has * become corrupted. @@ -668,22 +710,6 @@ kern_sysctl_locked(int *name, u_int name if (newp && !error) domainnamelen = newlen; return (error); - case KERN_CONSBUF: - if ((error = suser(p))) - return (error); - /* FALLTHROUGH */ - case KERN_MSGBUF: { - struct msgbuf *mp; - mp = (name[0] == KERN_MSGBUF) ? msgbufp : consbufp; - /* - * deal with cases where the message buffer has - * become corrupted. - */ - if (!mp || mp->msg_magic != MSG_MAGIC) - return (ENXIO); - return (sysctl_rdstruct(oldp, oldlenp, newp, mp, - mp->msg_bufs + offsetof(struct msgbuf, msg_bufc))); - } case KERN_CPTIME: { CPU_INFO_ITERATOR cii;