Download raw body.
gprof: Profiling a multi-threaded application (revived)
From: "Theo de Raadt" <deraadt@openbsd.org>
Subject: Re: gprof: Profiling a multi-threaded application (revived)
Date: Fri, 13 Jun 2025 07:19:07 -0600
>> First, we update libc only and bump the libc major. At this point,
>> we add just the _gmon_alloc function and export the symbol. No one
>> calls the _gmon_alloc function; it prepares the following updates.
>>
>> And then wait for the next release that will update xenocara and
>> all the packages to link to the new libc.
>
> I don't think splitting into two moments solves the problem. But
> then, I don't actually see the problem Mark is talking about.
>
> If packages need to crank versions to to collect newer DT_NEEDED
> references to libpthread, then that's going to be the same story as
> new version eference for newer libc, which we've been doing forever
> on a regular basis.
>
> Obviously, you and I have compiled this in a base system, and noticed
> nothing except that after the major crank of both libraries, the
> already installed packages use the older libc/libpthread versions which
> does not have the new interface, and it is assumed after the next package
> build and update they will use the new versions (but basically not use
> this interface at all, because they are not static profiling binaries).
>
> The one detail is that newer libc version has to have storage for
> "thread->gmonparam" which is used by libpthread, but it will have
> that.
>
> I don't understand the problem.
>
After the major crank of libc and libpthread, I succeeded in building
xenocara and X Window System worked for me. And then, I installed the
desktop environment Xfce4. I could invoke xfce4-terminal without errors,
but I saw that xfce4-terminal linked the old libc and 2 libpthread
libraries, which are old and new. The libpthread doesn't have DT_NEEDED
record against libc, so that the old libc is linked to xfce4-terminal,
because it is linked at compile-time. The new libpthread is required by
libX11, and the old one is required by libcairo.
Even if libpthread has DT_NEEDED record against libc, ld.so's implementation
doesn't allow linking multiple libc libraries by the following code.
https://github.com/openbsd/src/blob/master/libexec/ld.so/loader.c#L362-L385
I'm guessing multiple libpthread-linked applications seem to run normally;
it is just luck. I think this is what Mark is worrying about.
The '_gmon_alloc' reference to libc is needed only for the profile library.
I'll introduce the compile option that tells if the build is for the
profile library or not. The PFLAGS variable is used for this purpose and
handed in the bsd.lib.mk. We don't need to bump the libpthread major,
because the shared library won't be changed.
diff --git a/lib/libc/Symbols.list b/lib/libc/Symbols.list
index 6eb2cca93e5..d5d5497cf93 100644
--- a/lib/libc/Symbols.list
+++ b/lib/libc/Symbols.list
@@ -880,6 +880,7 @@ _mcleanup
_monstartup
moncontrol
monstartup
+_gmon_alloc
/* hash */
MD5Data
diff --git a/lib/libc/gmon/gmon.c b/lib/libc/gmon/gmon.c
index 99fd42d7824..93176edd11d 100644
--- a/lib/libc/gmon/gmon.c
+++ b/lib/libc/gmon/gmon.c
@@ -42,12 +42,21 @@
struct gmonparam _gmonparam = { GMON_PROF_OFF };
+#include <pthread.h>
+#include <thread_private.h>
+
+static SLIST_HEAD(, gmonparam) _gmonfree = SLIST_HEAD_INITIALIZER(_gmonfree);
+static SLIST_HEAD(, gmonparam) _gmoninuse = SLIST_HEAD_INITIALIZER(_gmoninuse);
+_THREAD_PRIVATE_MUTEX(_gmonlock);
+static pthread_key_t _gmonkey;
+
static int s_scale;
/* see profil(2) where this is describe (incorrectly) */
#define SCALE_1_TO_1 0x10000L
#define ERR(s) write(STDERR_FILENO, s, sizeof(s))
+PROTO_NORMAL(_gmon_alloc);
PROTO_NORMAL(moncontrol);
PROTO_DEPRECATED(monstartup);
@@ -62,6 +71,19 @@ monstartup(u_long lowpc, u_long highpc)
#define PAGEMASK (PAGESIZE - 1)
#define PAGEROUND(x) (((x) + (PAGEMASK)) & ~PAGEMASK)
+static void
+_gmon_destructor(void *arg)
+{
+ struct gmonparam *p = arg, *q, **prev;
+
+ _THREAD_PRIVATE_MUTEX_LOCK(_gmonlock);
+ SLIST_REMOVE(&_gmoninuse, p, gmonparam, list);
+ SLIST_INSERT_HEAD(&_gmonfree, p, list);
+ _THREAD_PRIVATE_MUTEX_UNLOCK(_gmonlock);
+
+ pthread_setspecific(_gmonkey, NULL);
+}
+
void
_monstartup(u_long lowpc, u_long highpc)
{
@@ -130,12 +152,166 @@ _monstartup(u_long lowpc, u_long highpc)
else
p->dirfd = -1;
+ pthread_key_create(&_gmonkey, _gmon_destructor);
+
moncontrol(1);
if (p->dirfd != -1)
close(p->dirfd);
}
+struct gmonparam *
+_gmon_alloc(void)
+{
+ struct gmonparam *p;
+ char *a;
+
+ if (_gmonparam.state == GMON_PROF_OFF)
+ return NULL;
+
+ _THREAD_PRIVATE_MUTEX_LOCK(_gmonlock);
+ p = SLIST_FIRST(&_gmonfree);
+ if (p != NULL) {
+ SLIST_REMOVE_HEAD(&_gmonfree, list);
+ SLIST_INSERT_HEAD(&_gmoninuse, p, list);
+ } else {
+ _THREAD_PRIVATE_MUTEX_UNLOCK(_gmonlock);
+ a = mmap(NULL,
+ _ALIGN(sizeof(*p)) + _ALIGN(_gmonparam.fromssize) +
+ _ALIGN(_gmonparam.tossize),
+ PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if (a == MAP_FAILED) {
+ pthread_setspecific(_gmonkey, NULL);
+ ERR("_gmon_alloc: out of memory\n");
+ return NULL;
+ }
+ p = (struct gmonparam *)a;
+ *p = _gmonparam;
+ p->kcount = NULL;
+ p->kcountsize = 0;
+ p->froms = (void *)(a + _ALIGN(sizeof(*p)));
+ p->tos = (void *)(a + _ALIGN(sizeof(*p)) +
+ _ALIGN(_gmonparam.fromssize));
+ _THREAD_PRIVATE_MUTEX_LOCK(_gmonlock);
+ SLIST_INSERT_HEAD(&_gmoninuse, p, list);
+ }
+ _THREAD_PRIVATE_MUTEX_UNLOCK(_gmonlock);
+ pthread_setspecific(_gmonkey, p);
+
+ return p;
+}
+DEF_WEAK(_gmon_alloc);
+
+static void
+_gmon_merge_two(struct gmonparam *p, struct gmonparam *q)
+{
+ u_long fromindex, selfpc, endfrom;
+ u_short *frompcindex, qtoindex, toindex;
+ long count;
+ struct tostruct *top;
+
+ endfrom = (q->fromssize / sizeof(*q->froms));
+ for (fromindex = 0; fromindex < endfrom; fromindex++) {
+ if (q->froms[fromindex] == 0)
+ continue;
+ for (qtoindex = q->froms[fromindex]; qtoindex != 0;
+ qtoindex = q->tos[qtoindex].link) {
+ selfpc = q->tos[qtoindex].selfpc;
+ count = q->tos[qtoindex].count;
+ /* cribbed from mcount */
+ frompcindex = &p->froms[fromindex];
+ toindex = *frompcindex;
+ if (toindex == 0) {
+ /*
+ * first time traversing this arc
+ */
+ toindex = ++p->tos[0].link;
+ if (toindex >= p->tolimit)
+ /* halt further profiling */
+ goto overflow;
+
+ *frompcindex = (u_short)toindex;
+ top = &p->tos[(size_t)toindex];
+ top->selfpc = selfpc;
+ top->count = count;
+ top->link = 0;
+ goto done;
+ }
+ top = &p->tos[(size_t)toindex];
+ if (top->selfpc == selfpc) {
+ /*
+ * arc at front of chain; usual case.
+ */
+ top->count+= count;
+ goto done;
+ }
+ /*
+ * have to go looking down chain for it.
+ * top points to what we are looking at,
+ * we know it is not at the head of the chain.
+ */
+ for (; /* goto done */; ) {
+ if (top->link == 0) {
+ /*
+ * top is end of the chain and
+ * none of the chain had
+ * top->selfpc == selfpc. so
+ * we allocate a new tostruct
+ * and link it to the head of
+ * the chain.
+ */
+ toindex = ++p->tos[0].link;
+ if (toindex >= p->tolimit)
+ goto overflow;
+ top = &p->tos[(size_t)toindex];
+ top->selfpc = selfpc;
+ top->count = count;
+ top->link = *frompcindex;
+ *frompcindex = (u_short)toindex;
+ goto done;
+ }
+ /*
+ * otherwise, check the next arc on the chain.
+ */
+ top = &p->tos[top->link];
+ if (top->selfpc == selfpc) {
+ /*
+ * there it is.
+ * add to its count.
+ */
+ top->count += count;
+ goto done;
+ }
+
+ }
+
+ done: ;
+ }
+
+ }
+overflow: ;
+
+}
+
+static void
+_gmon_merge(void)
+{
+ struct gmonparam *q;
+
+ _THREAD_PRIVATE_MUTEX_LOCK(_gmonlock);
+
+ SLIST_FOREACH(q, &_gmonfree, list)
+ _gmon_merge_two(&_gmonparam, q);
+
+ SLIST_FOREACH(q, &_gmoninuse, list) {
+ q->state = GMON_PROF_OFF;
+ _gmon_merge_two(&_gmonparam, q);
+ }
+
+ _THREAD_PRIVATE_MUTEX_UNLOCK(_gmonlock);
+}
+
+
void
_mcleanup(void)
{
@@ -179,6 +355,9 @@ _mcleanup(void)
p->kcount, p->kcountsize);
write(log, dbuf, strlen(dbuf));
#endif
+
+ _gmon_merge();
+
hdr = (struct gmonhdr *)p->outbuf;
hdr->lpc = p->lowpc;
hdr->hpc = p->highpc;
diff --git a/lib/libc/gmon/mcount.c b/lib/libc/gmon/mcount.c
index 6846c10c857..f5ecb3f7cdc 100644
--- a/lib/libc/gmon/mcount.c
+++ b/lib/libc/gmon/mcount.c
@@ -31,6 +31,11 @@
#include <sys/types.h>
#include <sys/gmon.h>
+#ifndef _KERNEL
+#include <thread_private.h>
+#include <tib.h>
+#endif
+
/*
* mcount is called on entry to each function compiled with the profiling
* switch set. _mcount(), which is declared in a machine-dependent way
@@ -65,13 +70,14 @@ _MCOUNT_DECL(u_long frompc, u_long selfpc)
if ((p = curcpu()->ci_gmon) == NULL)
return;
#else
- p = &_gmonparam;
+ p = (__isthreaded) ?
+ ((pthread_t)TIB_GET()->tib_thread)->gmonparam : &_gmonparam;
#endif
/*
* check that we are profiling
* and that we aren't recursively invoked.
*/
- if (p->state != GMON_PROF_ON)
+ if (p == NULL || p->state != GMON_PROF_ON)
return;
#ifdef _KERNEL
MCOUNT_ENTER;
diff --git a/lib/libc/include/thread_private.h b/lib/libc/include/thread_private.h
index 1ec10711615..6b8164bab43 100644
--- a/lib/libc/include/thread_private.h
+++ b/lib/libc/include/thread_private.h
@@ -5,6 +5,9 @@
#ifndef _THREAD_PRIVATE_H_
#define _THREAD_PRIVATE_H_
+#include <sys/types.h>
+#include <sys/gmon.h>
+
extern int __isthreaded;
#define _MALLOC_MUTEXES 32
@@ -390,6 +393,7 @@ struct pthread {
/* cancel received in a delayed cancel block? */
int delayed_cancel;
+ struct gmonparam *gmonparam;
};
/* flags in pthread->flags */
#define THREAD_DONE 0x001
diff --git a/lib/libc/shlib_version b/lib/libc/shlib_version
index 7a749b0c915..98af1dc86b6 100644
--- a/lib/libc/shlib_version
+++ b/lib/libc/shlib_version
@@ -1,4 +1,4 @@
-major=100
-minor=3
+major=101
+minor=0
# note: If changes were made to include/thread_private.h or if system calls
# were added/changed then librthread/shlib_version must also be updated.
diff --git a/lib/libc/thread/rthread.c b/lib/libc/thread/rthread.c
index ae74aceb51b..f8116e8a0e0 100644
--- a/lib/libc/thread/rthread.c
+++ b/lib/libc/thread/rthread.c
@@ -21,6 +21,7 @@
#include <sys/types.h>
#include <sys/atomic.h>
+#include <sys/gmon.h>
#include <pthread.h>
#include <stdlib.h>
@@ -103,6 +104,9 @@ _rthread_init(void)
}
_threads_inited = 1;
+
+ /* Ignore errors. NULL is OK for a non-profiling case. */
+ thread->gmonparam = _gmon_alloc();
}
/*
diff --git a/lib/librthread/Makefile b/lib/librthread/Makefile
index 67a2678574d..30ef87ca7fd 100644
--- a/lib/librthread/Makefile
+++ b/lib/librthread/Makefile
@@ -10,6 +10,7 @@ CFLAGS+=-I${.CURDIR} -include namespace.h \
-I${LIBCSRCDIR}/arch/${MACHINE_CPU} -I${LIBCSRCDIR}/include
CDIAGFLAGS=
LDADD = -Wl,-znodelete
+PFLAGS= -D__PROFIL_SRC__
.if defined(NOPIC)
CFLAGS+=-DNO_PIC
diff --git a/lib/librthread/rthread.c b/lib/librthread/rthread.c
index 53f1d9ca82b..1b37a3dead1 100644
--- a/lib/librthread/rthread.c
+++ b/lib/librthread/rthread.c
@@ -25,6 +25,9 @@
#include <elf.h>
#pragma weak _DYNAMIC
#endif
+#ifdef __PROFIL_SRC__
+#include <sys/gmon.h>
+#endif
#include <stdlib.h>
#include <unistd.h>
@@ -393,6 +396,12 @@ pthread_create(pthread_t *threadp, const pthread_attr_t *attr,
/* we're going to be multi-threaded real soon now */
__isthreaded = 1;
+
+#ifdef __PROFIL_SRC__
+ /* Ignore errors. NULL is OK for a non-profiling case. */
+ thread->gmonparam = _gmon_alloc();
+#endif
+
rc = __tfork_thread(¶m, sizeof(param), _rthread_start, thread);
if (rc != -1) {
/* success */
diff --git a/share/mk/bsd.lib.mk b/share/mk/bsd.lib.mk
index 9cf9ffdf8e6..138e206f7a9 100644
--- a/share/mk/bsd.lib.mk
+++ b/share/mk/bsd.lib.mk
@@ -33,6 +33,7 @@ CFLAGS+= ${NOPIE_FLAGS}
CXXFLAGS+= ${NOPIE_FLAGS}
AFLAGS+= ${NOPIE_FLAGS}
.endif
+PFLAGS+=-p
.c.o:
@echo "${COMPILE.c} ${.IMPSRC} -o ${.TARGET}"
@@ -42,8 +43,8 @@ AFLAGS+= ${NOPIE_FLAGS}
@rm -f ${.TARGET}.o
.c.po:
- @echo "${COMPILE.c} -p ${.IMPSRC} -o ${.TARGET}"
- @${COMPILE.c} ${DFLAGS} -p ${.IMPSRC} -o ${.TARGET}.o
+ @echo "${COMPILE.c} ${PFLAGS} ${.IMPSRC} -o ${.TARGET}"
+ @${COMPILE.c} ${DFLAGS} ${PFLAGS} ${.IMPSRC} -o ${.TARGET}.o
@-mv $@.d $*.d
@${LD} -X -r ${.TARGET}.o -o ${.TARGET}
@rm -f ${.TARGET}.o
@@ -70,8 +71,8 @@ AFLAGS+= ${NOPIE_FLAGS}
@rm -f ${.TARGET}.o
.cc.po .cpp.po .C.po .cxx.po:
- @echo "${COMPILE.cc} -p ${.IMPSRC} -o ${.TARGET}"
- @${COMPILE.cc} ${DFLAGS} -p ${.IMPSRC} -o ${.TARGET}.o
+ @echo "${COMPILE.cc} ${PFLAGS} ${.IMPSRC} -o ${.TARGET}"
+ @${COMPILE.cc} ${DFLAGS} ${PFLAGS} ${.IMPSRC} -o ${.TARGET}.o
@-mv $@.d $*.d
@${LD} -X -r ${.TARGET}.o -o ${.TARGET}
@rm -f ${.TARGET}.o
@@ -92,8 +93,8 @@ AFLAGS+= ${NOPIE_FLAGS}
@rm -f ${.TARGET}.o
.f.po:
- @echo "${COMPILE.f} -p ${.IMPSRC} -o ${.TARGET}"
- @${COMPILE.f} ${DFLAGS} -p ${.IMPSRC} -o ${.TARGET}.o
+ @echo "${COMPILE.f} ${PFLAGS} ${.IMPSRC} -o ${.TARGET}"
+ @${COMPILE.f} ${DFLAGS} ${PFLAGS} ${.IMPSRC} -o ${.TARGET}.o
@-mv $@.d $*.d
@${LD} -X -r ${.TARGET}.o -o ${.TARGET}
@rm -f ${.TARGET}.o
diff --git a/sys/lib/libkern/mcount.c b/sys/lib/libkern/mcount.c
index e020dede07a..59b815a02d5 100644
--- a/sys/lib/libkern/mcount.c
+++ b/sys/lib/libkern/mcount.c
@@ -33,6 +33,11 @@
#include <sys/param.h>
#include <sys/gmon.h>
+#ifndef _KERNEL
+#include <thread_private.h>
+#include <tib.h>
+#endif
+
/*
* mcount is called on entry to each function compiled with the profiling
* switch set. _mcount(), which is declared in a machine-dependent way
@@ -67,13 +72,14 @@ _MCOUNT_DECL(u_long frompc, u_long selfpc)
if ((p = curcpu()->ci_gmon) == NULL)
return;
#else
- p = &_gmonparam;
+ p = (__isthreaded) ?
+ ((pthread_t)TIB_GET()->tib_thread)->gmonparam : &_gmonparam;
#endif
/*
* check that we are profiling
* and that we aren't recursively invoked.
*/
- if (p->state != GMON_PROF_ON)
+ if (p == NULL || p->state != GMON_PROF_ON)
return;
#ifdef _KERNEL
MCOUNT_ENTER;
diff --git a/sys/sys/gmon.h b/sys/sys/gmon.h
index baaf7743be5..a29bfa849d9 100644
--- a/sys/sys/gmon.h
+++ b/sys/sys/gmon.h
@@ -35,6 +35,7 @@
#ifndef _SYS_GMON_H_
#define _SYS_GMON_H_
+#include <sys/queue.h>
#include <machine/profile.h>
/*
@@ -141,6 +142,7 @@ struct gmonparam {
size_t outbuflen;
void *rawarcs;
int dirfd;
+ SLIST_ENTRY(gmonparam) list;
};
/*
@@ -174,6 +176,7 @@ void _monstartup(u_long, u_long);
void moncontrol(int);
/* XXX remove end of may 2025 */
void monstartup(u_long, u_long);
+struct gmonparam *_gmon_alloc(void);
__END_DECLS
#endif /* !_KERNEL */
--
Yuichiro NAITO (naito.yuichiro@gmail.com)
gprof: Profiling a multi-threaded application (revived)