Download raw body.
dt(4) btrace rwlock
Hi,
I would like to introduce dt(4) trace points into rwlock like we
have them in refcnt.
A first example would be to find exclusive netlock calls that are
used in the kernel.
/*
* netlock: stack traces where exclusive net lock is taken
* - probe: netlock, solock
* - arg0: pointer to struct rwlock
* - arg1: lock type
* - 1: read lock
* - 2: write lock
* - arg2: success status
* - 1: locked immediately
* - 2: locked after spinning
* - 3: locked after sleeping
* - 4: locking failed
*/
tracepoint:rwlock:netlock {
if (arg1 == 2) {
printf("%s %x %u %d%s", probe, arg0, arg1, arg2, kstack);
}
}
The printed output looks like this:
tracepoint:rwlock:netlock ffffffff82a34540 2 1
rw_do_enter_write+0x2d8
igmp_slowtimo+0x14
pfslowtimo+0x107
softclock_thread_run+0xc8
0xffffffff822b9495
proc_trampoline+0x10
kernel
tracepoint:rwlock:netlock ffffffff82a34540 2 1
rw_do_enter_write+0x2d8
icmp_sysctl+0x8a
net_sysctl+0x27b
sys_sysctl+0x1c5
syscall+0x5f9
Xsyscall+0x128
kernel
tracepoint:rwlock:netlock ffffffff82a34540 2 1
rw_do_enter_write+0x2d8
ip6_sysctl+0x39a
net_sysctl+0x27b
sys_sysctl+0x1c5
syscall+0x5f9
Xsyscall+0x128
kernel
The second example finds contention of the socket lock.
/*
* solock: stack sampling of socket lock contension
* - probe: netlock, solock
* - arg0: pointer to struct rwlock
* - arg1: lock type
* - 1: read lock
* - 2: write lock
* - arg2: success status
* - 1: locked immediately
* - 2: locked after spinning
* - 3: locked after sleeping
* - 4: locking failed
*
* To produce a FlameGraph process the output with stackcollapse-bpftrace.pl
* and flamegraph.pl found in:
* https://github.com/brendangregg/FlameGraph
*/
tracepoint:rwlock:solock {
if (arg2 >= 2) {
@[kstack] = count();
}
}
END {
print(@);
}
This script allows to create a flame graph:
http://bluhm.genua.de/files/ot48-solock.svg
Diff below adds trace points for netlock and solock, others can
follow later. The initializer is called with _trace suffix.
ok?
bluhm
Index: dev/dt/dt_prov_static.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/dt/dt_prov_static.c,v
diff -u -p -r1.25 dt_prov_static.c
--- dev/dt/dt_prov_static.c 10 Mar 2025 09:28:56 -0000 1.25
+++ dev/dt/dt_prov_static.c 17 Jul 2025 13:32:27 -0000
@@ -107,6 +107,13 @@ DT_STATIC_PROBE3(refcnt, syncache, "void
DT_STATIC_PROBE3(refcnt, tdb, "void *", "int", "int");
/*
+ * read write sleeping locks, keep in sync with sys/rwlock.h
+ */
+DT_STATIC_PROBE0(rwlock, none);
+DT_STATIC_PROBE3(rwlock, netlock, "void *", "int", "int");
+DT_STATIC_PROBE3(rwlock, solock, "void *", "int", "int");
+
+/*
* List of all static probes
*/
struct dt_probe *const dtps_static[] = {
@@ -161,9 +168,14 @@ struct dt_probe *const dtps_static[] = {
&_DT_STATIC_P(refcnt, socket),
&_DT_STATIC_P(refcnt, syncache),
&_DT_STATIC_P(refcnt, tdb),
+ /* rwlock */
+ &_DT_STATIC_P(rwlock, none),
+ &_DT_STATIC_P(rwlock, netlock),
+ &_DT_STATIC_P(rwlock, solock),
};
struct dt_probe *const *dtps_index_refcnt;
+struct dt_probe *const *dtps_index_rwlock;
int
dt_prov_static_init(void)
@@ -173,6 +185,8 @@ dt_prov_static_init(void)
for (i = 0; i < nitems(dtps_static); i++) {
if (dtps_static[i] == &_DT_STATIC_P(refcnt, none))
dtps_index_refcnt = &dtps_static[i];
+ if (dtps_static[i] == &_DT_STATIC_P(rwlock, none))
+ dtps_index_rwlock = &dtps_static[i];
dt_dev_register_probe(dtps_static[i]);
}
Index: kern/kern_rwlock.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_rwlock.c,v
diff -u -p -r1.57 kern_rwlock.c
--- kern/kern_rwlock.c 3 Jun 2025 00:20:31 -0000 1.57
+++ kern/kern_rwlock.c 17 Jul 2025 13:32:27 -0000
@@ -24,6 +24,7 @@
#include <sys/rwlock.h>
#include <sys/limits.h>
#include <sys/atomic.h>
+#include <sys/tracepoint.h>
#include <sys/witness.h>
#ifdef RWDIAG
@@ -162,12 +163,13 @@ rw_exit_write(struct rwlock *rwl)
static void
_rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
- const struct lock_type *type)
+ const struct lock_type *type, int trace)
{
rwl->rwl_owner = 0;
rwl->rwl_waiters = 0;
rwl->rwl_readers = 0;
rwl->rwl_name = name;
+ rwl->rwl_traceidx = trace;
#ifdef WITNESS
rwl->rwl_lock_obj.lo_flags = lo_flags;
@@ -182,9 +184,9 @@ _rw_init_flags_witness(struct rwlock *rw
void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
- const struct lock_type *type)
+ const struct lock_type *type, int trace)
{
- _rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
+ _rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type, trace);
}
int
@@ -235,6 +237,7 @@ rw_do_enter_write(struct rwlock *rwl, in
owner = rw_cas(&rwl->rwl_owner, 0, self);
if (owner == 0) {
/* wow, we won. so easy */
+ TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 2, 1);
goto locked;
}
if (__predict_false(owner == self)) {
@@ -268,6 +271,8 @@ rw_do_enter_write(struct rwlock *rwl, in
if (owner == 0) {
spc->spc_spinning--;
/* ok, we won now. */
+ TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 2,
+ 2);
goto locked;
}
}
@@ -275,8 +280,10 @@ rw_do_enter_write(struct rwlock *rwl, in
}
#endif
- if (ISSET(flags, RW_NOSLEEP))
+ if (ISSET(flags, RW_NOSLEEP)) {
+ TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 2, 4);
return (EBUSY);
+ }
prio = PLOCK - 4;
if (ISSET(flags, RW_INTR))
@@ -299,12 +306,14 @@ rw_do_enter_write(struct rwlock *rwl, in
#endif
if (ISSET(flags, RW_INTR) && (error != 0)) {
rw_dec(&rwl->rwl_waiters);
+ TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 2, 4);
return (error);
}
owner = rw_cas(&rwl->rwl_owner, 0, self);
} while (owner != 0);
rw_dec(&rwl->rwl_waiters);
+ TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 2, 3);
locked:
membar_enter_after_atomic();
@@ -349,6 +358,7 @@ rw_do_enter_read(struct rwlock *rwl, int
owner = rw_cas(&rwl->rwl_owner, 0, RWLOCK_READ_INCR);
if (owner == 0) {
/* ermagerd, we won! */
+ TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 1, 1);
goto locked;
}
@@ -360,12 +370,15 @@ rw_do_enter_read(struct rwlock *rwl, int
} else if (atomic_load_int(&rwl->rwl_waiters) == 0) {
if (rw_read_incr(rwl, owner)) {
/* nailed it */
+ TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 1, 2);
goto locked;
}
}
- if (ISSET(flags, RW_NOSLEEP))
+ if (ISSET(flags, RW_NOSLEEP)) {
+ TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 1, 4);
return (EBUSY);
+ }
prio = PLOCK;
if (ISSET(flags, RW_INTR))
@@ -388,10 +401,12 @@ rw_do_enter_read(struct rwlock *rwl, int
#endif
if (ISSET(flags, RW_INTR) && (error != 0)) {
rw_dec(&rwl->rwl_readers);
+ TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 1, 4);
return (error);
}
} while (!rw_read_incr(rwl, 0));
rw_dec(&rwl->rwl_readers);
+ TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 1, 3);
locked:
membar_enter_after_atomic();
@@ -451,8 +466,10 @@ rw_upgrade(struct rwlock *rwl, int flags
owner, self);
}
+ TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 3, 4);
return (EBUSY);
}
+ TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 3, 1);
#ifdef WITNESS
{
@@ -583,7 +600,7 @@ _rrw_init_flags(struct rrwlock *rrwl, co
{
memset(rrwl, 0, sizeof(struct rrwlock));
_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
- type);
+ type, 0);
}
int
@@ -696,7 +713,7 @@ _rw_obj_alloc_flags(struct rwlock **lock
mo = pool_get(&rwlock_obj_pool, PR_WAITOK);
mo->ro_magic = RWLOCK_OBJ_MAGIC;
- _rw_init_flags(&mo->ro_lock, name, flags, type);
+ _rw_init_flags(&mo->ro_lock, name, flags, type, 0);
mo->ro_refcnt = 1;
*lock = &mo->ro_lock;
Index: kern/subr_pool.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/subr_pool.c,v
diff -u -p -r1.240 subr_pool.c
--- kern/subr_pool.c 12 Jun 2025 20:37:58 -0000 1.240
+++ kern/subr_pool.c 17 Jul 2025 13:32:27 -0000
@@ -2244,7 +2244,7 @@ void
pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
const struct lock_type *type)
{
- _rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
+ _rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type, 0);
}
void
Index: kern/uipc_socket.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
diff -u -p -r1.381 uipc_socket.c
--- kern/uipc_socket.c 15 Jul 2025 12:52:52 -0000 1.381
+++ kern/uipc_socket.c 17 Jul 2025 13:32:27 -0000
@@ -158,7 +158,8 @@ soalloc(const struct protosw *prp, int w
#endif
refcnt_init_trace(&so->so_refcnt, DT_REFCNT_IDX_SOCKET);
- rw_init_flags(&so->so_lock, dom_name, RWL_DUPOK);
+ rw_init_flags_trace(&so->so_lock, dom_name, RWL_DUPOK,
+ DT_RWLOCK_IDX_SOLOCK);
rw_init(&so->so_rcv.sb_lock, "sbufrcv");
rw_init(&so->so_snd.sb_lock, "sbufsnd");
mtx_init_flags(&so->so_rcv.sb_mtx, IPL_MPFLOOR, "sbrcv", 0);
Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
diff -u -p -r1.737 if.c
--- net/if.c 7 Jul 2025 02:28:50 -0000 1.737
+++ net/if.c 17 Jul 2025 13:32:27 -0000
@@ -248,7 +248,8 @@ struct task if_input_task_locked = TASK_
* Serialize socket operations to ensure no new sleeping points
* are introduced in IP output paths.
*/
-struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
+struct rwlock netlock = RWLOCK_INITIALIZER_TRACE("netlock",
+ DT_RWLOCK_IDX_NETLOCK);
/*
* Network interface utility routines.
Index: sys/rwlock.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/rwlock.h,v
diff -u -p -r1.33 rwlock.h
--- sys/rwlock.h 18 May 2025 00:13:57 -0000 1.33
+++ sys/rwlock.h 17 Jul 2025 13:32:27 -0000
@@ -66,6 +66,7 @@ struct rwlock {
#ifdef WITNESS
struct lock_object rwl_lock_obj;
#endif
+ int rwl_traceidx;
};
#define RWLOCK_LO_FLAGS(flags) \
@@ -93,10 +94,14 @@ struct rwlock {
#ifdef WITNESS
#define RWLOCK_INITIALIZER(name) \
- { 0, 0, 0, name, .rwl_lock_obj = RWLOCK_LO_INITIALIZER(name, 0) }
+ { 0, 0, 0, name, .rwl_lock_obj = RWLOCK_LO_INITIALIZER(name, 0), 0 }
+#define RWLOCK_INITIALIZER_TRACE(name, trace) \
+ { 0, 0, 0, name, .rwl_lock_obj = RWLOCK_LO_INITIALIZER(name, 0), trace }
#else
#define RWLOCK_INITIALIZER(name) \
- { 0, 0, 0, name }
+ { 0, 0, 0, name, 0 }
+#define RWLOCK_INITIALIZER_TRACE(name, trace) \
+ { 0, 0, 0, name, trace }
#endif
#define RWLOCK_WRLOCK 0x04UL
@@ -133,18 +138,24 @@ struct rrwlock {
#ifdef _KERNEL
void _rw_init_flags(struct rwlock *, const char *, int,
- const struct lock_type *);
+ const struct lock_type *, int);
#ifdef WITNESS
+#define rw_init_flags_trace(rwl, name, flags, trace) do { \
+ static const struct lock_type __lock_type = { .lt_name = #rwl };\
+ _rw_init_flags(rwl, name, flags, &__lock_type, trace); \
+} while (0)
#define rw_init_flags(rwl, name, flags) do { \
static const struct lock_type __lock_type = { .lt_name = #rwl };\
- _rw_init_flags(rwl, name, flags, &__lock_type); \
+ _rw_init_flags(rwl, name, flags, &__lock_type, 0); \
} while (0)
-#define rw_init(rwl, name) rw_init_flags(rwl, name, 0)
+#define rw_init(rwl, name) rw_init_flags(rwl, name, 0)
#else /* WITNESS */
+#define rw_init_flags_trace(rwl, name, flags, trace) \
+ _rw_init_flags(rwl, name, flags, NULL, trace)
#define rw_init_flags(rwl, name, flags) \
- _rw_init_flags(rwl, name, flags, NULL)
-#define rw_init(rwl, name) _rw_init_flags(rwl, name, 0, NULL)
+ _rw_init_flags(rwl, name, flags, NULL, 0)
+#define rw_init(rwl, name) _rw_init_flags(rwl, name, 0, NULL, 0)
#endif /* WITNESS */
void rw_enter_read(struct rwlock *);
@@ -230,6 +241,10 @@ void _rw_obj_alloc_flags(struct rwlock *
struct lock_type *);
void rw_obj_hold(struct rwlock *);
int rw_obj_free(struct rwlock *);
+
+/* sorted alphabetically, keep in sync with dev/dt/dt_prov_static.c */
+#define DT_RWLOCK_IDX_NETLOCK 1
+#define DT_RWLOCK_IDX_SOLOCK 2
#endif /* _KERNEL */
dt(4) btrace rwlock