Index | Thread | Search

From:
Alexander Bluhm <bluhm@openbsd.org>
Subject:
dt(4) btrace rwlock
To:
tech@openbsd.org
Date:
Fri, 18 Jul 2025 17:15:42 +0200

Download raw body.

Thread
  • Alexander Bluhm:

    dt(4) btrace rwlock

Hi,

I would like to introduce dt(4) trace points into rwlock like we
have them in refcnt.

A first example would be to find exclusive netlock calls that are
used in the kernel.
/*
 * netlock: stack traces where exclusive net lock is taken
 *  - probe: netlock, solock
 *  - arg0: pointer to struct rwlock
 *  - arg1: lock type
 *    - 1: read lock
 *    - 2: write lock
 *  - arg2: success status
 *    - 1: locked immediately
 *    - 2: locked after spinning
 *    - 3: locked after sleeping
 *    - 4: locking failed
 */
tracepoint:rwlock:netlock {
        if (arg1 == 2) {
                printf("%s %x %u %d%s", probe, arg0, arg1, arg2, kstack);
        }
}

The printed output looks like this:
tracepoint:rwlock:netlock ffffffff82a34540 2 1
rw_do_enter_write+0x2d8
igmp_slowtimo+0x14
pfslowtimo+0x107
softclock_thread_run+0xc8
0xffffffff822b9495
proc_trampoline+0x10
kernel
tracepoint:rwlock:netlock ffffffff82a34540 2 1
rw_do_enter_write+0x2d8
icmp_sysctl+0x8a
net_sysctl+0x27b
sys_sysctl+0x1c5
syscall+0x5f9
Xsyscall+0x128
kernel
tracepoint:rwlock:netlock ffffffff82a34540 2 1
rw_do_enter_write+0x2d8
ip6_sysctl+0x39a
net_sysctl+0x27b
sys_sysctl+0x1c5
syscall+0x5f9
Xsyscall+0x128
kernel

The second example finds contention of the socket lock.
/*
 * solock: stack sampling of socket lock contension
 *  - probe: netlock, solock
 *  - arg0: pointer to struct rwlock
 *  - arg1: lock type
 *    - 1: read lock
 *    - 2: write lock
 *  - arg2: success status
 *    - 1: locked immediately
 *    - 2: locked after spinning
 *    - 3: locked after sleeping
 *    - 4: locking failed
 *
 * To produce a FlameGraph process the output with stackcollapse-bpftrace.pl
 * and flamegraph.pl found in:
 *      https://github.com/brendangregg/FlameGraph
 */
tracepoint:rwlock:solock {
        if (arg2 >= 2) {
                @[kstack] = count();
        }
}
END {
        print(@);
}

This script allows to create a flame graph:
http://bluhm.genua.de/files/ot48-solock.svg

Diff below adds trace points for netlock and solock, others can
follow later.  The initializer is called with _trace suffix.

ok?

bluhm

Index: dev/dt/dt_prov_static.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/dt/dt_prov_static.c,v
diff -u -p -r1.25 dt_prov_static.c
--- dev/dt/dt_prov_static.c	10 Mar 2025 09:28:56 -0000	1.25
+++ dev/dt/dt_prov_static.c	17 Jul 2025 13:32:27 -0000
@@ -107,6 +107,13 @@ DT_STATIC_PROBE3(refcnt, syncache, "void
 DT_STATIC_PROBE3(refcnt, tdb, "void *", "int", "int");
 
 /*
+ * read write sleeping locks, keep in sync with sys/rwlock.h
+ */
+DT_STATIC_PROBE0(rwlock, none);
+DT_STATIC_PROBE3(rwlock, netlock, "void *", "int", "int");
+DT_STATIC_PROBE3(rwlock, solock, "void *", "int", "int");
+
+/*
  * List of all static probes
  */
 struct dt_probe *const dtps_static[] = {
@@ -161,9 +168,14 @@ struct dt_probe *const dtps_static[] = {
 	&_DT_STATIC_P(refcnt, socket),
 	&_DT_STATIC_P(refcnt, syncache),
 	&_DT_STATIC_P(refcnt, tdb),
+	/* rwlock */
+	&_DT_STATIC_P(rwlock, none),
+	&_DT_STATIC_P(rwlock, netlock),
+	&_DT_STATIC_P(rwlock, solock),
 };
 
 struct dt_probe *const *dtps_index_refcnt;
+struct dt_probe *const *dtps_index_rwlock;
 
 int
 dt_prov_static_init(void)
@@ -173,6 +185,8 @@ dt_prov_static_init(void)
 	for (i = 0; i < nitems(dtps_static); i++) {
 		if (dtps_static[i] == &_DT_STATIC_P(refcnt, none))
 			dtps_index_refcnt = &dtps_static[i];
+		if (dtps_static[i] == &_DT_STATIC_P(rwlock, none))
+			dtps_index_rwlock = &dtps_static[i];
 		dt_dev_register_probe(dtps_static[i]);
 	}
 
Index: kern/kern_rwlock.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_rwlock.c,v
diff -u -p -r1.57 kern_rwlock.c
--- kern/kern_rwlock.c	3 Jun 2025 00:20:31 -0000	1.57
+++ kern/kern_rwlock.c	17 Jul 2025 13:32:27 -0000
@@ -24,6 +24,7 @@
 #include <sys/rwlock.h>
 #include <sys/limits.h>
 #include <sys/atomic.h>
+#include <sys/tracepoint.h>
 #include <sys/witness.h>
 
 #ifdef RWDIAG
@@ -162,12 +163,13 @@ rw_exit_write(struct rwlock *rwl)
 
 static void
 _rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
-    const struct lock_type *type)
+    const struct lock_type *type, int trace)
 {
 	rwl->rwl_owner = 0;
 	rwl->rwl_waiters = 0;
 	rwl->rwl_readers = 0;
 	rwl->rwl_name = name;
+	rwl->rwl_traceidx = trace;
 
 #ifdef WITNESS
 	rwl->rwl_lock_obj.lo_flags = lo_flags;
@@ -182,9 +184,9 @@ _rw_init_flags_witness(struct rwlock *rw
 
 void
 _rw_init_flags(struct rwlock *rwl, const char *name, int flags,
-    const struct lock_type *type)
+    const struct lock_type *type, int trace)
 {
-	_rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
+	_rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type, trace);
 }
 
 int
@@ -235,6 +237,7 @@ rw_do_enter_write(struct rwlock *rwl, in
 	owner = rw_cas(&rwl->rwl_owner, 0, self);
 	if (owner == 0) {
 		/* wow, we won. so easy */
+		TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 2, 1);
 		goto locked;
 	}
 	if (__predict_false(owner == self)) {
@@ -268,6 +271,8 @@ rw_do_enter_write(struct rwlock *rwl, in
 			if (owner == 0) {
 				spc->spc_spinning--;
 				/* ok, we won now. */
+				TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 2,
+				    2);
 				goto locked;
 			}
 		}
@@ -275,8 +280,10 @@ rw_do_enter_write(struct rwlock *rwl, in
 	}
 #endif
 
-	if (ISSET(flags, RW_NOSLEEP))
+	if (ISSET(flags, RW_NOSLEEP)) {
+		TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 2, 4);
 		return (EBUSY);
+	}
 
 	prio = PLOCK - 4;
 	if (ISSET(flags, RW_INTR))
@@ -299,12 +306,14 @@ rw_do_enter_write(struct rwlock *rwl, in
 #endif
 		if (ISSET(flags, RW_INTR) && (error != 0)) {
 			rw_dec(&rwl->rwl_waiters);
+			TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 2, 4);
 			return (error);
 		}
 
 		owner = rw_cas(&rwl->rwl_owner, 0, self);
 	} while (owner != 0);
 	rw_dec(&rwl->rwl_waiters);
+	TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 2, 3);
 
 locked:
 	membar_enter_after_atomic();
@@ -349,6 +358,7 @@ rw_do_enter_read(struct rwlock *rwl, int
 	owner = rw_cas(&rwl->rwl_owner, 0, RWLOCK_READ_INCR);
 	if (owner == 0) {
 		/* ermagerd, we won! */
+		TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 1, 1);
 		goto locked;
 	}
 
@@ -360,12 +370,15 @@ rw_do_enter_read(struct rwlock *rwl, int
 	} else if (atomic_load_int(&rwl->rwl_waiters) == 0) {
 		if (rw_read_incr(rwl, owner)) {
 			/* nailed it */
+			TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 1, 2);
 			goto locked;
 		}
 	}
 
-	if (ISSET(flags, RW_NOSLEEP))
+	if (ISSET(flags, RW_NOSLEEP)) {
+		TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 1, 4);
 		return (EBUSY);
+	}
 
 	prio = PLOCK;
 	if (ISSET(flags, RW_INTR))
@@ -388,10 +401,12 @@ rw_do_enter_read(struct rwlock *rwl, int
 #endif
 		if (ISSET(flags, RW_INTR) && (error != 0)) {
 			rw_dec(&rwl->rwl_readers);
+			TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 1, 4);
 			return (error);
 		}
 	} while (!rw_read_incr(rwl, 0));
 	rw_dec(&rwl->rwl_readers);
+	TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 1, 3);
 
 locked:
 	membar_enter_after_atomic();
@@ -451,8 +466,10 @@ rw_upgrade(struct rwlock *rwl, int flags
 			    owner, self);
 		}
 
+		TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 3, 4);
 		return (EBUSY);
 	}
+	TRACEINDEX(rwlock, rwl->rwl_traceidx, rwl, 3, 1);
 
 #ifdef WITNESS
 	{
@@ -583,7 +600,7 @@ _rrw_init_flags(struct rrwlock *rrwl, co
 {
 	memset(rrwl, 0, sizeof(struct rrwlock));
 	_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
-	    type);
+	    type, 0);
 }
 
 int
@@ -696,7 +713,7 @@ _rw_obj_alloc_flags(struct rwlock **lock
 
 	mo = pool_get(&rwlock_obj_pool, PR_WAITOK);
 	mo->ro_magic = RWLOCK_OBJ_MAGIC;
-	_rw_init_flags(&mo->ro_lock, name, flags, type);
+	_rw_init_flags(&mo->ro_lock, name, flags, type, 0);
 	mo->ro_refcnt = 1;
 
 	*lock = &mo->ro_lock;
Index: kern/subr_pool.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/subr_pool.c,v
diff -u -p -r1.240 subr_pool.c
--- kern/subr_pool.c	12 Jun 2025 20:37:58 -0000	1.240
+++ kern/subr_pool.c	17 Jul 2025 13:32:27 -0000
@@ -2244,7 +2244,7 @@ void
 pool_lock_rw_init(struct pool *pp, union pool_lock *lock,
     const struct lock_type *type)
 {
-	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type);
+	_rw_init_flags(&lock->prl_rwlock, pp->pr_wchan, 0, type, 0);
 }
 
 void
Index: kern/uipc_socket.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
diff -u -p -r1.381 uipc_socket.c
--- kern/uipc_socket.c	15 Jul 2025 12:52:52 -0000	1.381
+++ kern/uipc_socket.c	17 Jul 2025 13:32:27 -0000
@@ -158,7 +158,8 @@ soalloc(const struct protosw *prp, int w
 #endif
 
 	refcnt_init_trace(&so->so_refcnt, DT_REFCNT_IDX_SOCKET);
-	rw_init_flags(&so->so_lock, dom_name, RWL_DUPOK);
+	rw_init_flags_trace(&so->so_lock, dom_name, RWL_DUPOK,
+	    DT_RWLOCK_IDX_SOLOCK);
 	rw_init(&so->so_rcv.sb_lock, "sbufrcv");
 	rw_init(&so->so_snd.sb_lock, "sbufsnd");
 	mtx_init_flags(&so->so_rcv.sb_mtx, IPL_MPFLOOR, "sbrcv", 0);
Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
diff -u -p -r1.737 if.c
--- net/if.c	7 Jul 2025 02:28:50 -0000	1.737
+++ net/if.c	17 Jul 2025 13:32:27 -0000
@@ -248,7 +248,8 @@ struct task	if_input_task_locked = TASK_
  * Serialize socket operations to ensure no new sleeping points
  * are introduced in IP output paths.
  */
-struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
+struct rwlock netlock = RWLOCK_INITIALIZER_TRACE("netlock",
+    DT_RWLOCK_IDX_NETLOCK);
 
 /*
  * Network interface utility routines.
Index: sys/rwlock.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/rwlock.h,v
diff -u -p -r1.33 rwlock.h
--- sys/rwlock.h	18 May 2025 00:13:57 -0000	1.33
+++ sys/rwlock.h	17 Jul 2025 13:32:27 -0000
@@ -66,6 +66,7 @@ struct rwlock {
 #ifdef WITNESS
 	struct lock_object	 rwl_lock_obj;
 #endif
+	int			 rwl_traceidx;
 };
 
 #define RWLOCK_LO_FLAGS(flags) \
@@ -93,10 +94,14 @@ struct rwlock {
 
 #ifdef WITNESS
 #define RWLOCK_INITIALIZER(name) \
-	{ 0, 0, 0, name, .rwl_lock_obj = RWLOCK_LO_INITIALIZER(name, 0) }
+	{ 0, 0, 0, name, .rwl_lock_obj = RWLOCK_LO_INITIALIZER(name, 0), 0 }
+#define RWLOCK_INITIALIZER_TRACE(name, trace) \
+	{ 0, 0, 0, name, .rwl_lock_obj = RWLOCK_LO_INITIALIZER(name, 0), trace }
 #else
 #define RWLOCK_INITIALIZER(name) \
-	{ 0, 0, 0, name }
+	{ 0, 0, 0, name, 0 }
+#define RWLOCK_INITIALIZER_TRACE(name, trace) \
+	{ 0, 0, 0, name, trace }
 #endif
 
 #define RWLOCK_WRLOCK		0x04UL
@@ -133,18 +138,24 @@ struct rrwlock {
 #ifdef _KERNEL
 
 void	_rw_init_flags(struct rwlock *, const char *, int,
-	    const struct lock_type *);
+	    const struct lock_type *, int);
 
 #ifdef WITNESS
+#define rw_init_flags_trace(rwl, name, flags, trace) do {		\
+	static const struct lock_type __lock_type = { .lt_name = #rwl };\
+	_rw_init_flags(rwl, name, flags, &__lock_type, trace);		\
+} while (0)
 #define rw_init_flags(rwl, name, flags) do {				\
 	static const struct lock_type __lock_type = { .lt_name = #rwl };\
-	_rw_init_flags(rwl, name, flags, &__lock_type);			\
+	_rw_init_flags(rwl, name, flags, &__lock_type, 0);	\
 } while (0)
-#define rw_init(rwl, name)	rw_init_flags(rwl, name, 0)
+#define rw_init(rwl, name)		rw_init_flags(rwl, name, 0)
 #else /* WITNESS */
+#define rw_init_flags_trace(rwl, name, flags, trace) \
+				_rw_init_flags(rwl, name, flags, NULL, trace)
 #define rw_init_flags(rwl, name, flags) \
-				_rw_init_flags(rwl, name, flags, NULL)
-#define rw_init(rwl, name)	_rw_init_flags(rwl, name, 0, NULL)
+				_rw_init_flags(rwl, name, flags, NULL, 0)
+#define rw_init(rwl, name)	_rw_init_flags(rwl, name, 0, NULL, 0)
 #endif /* WITNESS */
 
 void	rw_enter_read(struct rwlock *);
@@ -230,6 +241,10 @@ void	_rw_obj_alloc_flags(struct rwlock *
 		struct lock_type *);
 void	rw_obj_hold(struct rwlock *);
 int	rw_obj_free(struct rwlock *);
+
+/* sorted alphabetically, keep in sync with dev/dt/dt_prov_static.c */
+#define DT_RWLOCK_IDX_NETLOCK	1
+#define DT_RWLOCK_IDX_SOLOCK	2
 
 #endif /* _KERNEL */