From: Vitaliy Makkoveev <mvs@openbsd.org>
Subject: sysctl: Use pc_lock and unlock KERN_CLOCKINTR
To: tech@openbsd.org, Theo de Raadt <deraadt@openbsd.org>
Date: Sat, 7 Jun 2025 06:56:21 +0300

The KERN_CLOCKINTR sysctl path is already mp-safe, so move it out of
the kernel-locked branch; while here, use a `cq_stat_lock' pc_lock(9)
instead of the hand-rolled `gen' generation counter. Also, exclude it
from the RAMDISK kernel: we do not need to expose these statistics
with sysctl(2) during install.
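
For reference, here is a minimal sketch of the pc_lock(9)
single-producer pattern this diff adopts. The `foo' names are
placeholders, not code from the tree, and the lock must be set up
with pc_lock_init() before first use:

#include <sys/types.h>
#include <sys/pclock.h>

struct foo_stat {
	uint64_t fs_count;
};

struct foo {
	struct foo_stat	f_stat;		/* [p] protected statistics */
	struct pc_lock	f_stat_lock;	/* initialize: pc_lock_init() */
};

/* Single producer, e.g. the CPU that owns the structure. */
void
foo_update(struct foo *f)
{
	unsigned int gen;

	gen = pc_sprod_enter(&f->f_stat_lock);
	f->f_stat.fs_count++;		/* modify the protected data */
	pc_sprod_leave(&f->f_stat_lock, gen);
}

/* Consumer: copy out a snapshot, retrying if a producer raced us. */
void
foo_read(struct foo *f, struct foo_stat *out)
{
	unsigned int gen;

	pc_cons_enter(&f->f_stat_lock, &gen);
	do {
		*out = f->f_stat;
	} while (pc_cons_leave(&f->f_stat_lock, &gen) != 0);
}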

To keep the "#ifdef" spaghetti small, do a little reordering and place
the call within the existing "#ifndef SMALL_KERNEL" block.

The short test program below exercises the new sysctl(2) path and
behaves as expected with this diff applied.

#include <sys/types.h>
#include <sys/clockintr.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <err.h>

int
main(void)
{
	struct clockintr_stat stat;
	int mib[3];
	size_t len;

	mib[0] = CTL_KERN;
	mib[1] = KERN_CLOCKINTR;
	mib[2] = KERN_CLOCKINTR_STATS;
	len = sizeof(stat);

	/* Fetch the statistics summed over all CPU queues. */
	if (sysctl(mib, 3, &stat, &len, NULL, 0) == -1)
		err(1, "sysctl");

	printf("dispatched: %llu ns\n",
	    (unsigned long long)stat.cs_dispatched);

	return 0;
}
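
It builds and runs in userland, e.g. (the file name is arbitrary):

$ cc -o clockintr-test clockintr-test.c
$ ./clockintr-test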


Index: sys/kern/kern_clockintr.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
retrieving revision 1.71
diff -u -p -r1.71 kern_clockintr.c
--- sys/kern/kern_clockintr.c	7 Nov 2024 16:02:29 -0000	1.71
+++ sys/kern/kern_clockintr.c	7 Jun 2025 03:44:40 -0000
@@ -162,7 +162,7 @@ clockintr_dispatch(void *frame)
 	struct clockrequest *request = &cq->cq_request;
 	void *arg;
 	void (*func)(struct clockrequest *, void *, void *);
-	uint32_t ogen;
+	unsigned int gen;
 
 	if (cq->cq_dispatch != 0)
 		panic("%s: recursive dispatch", __func__);
@@ -243,9 +243,7 @@ rearm:
 	}
 stats:
 	/* Update our stats. */
-	ogen = cq->cq_gen;
-	cq->cq_gen = 0;
-	membar_producer();
+	gen = pc_sprod_enter(&cq->cq_stat_lock);
 	cq->cq_stat.cs_dispatched += cq->cq_uptime - start;
 	if (run > 0) {
 		cq->cq_stat.cs_lateness += lateness;
@@ -256,8 +254,7 @@ stats:
 		cq->cq_stat.cs_earliness += clockqueue_next(cq) - cq->cq_uptime;
 	} else
 		cq->cq_stat.cs_spurious++;
-	membar_producer();
-	cq->cq_gen = MAX(1, ogen + 1);
+	pc_sprod_leave(&cq->cq_stat_lock, gen);
 
 	mtx_leave(&cq->cq_mtx);
 
@@ -452,7 +449,7 @@ clockqueue_init(struct clockqueue *cq)
 	mtx_init(&cq->cq_mtx, IPL_CLOCK);
 	TAILQ_INIT(&cq->cq_all);
 	TAILQ_INIT(&cq->cq_pend);
-	cq->cq_gen = 1;
+	pc_lock_init(&cq->cq_stat_lock);
 	SET(cq->cq_flags, CQ_INIT);
 }
 
@@ -559,6 +556,7 @@ nsec_advance(uint64_t *next, uint64_t pe
 	return elapsed;
 }
 
+#ifndef SMALL_KERNEL
 int
 sysctl_clockintr(int *name, u_int namelen, void *oldp, size_t *oldlenp,
     void *newp, size_t newlen)
@@ -567,7 +565,7 @@ sysctl_clockintr(int *name, u_int namele
 	struct clockqueue *cq;
 	struct cpu_info *ci;
 	CPU_INFO_ITERATOR cii;
-	uint32_t gen;
+	unsigned int gen;
 
 	if (namelen != 1)
 		return ENOTDIR;
@@ -579,12 +577,12 @@ sysctl_clockintr(int *name, u_int namele
 			cq = &ci->ci_queue;
 			if (!ISSET(cq->cq_flags, CQ_INIT))
 				continue;
+
+			pc_cons_enter(&cq->cq_stat_lock, &gen);
 			do {
-				gen = cq->cq_gen;
-				membar_consumer();
 				tmp = cq->cq_stat;
-				membar_consumer();
-			} while (gen == 0 || gen != cq->cq_gen);
+			} while (pc_cons_leave(&cq->cq_stat_lock, &gen) != 0);
+
 			sum.cs_dispatched += tmp.cs_dispatched;
 			sum.cs_early += tmp.cs_early;
 			sum.cs_earliness += tmp.cs_earliness;
@@ -600,6 +598,7 @@ sysctl_clockintr(int *name, u_int namele
 
 	return EINVAL;
 }
+#endif /* SMALL_KERNEL */
 
 #ifdef DDB
 
Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.477
diff -u -p -r1.477 kern_sysctl.c
--- sys/kern/kern_sysctl.c	6 Jun 2025 17:06:23 -0000	1.477
+++ sys/kern/kern_sysctl.c	7 Jun 2025 03:44:40 -0000
@@ -406,7 +406,10 @@ kern_sysctl_dirs(int top_name, int *name
 #ifndef SMALL_KERNEL
 	case KERN_FILE:
 		return (sysctl_file(name, namelen, oldp, oldlenp, p));
-#endif
+	case KERN_CLOCKINTR:
+		return sysctl_clockintr(name, namelen, oldp, oldlenp, newp,
+		    newlen);
+#endif /* SMALL_KERNEL */
 	case KERN_MALLOCSTATS:
 		return (sysctl_malloc(name, namelen, oldp, oldlenp,
 		    newp, newlen, p));
@@ -503,9 +506,6 @@ kern_sysctl_dirs_locked(int top_name, in
 		return witness_sysctl(name, namelen, oldp, oldlenp,
 		    newp, newlen);
 #endif
-	case KERN_CLOCKINTR:
-		return sysctl_clockintr(name, namelen, oldp, oldlenp, newp,
-		    newlen);
 	default:
 		return (ENOTDIR);	/* overloaded */
 	}
Index: sys/sys/clockintr.h
===================================================================
RCS file: /cvs/src/sys/sys/clockintr.h,v
retrieving revision 1.29
diff -u -p -r1.29 clockintr.h
--- sys/sys/clockintr.h	25 Feb 2024 19:15:50 -0000	1.29
+++ sys/sys/clockintr.h	7 Jun 2025 03:44:40 -0000
@@ -34,6 +34,7 @@ struct clockintr_stat {
 
 #include <sys/mutex.h>
 #include <sys/queue.h>
+#include <sys/pclock.h>
 
 struct clockqueue;
 struct clockrequest;
@@ -94,6 +95,7 @@ struct clockrequest {
  *	I	Immutable after initialization.
  *	m	Per-queue mutex (cq_mtx).
  *	o	Owned by a single CPU.
+ *	p	cq_stat_lock
  */
 struct clockqueue {
 	struct clockrequest cq_request;	/* [o] callback request object */
@@ -104,8 +106,8 @@ struct clockqueue {
 	struct clockintr *cq_running;	/* [m] running clockintr */
 	struct clockintr cq_hardclock;	/* [o] hardclock handle */
 	struct intrclock cq_intrclock;	/* [I] local interrupt clock */
-	struct clockintr_stat cq_stat;	/* [o] dispatch statistics */
-	volatile uint32_t cq_gen;	/* [o] cq_stat update generation */ 
+	struct clockintr_stat cq_stat;	/* [p] dispatch statistics */
+	struct pc_lock cq_stat_lock;	/* cq_stat update lock */
 	volatile uint32_t cq_dispatch;	/* [o] dispatch is running */
 	uint32_t cq_flags;		/* [m] CQ_* flags; see below */
 };