Index | Thread | Search

From:
Kirill A. Korinsky <kirill@korins.ky>
Subject:
Re: sys/sysctl: make hw.setperf MP safe
To:
"Lorenz (xha)" <me@xha.li>
Cc:
tech@openbsd.org, mvs@openbsd.org
Date:
Wed, 21 May 2025 14:13:25 +0200

Download raw body.

Thread
On Wed, 21 May 2025 13:45:22 +0200,
"Lorenz (xha)" <me@xha.li> wrote:
>
> Hi Kirill,
>
> On Wed, May 21, 2025 at 12:37:39PM +0200, Kirill A. Korinsky wrote:
> > tech@,
> >
> > Here a diff which makes hw.setperf MP safe.
> >
> > Tested on -current/amd64.
> >
> > Feedback? Ok?
>
> i think this is wrong -- cpu_setperf() doesn't seem thread-safe on
> at least arm64. also, there are some races that could happen in
> setperf_auto with your diff (loading perflevel multiple times which
> could yield different values, not really the end of the world in
> that case but still).
>
> i would suggest using a mutex to protect setting hw.setperf?  it
> doesn't make much sense to set hw.setperf concurrently either way?
>

Well, setperf_auto is scheduled to run by TIMEOUT_INITIALIZER and it runs
all callback under kernel lock unless TIMEOUT_MPSAFE is used.

So, I doubt that any race condition can be triggered here.

But wrap cpu_setperf() inside sysctl_hwsetperf() make sence and here an
updated diff.

Thanks for catching it.


Index: sys/arch/amd64/amd64/est.c
===================================================================
RCS file: /home/cvs/src/sys/arch/amd64/amd64/est.c,v
diff -u -p -r1.42 est.c
--- sys/arch/amd64/amd64/est.c	12 Aug 2021 15:16:23 -0000	1.42
+++ sys/arch/amd64/amd64/est.c	21 May 2025 12:01:46 -0000
@@ -55,6 +55,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/atomic.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 
@@ -96,8 +97,13 @@ struct fqlist {
 
 static struct fqlist *est_fqlist;
 
+/*
+ * Locks used to protect data:
+ *	a	atomic
+ */
+
 extern int setperf_prio;
-extern int perflevel;
+extern int perflevel;			/* [a] */
 
 int bus_clock;
 
@@ -324,7 +330,7 @@ est_acpi_pss_changed(struct acpicpu_pss 
 	est_fqlist = acpilist;
 
 	if (needtran) {
-		est_setperf(perflevel);
+		est_setperf(atomic_load_int(&perflevel));
 	}
 }
 #endif
@@ -457,7 +463,7 @@ est_init(struct cpu_info *ci)
 	if (low == high)
 		goto nospeedstep;
 
-	perflevel = (cpuspeed - low) * 100 / (high - low);
+	atomic_store_int(&perflevel, (cpuspeed - low) * 100 / (high - low));
 
 	printf("%s: Enhanced SpeedStep %d MHz", cpu_device, cpuspeed);
 
Index: sys/arch/amd64/amd64/powernow-k8.c
===================================================================
RCS file: /home/cvs/src/sys/arch/amd64/amd64/powernow-k8.c,v
diff -u -p -r1.29 powernow-k8.c
--- sys/arch/amd64/amd64/powernow-k8.c	21 Feb 2022 08:16:08 -0000	1.29
+++ sys/arch/amd64/amd64/powernow-k8.c	21 May 2025 12:01:46 -0000
@@ -28,6 +28,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/atomic.h>
 #include <sys/malloc.h>
 #include <sys/sysctl.h>
 
@@ -47,8 +48,13 @@
 #define BIOS_START			0xe0000
 #define	BIOS_LEN			0x20000
 
+/*
+ * Locks used to protect data:
+ *	a	atomic
+ */
+
 extern int setperf_prio;
-extern int perflevel;
+extern int perflevel;			/* [a] */
 
 /*
  * MSRs and bits used by PowerNow technology
@@ -364,7 +370,7 @@ k8pnow_acpi_pss_changed(struct acpicpu_p
 		 * Our current operating state is not among
 		 * the ones found the new PSS.
 		 */
-		curs = ((perflevel * npss) + 1) / 101;
+		curs = ((atomic_load_int(&perflevel) * npss) + 1) / 101;
 		if (curs >= npss)
 			curs = npss - 1;
 		needtran = 1;
Index: sys/arch/i386/i386/apm.c
===================================================================
RCS file: /home/cvs/src/sys/arch/i386/i386/apm.c,v
diff -u -p -r1.133 apm.c
--- sys/arch/i386/i386/apm.c	7 Oct 2024 01:31:22 -0000	1.133
+++ sys/arch/i386/i386/apm.c	21 May 2025 12:01:46 -0000
@@ -38,6 +38,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/atomic.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/rwlock.h>
@@ -232,7 +233,12 @@ apm_perror(const char *str, struct apmre
 void
 apm_suspend(int state)
 {
-	extern int perflevel;
+	/*
+	 * Locks used to protect data:
+	 *	a	atomic
+	 */
+
+	extern int perflevel;		/* [a] */
 	int s;
 
 #if NWSDISPLAY > 0
@@ -286,7 +292,7 @@ apm_suspend(int state)
 
 	/* restore hw.setperf */
 	if (cpu_setperf != NULL)
-		cpu_setperf(perflevel);
+		cpu_setperf(atomic_load_int(&perflevel));
 }
 
 void
Index: sys/arch/i386/i386/powernow-k8.c
===================================================================
RCS file: /home/cvs/src/sys/arch/i386/i386/powernow-k8.c,v
diff -u -p -r1.35 powernow-k8.c
--- sys/arch/i386/i386/powernow-k8.c	30 Jan 2023 10:49:05 -0000	1.35
+++ sys/arch/i386/i386/powernow-k8.c	21 May 2025 12:01:46 -0000
@@ -138,7 +138,6 @@ struct pst_s {
 
 struct k8pnow_cpu_state *k8pnow_current_state = NULL;
 extern int setperf_prio;
-extern int perflevel;
 
 int k8pnow_read_pending_wait(uint64_t *);
 int k8pnow_decode_pst(struct k8pnow_cpu_state *, uint8_t *);
Index: sys/arch/macppc/dev/dfs.c
===================================================================
RCS file: /home/cvs/src/sys/arch/macppc/dev/dfs.c,v
diff -u -p -r1.4 dfs.c
--- sys/arch/macppc/dev/dfs.c	13 Mar 2022 12:33:01 -0000	1.4
+++ sys/arch/macppc/dev/dfs.c	21 May 2025 12:01:46 -0000
@@ -17,6 +17,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/atomic.h>
 #include <sys/proc.h>
 #include <sys/sysctl.h>
 
@@ -27,7 +28,12 @@
 #include <macppc/pci/macobio.h>
 #include <powerpc/hid.h>
 
-extern int perflevel;
+/*
+ * Locks used to protect data:
+ *	a	atomic
+ */
+
+extern int perflevel;			/* [a] */
 
 struct dfs_softc {
 	struct device	sc_dev;
@@ -85,10 +91,10 @@ dfs_attach(struct device *parent, struct
 
 	if (hid1 & HID1_DFS4) {
 		ppc_curfreq = ppc_maxfreq / 4;
-		perflevel = 25;
+		atomic_store_int(&perflevel, 25);
 	} else if (hid1 & HID1_DFS2) {
 		ppc_curfreq = ppc_maxfreq / 2;
-		perflevel = 50;
+		atomic_store_int(&perflevel, 50);
 	}
 
 	cpu_setperf = dfs_setperf;
Index: sys/arch/macppc/macppc/cpu.c
===================================================================
RCS file: /home/cvs/src/sys/arch/macppc/macppc/cpu.c,v
diff -u -p -r1.90 cpu.c
--- sys/arch/macppc/macppc/cpu.c	24 Oct 2024 17:37:06 -0000	1.90
+++ sys/arch/macppc/macppc/cpu.c	21 May 2025 12:01:46 -0000
@@ -35,6 +35,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/atomic.h>
 #include <sys/proc.h>
 #include <sys/sysctl.h>
 #include <sys/task.h>
@@ -154,7 +155,7 @@ ppc64_scale_frequency(u_int freq_scale)
 	ppc_intr_enable(s);
 }
 
-extern int perflevel;
+extern int perflevel;			/* [a] */
 
 struct task ppc64_setperf_task;
 int ppc64_perflevel;
Index: sys/arch/powerpc64/dev/opal.c
===================================================================
RCS file: /home/cvs/src/sys/arch/powerpc64/dev/opal.c,v
diff -u -p -r1.14 opal.c
--- sys/arch/powerpc64/dev/opal.c	12 Oct 2022 13:39:50 -0000	1.14
+++ sys/arch/powerpc64/dev/opal.c	21 May 2025 12:01:46 -0000
@@ -17,6 +17,7 @@
 
 #include <sys/param.h>
 #include <sys/device.h>
+#include <sys/atomic.h>
 #include <sys/malloc.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
@@ -79,7 +80,12 @@ int	opal_settime(struct todr_chip_handle
 void	opal_configure_idle_states(struct opal_softc *, int);
 void	opal_found_stop_state(struct opal_softc *, uint64_t);
 
-extern int perflevel;
+/*
+ * Locks used to protect data:
+ *	a	atomic
+ */
+
+extern int perflevel;			/* [a] */
 
 void	opalpm_init(struct opal_softc *, int);
 int	opalpm_find_index(struct opal_softc *);
@@ -437,7 +443,8 @@ opalpm_init(struct opal_softc *sc, int n
 	OF_getprop(node, "ibm,pstate-frequencies-mhz", sc->sc_freq, len);
 
 	if ((i = opalpm_find_index(sc)) != -1)
-		perflevel = (sc->sc_npstate - 1 - i) * 100 / sc->sc_npstate;
+		atomic_store_int(&perflevel,
+		    sc->sc_npstate - 1 - i) * 100 / sc->sc_npstate);
 	cpu_cpuspeed = opalpm_cpuspeed;
 #ifndef MULTIPROCESSOR
 	cpu_setperf = opalpm_setperf;
Index: sys/arch/riscv64/riscv64/cpu.c
===================================================================
RCS file: /home/cvs/src/sys/arch/riscv64/riscv64/cpu.c,v
diff -u -p -r1.21 cpu.c
--- sys/arch/riscv64/riscv64/cpu.c	10 Nov 2024 06:51:59 -0000	1.21
+++ sys/arch/riscv64/riscv64/cpu.c	21 May 2025 12:01:46 -0000
@@ -495,7 +495,12 @@ cpu_unidle(struct cpu_info *ci)
  * Dynamic voltage and frequency scaling implementation.
  */
 
-extern int perflevel;
+/*
+ * Locks used to protect data:
+ *	a	atomic
+ */
+
+extern int perflevel;			/* [a] */
 
 struct opp {
 	uint64_t opp_hz;
@@ -691,8 +696,8 @@ cpu_opp_mountroot(struct device *self)
 		task_set(&cpu_opp_task, cpu_opp_dotask, NULL);
 		cpu_setperf = cpu_opp_setperf;
 
-		perflevel = (level > 0) ? level : 0;
-		cpu_setperf(perflevel);
+		atomic_store_int(&perflevel, (level > 0) ? level : 0);
+		cpu_setperf(atomic_load_int(&perflevel));
 	}
 }
 
Index: sys/dev/acpi/acpitz.c
===================================================================
RCS file: /home/cvs/src/sys/dev/acpi/acpitz.c,v
diff -u -p -r1.61 acpitz.c
--- sys/dev/acpi/acpitz.c	5 Feb 2025 11:03:36 -0000	1.61
+++ sys/dev/acpi/acpitz.c	21 May 2025 12:01:46 -0000
@@ -21,6 +21,7 @@
 #include <sys/signalvar.h>
 #include <sys/systm.h>
 #include <sys/device.h>
+#include <sys/atomic.h>
 #include <sys/malloc.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
@@ -88,7 +89,13 @@ void	acpitz_init(struct acpitz_softc *, 
 void		(*acpitz_cpu_setperf)(int);
 int		acpitz_perflevel = -1;
 extern void	(*cpu_setperf)(int);
-extern int	perflevel;
+
+/*
+ * Locks used to protect data:
+ *	a	atomic
+ */
+
+extern int	perflevel;		/* [a] */
 #define PERFSTEP 10
 
 #define ACPITZ_TRIPS	(1L << 0)
@@ -99,7 +106,7 @@ void
 acpitz_init_perf(void *arg)
 {
 	if (acpitz_perflevel == -1)
-		acpitz_perflevel = perflevel;
+		acpitz_perflevel = atomic_load_int(&perflevel);
 
 	if (cpu_setperf != acpitz_setperf) {
 		acpitz_cpu_setperf = cpu_setperf;
@@ -412,7 +419,7 @@ acpitz_refresh(void *arg)
 
 		/* clamp passive cooling request */
 		if (nperf > perflevel)
-			nperf = perflevel;
+			nperf = atomic_load_int(&perflevel);
 
 		/* Perform CPU setperf */
 		if (acpitz_cpu_setperf && nperf != acpitz_perflevel) {
Index: sys/kern/kern_sysctl.c
===================================================================
RCS file: /home/cvs/src/sys/kern/kern_sysctl.c,v
diff -u -p -r1.468 kern_sysctl.c
--- sys/kern/kern_sysctl.c	9 May 2025 14:53:22 -0000	1.468
+++ sys/kern/kern_sysctl.c	21 May 2025 12:01:46 -0000
@@ -849,7 +849,6 @@ hw_sysctl(int *name, u_int namelen, void
 	case HW_CPUSPEED:
 #ifndef	SMALL_KERNEL
 	case HW_SENSORS:
-	case HW_SETPERF:
 	case HW_PERFPOLICY:
 	case HW_BATTERY:
 #endif /* !SMALL_KERNEL */
@@ -867,6 +866,10 @@ hw_sysctl(int *name, u_int namelen, void
 		sysctl_vsunlock(oldp, savelen);
 		return (err);
 	}
+#ifndef	SMALL_KERNEL
+	case HW_SETPERF:
+		return (sysctl_hwsetperf(oldp, oldlenp, newp, newlen));
+#endif /* !SMALL_KERNEL */
 	case HW_VENDOR:
 		if (hw_vendor)
 			return (sysctl_rdstring(oldp, oldlenp, newp,
@@ -940,8 +943,6 @@ hw_sysctl_locked(int *name, u_int namele
 	case HW_SENSORS:
 		return (sysctl_sensors(name + 1, namelen - 1, oldp, oldlenp,
 		    newp, newlen));
-	case HW_SETPERF:
-		return (sysctl_hwsetperf(oldp, oldlenp, newp, newlen));
 	case HW_PERFPOLICY:
 		return (sysctl_hwperfpolicy(oldp, oldlenp, newp, newlen));
 #endif /* !SMALL_KERNEL */
Index: sys/kern/sched_bsd.c
===================================================================
RCS file: /home/cvs/src/sys/kern/sched_bsd.c,v
diff -u -p -r1.99 sched_bsd.c
--- sys/kern/sched_bsd.c	10 Mar 2025 09:28:56 -0000	1.99
+++ sys/kern/sched_bsd.c	21 May 2025 12:03:15 -0000
@@ -548,7 +548,13 @@ void (*cpu_setperf)(int);
 #define PERFPOL_MANUAL 0
 #define PERFPOL_AUTO 1
 #define PERFPOL_HIGH 2
-int perflevel = 100;
+
+/*
+ * Locks used to protect data:
+ *	a	atomic
+ */
+
+int perflevel = 100;			/* [a] */
 int perfpolicy_on_ac = PERFPOL_HIGH;
 int perfpolicy_on_battery = PERFPOL_AUTO;
 
@@ -630,13 +636,14 @@ setperf_auto(void *v)
 	if (speedup && downbeats < 5)
 		downbeats++;
 
-	if (speedup && perflevel != 100) {
+	if (speedup && atomic_load_int(&perflevel) != 100) {
 faster:
-		perflevel = 100;
-		cpu_setperf(perflevel);
-	} else if (!speedup && perflevel != 0 && --downbeats <= 0) {
-		perflevel = 0;
-		cpu_setperf(perflevel);
+		atomic_store_int(&perflevel, 100);
+		cpu_setperf(atomic_load_int(&perflevel));
+	} else if (!speedup && atomic_load_int(&perflevel) != 0 &&
+	    --downbeats <= 0) {
+		atomic_store_int(&perflevel, 0);
+		cpu_setperf(atomic_load_int(&perflevel));
 	}
 
 	timeout_add_msec(&setperf_to, 100);
@@ -658,8 +665,11 @@ sysctl_hwsetperf(void *oldp, size_t *old
 	if (err)
 		return err;
 
-	if (newp != NULL)
-		cpu_setperf(perflevel);
+	if (newp != NULL) {
+		KERNEL_LOCK();
+		cpu_setperf(atomic_load_int(&perflevel));
+		KERNEL_UNLOCK();
+	}
 
 	return 0;
 }
@@ -729,8 +739,8 @@ sysctl_hwperfpolicy(void *oldp, size_t *
 	}
 
 	if (current_perfpolicy() == PERFPOL_HIGH) {
-		perflevel = 100;
-		cpu_setperf(perflevel);
+		atomic_store_int(&perflevel, 100);
+		cpu_setperf(atomic_load_int(&perflevel));
 	}
 
 	if (perfpolicy_dynamic())
Index: sys/kern/subr_suspend.c
===================================================================
RCS file: /home/cvs/src/sys/kern/subr_suspend.c,v
diff -u -p -r1.18 subr_suspend.c
--- sys/kern/subr_suspend.c	28 May 2024 09:40:40 -0000	1.18
+++ sys/kern/subr_suspend.c	21 May 2025 12:01:46 -0000
@@ -18,6 +18,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/atomic.h>
 #include <sys/buf.h>
 #include <sys/clockintr.h>
 #include <sys/reboot.h>
@@ -48,8 +49,13 @@ device_register_wakeup(struct device *de
 int
 sleep_state(void *v, int sleepmode)
 {
+	/*
+	 * Locks used to protect data:
+	 *	a	atomic
+	 */
+
 	int error, s;
-	extern int perflevel;
+	extern int perflevel;		/* [a] */
 	size_t rndbuflen;
 	char *rndbuf;
 #ifdef GPROF
@@ -210,7 +216,8 @@ fail_hiballoc:
 #endif
 	sys_sync(curproc, NULL, NULL);
 	if (cpu_setperf != NULL)
-		cpu_setperf(perflevel);	/* Restore hw.setperf */
+		/* Restore hw.setperf */
+		cpu_setperf(atomic_load_int(&perflevel));
 	if (suspend_finish(v) == EAGAIN)
 		goto top;
 	return (error);