From: Kirill A. Korinsky
Subject: improvement of perfpolicy auto
To: OpenBSD tech
Date: Sun, 27 Apr 2025 15:29:42 +0200

tech@,

here's an improvement to how perfpolicy auto works.

Short story: instead of only using the minimum and maximum speeds, it
now uses all speeds supported by the CPU.

Long story: I've improved how idle time is computed.  This change
switches to using p_tu.tu_runtime, which seems like the right way, at
least based on my understanding.

This allows implementing something similar to the concept behind
Linux's ondemand governor: the CPU load within a time window determines
the target performance level.  Since we don't have a per-CPU
cpu_setperf, it uses the highest required frequency across all CPUs as
the target.

I've personally been using this approach since September, occasionally
polishing it, and it works quite well.  I can see the CPU running at
different speeds, as expected.

Tests?  Feedback?  Ok?

Index: sys/kern/sched_bsd.c
===================================================================
RCS file: /home/cvs/src/sys/kern/sched_bsd.c,v
diff -u -p -r1.99 sched_bsd.c
--- sys/kern/sched_bsd.c	10 Mar 2025 09:28:56 -0000	1.99
+++ sys/kern/sched_bsd.c	27 Apr 2025 11:02:11 -0000
@@ -575,16 +575,21 @@ current_perfpolicy(void)
 	return (hw_power) ? perfpolicy_on_ac : perfpolicy_on_battery;
 }
 
+#define PERFPOL_AUTO_HW_POWER_HIGH	80
+#define PERFPOL_AUTO_AC_POWER_HIGH	95
+#define PERFPOL_AUTO_INTERVAL_MS	100
+
 void
 setperf_auto(void *v)
 {
-	static uint64_t *idleticks, *totalticks;
-	static int downbeats;
-	int i, j = 0;
-	int speedup = 0;
+	static struct timespec last, *idletimes;
+	int i = 0;
+	int speedup = 0, load = 0, speed = 0;
+	uint64_t idle, window;
 	CPU_INFO_ITERATOR cii;
 	struct cpu_info *ci;
-	uint64_t idle, total, allidle = 0, alltotal = 0;
+	struct timespec now, ts, its;
+	struct schedstate_percpu *spc;
 
 	if (!perfpolicy_dynamic())
 		return;
@@ -597,49 +602,68 @@ setperf_auto(void *v)
 		goto faster;
 	}
 
-	if (!idleticks)
-		if (!(idleticks = mallocarray(ncpusfound, sizeof(*idleticks),
+	if (!idletimes)
+		if (!(idletimes = mallocarray(ncpusfound, sizeof(*idletimes),
 		    M_DEVBUF, M_NOWAIT | M_ZERO)))
 			return;
-	if (!totalticks)
-		if (!(totalticks = mallocarray(ncpusfound, sizeof(*totalticks),
-		    M_DEVBUF, M_NOWAIT | M_ZERO))) {
-			free(idleticks, M_DEVBUF,
-			    sizeof(*idleticks) * ncpusfound);
-			return;
-		}
+
+	nanouptime(&now);
+	timespecsub(&now, &last, &ts);
+	last = now;
+	window = TIMESPEC_TO_NSEC(&ts) / 1000000ULL;
+
 	CPU_INFO_FOREACH(cii, ci) {
 		if (!cpu_is_online(ci))
 			continue;
-		total = 0;
-		for (i = 0; i < CPUSTATES; i++) {
-			total += ci->ci_schedstate.spc_cp_time[i];
-		}
-		total -= totalticks[j];
-		idle = ci->ci_schedstate.spc_cp_time[CP_IDLE] - idleticks[j];
-		if (idle < total / 3)
-			speedup = 1;
-		alltotal += total;
-		allidle += idle;
-		idleticks[j] += idle;
-		totalticks[j] += total;
-		j++;
+
+		spc = &ci->ci_schedstate;
+
+		ts = spc->spc_runtime;
+		its = spc->spc_idleproc->p_tu.tu_runtime;
+
+		if (ci->ci_curproc == spc->spc_idleproc &&
+		    timespeccmp(&ts, &spc->spc_runtime, ==) &&
+		    timespeccmp(&now, &ts, >))
+			timespecsub(&now, &ts, &ts);
+		else
+			timespecclear(&ts);
+
+		timespecsub(&its, &idletimes[i], &its);
+		timespecadd(&idletimes[i], &its, &idletimes[i]);
+
+		timespecadd(&ts, &its, &ts);
+
+		idle = TIMESPEC_TO_NSEC(&ts) / 1000000ULL;
+
+		if (idle <= 0)
+			speed = 100;
+
+		if (idle > window)
+			load = 0;
+		else
+			load = (100 * (window - idle)) / window;
+
+		if (load > speed)
+			speed = load;
+
+		i++;
 	}
-	if (allidle < alltotal / 2)
+
+	if (hw_power && speed >= PERFPOL_AUTO_HW_POWER_HIGH)
+		speedup = 1;
+	else if (speed >= PERFPOL_AUTO_AC_POWER_HIGH)
 		speedup = 1;
-	if (speedup && downbeats < 5)
-		downbeats++;
 
 	if (speedup && perflevel != 100) {
 faster:
 		perflevel = 100;
 		cpu_setperf(perflevel);
-	} else if (!speedup && perflevel != 0 && --downbeats <= 0) {
-		perflevel = 0;
+	} else if (!speedup && speed != perflevel) {
+		perflevel = speed;
 		cpu_setperf(perflevel);
 	}
 
-	timeout_add_msec(&setperf_to, 100);
+	timeout_add_msec(&setperf_to, PERFPOL_AUTO_INTERVAL_MS);
 }
 
 int
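
For illustration only, here is a tiny userland sketch of the per-window
load figure the loop above derives for each CPU; the helper name and the
sample numbers are made up and are not part of the diff:

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative sketch: map the idle time observed during a measurement
 * window (both in milliseconds) to a 0-100 load figure, roughly
 * mirroring the window/idle arithmetic in setperf_auto() above.
 */
static int
window_load(uint64_t window_ms, uint64_t idle_ms)
{
	if (idle_ms >= window_ms)
		return 0;
	return (int)((100 * (window_ms - idle_ms)) / window_ms);
}

int
main(void)
{
	/* e.g. 25ms of idle time in a 100ms window gives a 75% load */
	printf("load = %d%%\n", window_load(100, 25));
	return 0;
}

The highest such figure across the online CPUs is what ends up being
passed to cpu_setperf(), since there is no per-CPU knob.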