From: Kirill A. Korinsky
Subject: improvement of perfpolicy auto
To: OpenBSD tech
Date: Sun, 27 Apr 2025 15:29:42 +0200

tech@,

here's an improvement to how perfpolicy auto works.

Short story: instead of only using the minimum and maximum speeds, it
now uses all speeds supported by the CPU.

Long story: I've improved how idle time is computed.  This change
switches to using p_tu.tu_runtime, which seems like the right way, at
least based on my understanding.

This allows implementing something similar to the concept behind
Linux's ondemand governor: the CPU load within a time window determines
the target performance level.  Since we don't have a per-CPU
cpu_setperf, it uses the highest required frequency across all CPUs as
the target.

I've personally been using this approach since September, occasionally
polishing it, and it works quite well.  I can see the CPU running at
different speeds, as expected.

Tests?  Feedback?  Ok?

Index: sys/kern/sched_bsd.c
===================================================================
RCS file: /home/cvs/src/sys/kern/sched_bsd.c,v
diff -u -p -r1.99 sched_bsd.c
--- sys/kern/sched_bsd.c	10 Mar 2025 09:28:56 -0000	1.99
+++ sys/kern/sched_bsd.c	27 Apr 2025 11:02:11 -0000
@@ -575,16 +575,21 @@ current_perfpolicy(void)
 	return (hw_power) ? perfpolicy_on_ac : perfpolicy_on_battery;
 }
 
+#define PERFPOL_AUTO_HW_POWER_HIGH	80
+#define PERFPOL_AUTO_AC_POWER_HIGH	95
+#define PERFPOL_AUTO_INTERVAL_MS	100
+
 void
 setperf_auto(void *v)
 {
-	static uint64_t *idleticks, *totalticks;
-	static int downbeats;
-	int i, j = 0;
-	int speedup = 0;
+	static struct timespec last, *idletimes;
+	int i = 0;
+	int speedup = 0, load = 0, speed = 0;
+	uint64_t idle, window;
 	CPU_INFO_ITERATOR cii;
 	struct cpu_info *ci;
-	uint64_t idle, total, allidle = 0, alltotal = 0;
+	struct timespec now, ts, its;
+	struct schedstate_percpu *spc;
 
 	if (!perfpolicy_dynamic())
 		return;
@@ -597,49 +602,68 @@ setperf_auto(void *v)
 		goto faster;
 	}
 
-	if (!idleticks)
-		if (!(idleticks = mallocarray(ncpusfound, sizeof(*idleticks),
+	if (!idletimes)
+		if (!(idletimes = mallocarray(ncpusfound, sizeof(*idletimes),
 		    M_DEVBUF, M_NOWAIT | M_ZERO)))
 			return;
-	if (!totalticks)
-		if (!(totalticks = mallocarray(ncpusfound, sizeof(*totalticks),
-		    M_DEVBUF, M_NOWAIT | M_ZERO))) {
-			free(idleticks, M_DEVBUF,
-			    sizeof(*idleticks) * ncpusfound);
-			return;
-		}
+
+	nanouptime(&now);
+	timespecsub(&now, &last, &ts);
+	last = now;
+	window = TIMESPEC_TO_NSEC(&ts) / 1000000ULL;
+
 	CPU_INFO_FOREACH(cii, ci) {
 		if (!cpu_is_online(ci))
 			continue;
-		total = 0;
-		for (i = 0; i < CPUSTATES; i++) {
-			total += ci->ci_schedstate.spc_cp_time[i];
-		}
-		total -= totalticks[j];
-		idle = ci->ci_schedstate.spc_cp_time[CP_IDLE] - idleticks[j];
-		if (idle < total / 3)
-			speedup = 1;
-		alltotal += total;
-		allidle += idle;
-		idleticks[j] += idle;
-		totalticks[j] += total;
-		j++;
+
+		spc = &ci->ci_schedstate;
+
+		ts = spc->spc_runtime;
+		its = spc->spc_idleproc->p_tu.tu_runtime;
+
+		if (ci->ci_curproc == spc->spc_idleproc &&
+		    timespeccmp(&ts, &spc->spc_runtime, ==) &&
+		    timespeccmp(&now, &ts, >))
+			timespecsub(&now, &ts, &ts);
+		else
+			timespecclear(&ts);
+
+		timespecsub(&its, &idletimes[i], &its);
+		timespecadd(&idletimes[i], &its, &idletimes[i]);
+
+		timespecadd(&ts, &its, &ts);
+
+		idle = TIMESPEC_TO_NSEC(&ts) / 1000000ULL;
+
+		if (idle <= 0)
+			speed = 100;
+
+		if (idle > window)
+			load = 0;
+		else
+			load = (100 * (window - idle)) / window;
+
+		if (load > speed)
+			speed = load;
+
+		i++;
 	}
-	if (allidle < alltotal / 2)
+
+	if (hw_power && speed >= PERFPOL_AUTO_HW_POWER_HIGH)
+		speedup = 1;
+	else if (speed >= PERFPOL_AUTO_AC_POWER_HIGH)
 		speedup = 1;
-	if (speedup && downbeats < 5)
-		downbeats++;
 
 	if (speedup && perflevel != 100) {
 faster:
 		perflevel = 100;
 		cpu_setperf(perflevel);
-	} else if (!speedup && perflevel != 0 && --downbeats <= 0) {
-		perflevel = 0;
+	} else if (!speedup && speed != perflevel) {
+		perflevel = speed;
 		cpu_setperf(perflevel);
 	}
 
-	timeout_add_msec(&setperf_to, 100);
+	timeout_add_msec(&setperf_to, PERFPOL_AUTO_INTERVAL_MS);
 }
 
 int
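
For illustration only, here is a tiny userland sketch of the per-window
load figure the loop above derives for each CPU; the helper name and the
sample numbers are made up and are not part of the diff:

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative sketch: map the idle time observed during a measurement
 * window (both in milliseconds) to a 0-100 load figure, roughly
 * mirroring the window/idle arithmetic in setperf_auto() above.
 */
static int
window_load(uint64_t window_ms, uint64_t idle_ms)
{
	if (idle_ms >= window_ms)
		return 0;
	return (int)((100 * (window_ms - idle_ms)) / window_ms);
}

int
main(void)
{
	/* e.g. 25ms of idle time in a 100ms window gives a 75% load */
	printf("load = %d%%\n", window_load(100, 25));
	return 0;
}

The highest such figure across the online CPUs is what ends up being
passed to cpu_setperf(), since there is no per-CPU knob.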