Download raw body.
powersave CPU policy
tech@,
Suggested patch introduced powersaving cpu policies that trade off some of
the system's responsibilities for battery life. In my use case it allows to
go from ~2 to ~3 hours on the journey 100% -> 15%.
This feature only works on battery and when ampd is started with the -A and
-P flags. Default settings and AC power are not affected.
This patch dynamically removes CPU cores when the system is idle to prevent
the scheduler from using them for any task, i.e. assuming the hardware puts
unused CPU to sleep to save some power.
It also uses an upper limit on CPU frequency, which can be controlled by the
hw.powersavelimit at sysctl. I need this to prevent the CPU from running in
turbo mode, which consumes a lot of power. So the default setting is 75,
which is kind a guess, but which is good for the i5-10210U which I'm using.
I have been using this patch for the last few days and it works well.
Kernel + includes + sbin/sysctl + usr.sbin/apm{,d} recompilation required
Thus, this work is based on Solene's patch which she announced at
https://marc.info/?l=openbsd-tech&m=163259444331471&w=2
diff --git lib/libc/sys/sysctl.2 lib/libc/sys/sysctl.2
index 78c69ff5813..a3d03a3361a 100644
--- lib/libc/sys/sysctl.2
+++ lib/libc/sys/sysctl.2
@@ -293,6 +293,7 @@ privileges may change the value.
.It Dv HW_PHYSMEM Ta "integer" Ta "no"
.It Dv HW_PHYSMEM64 Ta "int64_t" Ta "no"
.It Dv HW_POWER Ta "integer" Ta "no"
+.It Dv HW_POWERSAVELIMIT Ta "integer" Ta "yes"
.It Dv HW_PRODUCT Ta "string" Ta "no"
.It Dv HW_SENSORS Ta "node" Ta "not applicable"
.It Dv HW_SETPERF Ta "integer" Ta "yes"
@@ -395,6 +396,8 @@ instead.
The total physical memory, in bytes.
.It Dv HW_POWER Pq Va hw.power
Machine has wall-power.
+.It Dv HW_POWERSAVELIMIT Pq Va hw.powersavelimit
+Upper limit of CPU performance in powersave policy.
.It Dv HW_PRODUCT Pq Va hw.product
The product name of the machine.
.It Dv HW_SENSORS Pq Va hw.sensors
diff --git sys/kern/kern_sysctl.c sys/kern/kern_sysctl.c
index 5d0d1a8a851..0c55bb3da67 100644
--- sys/kern/kern_sysctl.c
+++ sys/kern/kern_sysctl.c
@@ -733,6 +733,8 @@ hw_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
return (sysctl_hwsetperf(oldp, oldlenp, newp, newlen));
case HW_PERFPOLICY:
return (sysctl_hwperfpolicy(oldp, oldlenp, newp, newlen));
+ case HW_POWERSAVELIMIT:
+ return (sysctl_powersavelimit(oldp, oldlenp, newp, newlen));
#endif /* !SMALL_KERNEL */
case HW_VENDOR:
if (hw_vendor)
diff --git sys/kern/sched_bsd.c sys/kern/sched_bsd.c
index 25b221c1ee2..89c508cd6da 100644
--- sys/kern/sched_bsd.c
+++ sys/kern/sched_bsd.c
@@ -65,8 +65,13 @@ void update_loadavg(void *);
void schedcpu(void *);
uint32_t decay_aftersleep(uint32_t, uint32_t);
+extern struct cpuset sched_all_cpus;
extern struct cpuset sched_idle_cpus;
+#ifdef __HAVE_CPU_TOPOLOGY
+extern int sched_smt;
+#endif
+
/*
* constants for averages over 1, 5, and 15 minutes when sampling at
* 5 second intervals.
@@ -573,8 +578,11 @@ void (*cpu_setperf)(int);
#define PERFPOL_MANUAL 0
#define PERFPOL_AUTO 1
#define PERFPOL_HIGH 2
+#define PERFPOL_POWERSAVING 4
int perflevel = 100;
int perfpolicy = PERFPOL_AUTO;
+/* avoid turbo mode by default */
+int powersavelimit = 75;
#ifndef SMALL_KERNEL
/*
@@ -583,7 +591,9 @@ int perfpolicy = PERFPOL_AUTO;
#include <sys/sysctl.h>
void setperf_auto(void *);
+void setperf_powersaving(void *);
struct timeout setperf_to = TIMEOUT_INITIALIZER(setperf_auto, NULL);
+struct timeout setperf_to_powersaving = TIMEOUT_INITIALIZER(setperf_powersaving, NULL);
extern int hw_power;
void
@@ -653,6 +663,106 @@ faster:
timeout_add_msec(&setperf_to, 100);
}
+void
+setperf_powersaving(void *v)
+{
+ static uint64_t *idleticks, *totalticks;
+ static int downbeats;
+ int i, j = 0;
+ int speedup = 0;
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci, *firstoffline = NULL, *lastidle = NULL;
+ uint64_t idle, total, allidle = 0, alltotal = 0;
+
+ if (perfpolicy != PERFPOL_POWERSAVING)
+ goto recover;
+
+ if (cpu_setperf == NULL)
+ goto recover;
+
+ if (hw_power)
+ goto recover;
+
+ if (!idleticks)
+ if (!(idleticks = mallocarray(ncpusfound, sizeof(*idleticks),
+ M_DEVBUF, M_NOWAIT | M_ZERO)))
+ return;
+ if (!totalticks)
+ if (!(totalticks = mallocarray(ncpusfound, sizeof(*totalticks),
+ M_DEVBUF, M_NOWAIT | M_ZERO))) {
+ free(idleticks, M_DEVBUF,
+ sizeof(*idleticks) * ncpusfound);
+ return;
+ }
+ CPU_INFO_FOREACH(cii, ci) {
+ if (!cpu_is_online(ci)) {
+ if (!firstoffline)
+#ifdef __HAVE_CPU_TOPOLOGY
+ if (sched_smt || ci->ci_smt_id == 0)
+#endif
+ firstoffline = ci;
+ continue;
+ }
+ total = 0;
+ for (i = 0; i < CPUSTATES; i++) {
+ total += ci->ci_schedstate.spc_cp_time[i];
+ }
+ total -= totalticks[j];
+ idle = ci->ci_schedstate.spc_cp_time[CP_IDLE] - idleticks[j];
+ if (idle < total / 3)
+ speedup = 1;
+ alltotal += total;
+ allidle += idle;
+ idleticks[j] += idle;
+ totalticks[j] += total;
+ /* it shoul keep at least one CPU online */
+ if (j++ && cpuset_isset(&sched_idle_cpus, ci))
+ lastidle = ci;
+ }
+ if (allidle < alltotal / 3)
+ speedup = 1;
+ if (speedup)
+ /* twice as long here because we check every 200ms */
+ downbeats = 1;
+
+ if (speedup && perflevel != powersavelimit) {
+ perflevel = powersavelimit;
+ cpu_setperf(perflevel);
+ } else if (speedup && firstoffline) {
+ atomic_clearbits_int(&firstoffline->ci_schedstate.spc_schedflags,
+ SPCF_SHOULDHALT | SPCF_HALTED);
+ cpuset_add(&sched_all_cpus, firstoffline);
+ need_resched(firstoffline);
+ } else if (!speedup && perflevel != 0 && --downbeats <= 0) {
+ perflevel = 0;
+ cpu_setperf(perflevel);
+ } else if (!speedup && lastidle) {
+ cpuset_del(&sched_all_cpus, lastidle);
+ atomic_setbits_int(&lastidle->ci_schedstate.spc_schedflags,
+ SPCF_SHOULDHALT);
+ need_resched(lastidle);
+ }
+
+ /* every 200ms to have a better resolution of the load */
+ timeout_add_msec(&setperf_to_powersaving, 200);
+ return;
+
+recover:
+ CPU_INFO_FOREACH(cii, ci) {
+ if (!cpu_is_online(ci)) {
+#ifdef __HAVE_CPU_TOPOLOGY
+ if (!(sched_smt || ci->ci_smt_id == 0))
+ continue;
+#endif
+ atomic_clearbits_int(&ci->ci_schedstate.spc_schedflags,
+ SPCF_SHOULDHALT | SPCF_HALTED);
+ cpuset_add(&sched_all_cpus, ci);
+ need_resched(ci);
+ }
+ }
+}
+
+
int
sysctl_hwsetperf(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
@@ -691,6 +801,9 @@ sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
case PERFPOL_AUTO:
strlcpy(policy, "auto", sizeof(policy));
break;
+ case PERFPOL_POWERSAVING:
+ strlcpy(policy, "powersaving", sizeof(policy));
+ break;
case PERFPOL_HIGH:
strlcpy(policy, "high", sizeof(policy));
break;
@@ -709,6 +822,8 @@ sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
perfpolicy = PERFPOL_MANUAL;
else if (strcmp(policy, "auto") == 0)
perfpolicy = PERFPOL_AUTO;
+ else if (strcmp(policy, "powersaving") == 0)
+ perfpolicy = PERFPOL_POWERSAVING;
else if (strcmp(policy, "high") == 0)
perfpolicy = PERFPOL_HIGH;
else
@@ -716,12 +831,24 @@ sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
if (perfpolicy == PERFPOL_AUTO) {
timeout_add_msec(&setperf_to, 200);
+ } else if (perfpolicy == PERFPOL_POWERSAVING) {
+ timeout_add_msec(&setperf_to_powersaving, 200);
} else if (perfpolicy == PERFPOL_HIGH) {
perflevel = 100;
cpu_setperf(perflevel);
}
return 0;
}
+
+int
+sysctl_powersavelimit(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
+{
+ if (!cpu_setperf)
+ return EOPNOTSUPP;
+
+ return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
+ &powersavelimit, 0, 100);
+}
#endif
/*
diff --git sys/sys/sched.h sys/sys/sched.h
index ac6dad2a8bb..a54f69a8233 100644
--- sys/sys/sched.h
+++ sys/sys/sched.h
@@ -173,6 +173,7 @@ void sched_barrier(struct cpu_info *ci);
int sysctl_hwsetperf(void *, size_t *, void *, size_t);
int sysctl_hwperfpolicy(void *, size_t *, void *, size_t);
+int sysctl_powersavelimit(void *, size_t *, void *, size_t);
int sysctl_hwsmt(void *, size_t *, void *, size_t);
int sysctl_hwncpuonline(void);
diff --git sys/sys/sysctl.h sys/sys/sysctl.h
index e695f1d95cd..4b8ec5d2487 100644
--- sys/sys/sysctl.h
+++ sys/sys/sysctl.h
@@ -950,6 +950,7 @@ struct kinfo_file {
#define HW_POWER 26 /* int: machine has wall-power */
#define HW_BATTERY 27 /* node: battery */
#define HW_UCOMNAMES 28 /* strings: ucom names */
+#define HW_POWERSAVELIMIT 29 /* set the limit for powersave policy */
#define HW_MAXID 30 /* number of valid hw ids */
#define CTL_HW_NAMES { \
@@ -982,6 +983,7 @@ struct kinfo_file {
{ "power", CTLTYPE_INT }, \
{ "battery", CTLTYPE_NODE }, \
{ "ucomnames", CTLTYPE_STRING }, \
+ { "powersavelimit", CTLTYPE_INT }, \
}
/*
diff --git usr.sbin/apmd/apm-proto.h usr.sbin/apmd/apm-proto.h
index 867d0afbd70..166618e996f 100644
--- usr.sbin/apmd/apm-proto.h
+++ usr.sbin/apmd/apm-proto.h
@@ -38,6 +38,7 @@ enum apm_action {
SETPERF_LOW,
SETPERF_HIGH,
SETPERF_AUTO,
+ SETPERF_POWERSAVING,
};
enum apm_state {
@@ -51,6 +52,7 @@ enum apm_perfmode {
PERF_NONE = -1,
PERF_MANUAL,
PERF_AUTO,
+ PERF_POWERSAVING,
};
struct apm_command {
diff --git usr.sbin/apmd/apmd.8 usr.sbin/apmd/apmd.8
index 7c02bb10c8a..4647ed3e126 100644
--- usr.sbin/apmd/apmd.8
+++ usr.sbin/apmd/apmd.8
@@ -34,7 +34,7 @@
.Nd Advanced Power Management daemon
.Sh SYNOPSIS
.Nm apmd
-.Op Fl AadHLs
+.Op Fl AadHLPs
.Op Fl f Ar devname
.Op Fl S Ar sockname
.Op Fl t Ar seconds
@@ -94,6 +94,9 @@ Start
in manual performance adjustment mode, initialising
.Va hw.setperf
to 0.
+.It Fl P
+.Nm
+in auto mode use powersaving policy when on battery.
.It Fl S Ar sockname
Specify an alternate socket name,
.Ar sockname .
diff --git usr.sbin/apmd/apmd.c usr.sbin/apmd/apmd.c
index f29d5c9a081..b6d8c5e8aa6 100644
--- usr.sbin/apmd/apmd.c
+++ usr.sbin/apmd/apmd.c
@@ -58,6 +58,7 @@
#define AUTO_HIBERNATE 2
int debug = 0;
+int use_powersaving = 0;
extern char *__progname;
@@ -100,7 +101,7 @@ void
usage(void)
{
fprintf(stderr,
- "usage: %s [-AadHLs] [-f devname] [-S sockname] [-t seconds] "
+ "usage: %s [-AadHLPs] [-f devname] [-S sockname] [-t seconds] "
"[-Z percent] [-z percent]\n", __progname);
exit(1);
}
@@ -139,6 +140,10 @@ power_status(int fd, int force, struct apm_power_info *pinfo)
static struct apm_power_info last;
int acon = 0, priority = LOG_NOTICE;
+ int perfpol_mib[] = { CTL_HW, HW_PERFPOLICY };
+ char perfpol[32];
+ size_t perfpol_sz = sizeof(perfpol);
+
if (fd == -1) {
if (pinfo) {
bstate.battery_state = 255;
@@ -151,11 +156,19 @@ power_status(int fd, int force, struct apm_power_info *pinfo)
return 0;
}
+ if (sysctl(perfpol_mib, 2, perfpol, &perfpol_sz, NULL, 0) == -1)
+ logmsg(LOG_INFO, "cannot read hw.perfpolicy");
+
if (ioctl(fd, APM_IOC_GETPOWER, &bstate) == 0) {
/* various conditions under which we report status: something changed
* enough since last report, or asked to force a print */
- if (bstate.ac_state == APM_AC_ON)
+ if (bstate.ac_state == APM_AC_ON) {
acon = 1;
+ if(use_powersaving && strcmp(perfpol, "powersaving") == 0)
+ setperfpolicy("auto");
+ } else if(use_powersaving && strcmp(perfpol, "auto") == 0)
+ setperfpolicy("powersaving");
+
if (bstate.battery_state == APM_BATT_CRITICAL &&
bstate.battery_state != last.battery_state)
priority = LOG_EMERG;
@@ -282,6 +295,11 @@ handle_client(int sock_fd, int ctl_fd)
logmsg(LOG_NOTICE, "setting hw.perfpolicy to high");
setperfpolicy("high");
break;
+ case SETPERF_POWERSAVING:
+ reply.newstate = NORMAL;
+ logmsg(LOG_NOTICE, "setting hw.perfpolicy to powersaving");
+ setperfpolicy("powersaving");
+ break;
case SETPERF_AUTO:
reply.newstate = NORMAL;
logmsg(LOG_NOTICE, "setting hw.perfpolicy to auto");
@@ -299,8 +317,10 @@ handle_client(int sock_fd, int ctl_fd)
if (strcmp(perfpol, "manual") == 0 ||
strcmp(perfpol, "high") == 0) {
reply.perfmode = PERF_MANUAL;
- } else if (strcmp(perfpol, "auto") == 0)
+ } else if (strcmp(perfpol, "auto") == 0) {
reply.perfmode = PERF_AUTO;
+ } else if (strcmp(perfpol, "powersaving") == 0)
+ reply.perfmode = PERF_POWERSAVING;
}
if (sysctl(cpuspeed_mib, 2, &cpuspeed, &cpuspeed_sz, NULL, 0) == -1) {
@@ -423,7 +443,7 @@ main(int argc, char *argv[])
struct kevent ev[2];
int doperf = PERF_NONE;
- while ((ch = getopt(argc, argv, "aACdHLsf:t:S:z:Z:")) != -1)
+ while ((ch = getopt(argc, argv, "aACdHLPsf:t:S:z:Z:")) != -1)
switch(ch) {
case 'a':
noacsleep = 1;
@@ -459,6 +479,9 @@ main(int argc, char *argv[])
doperf = PERF_MANUAL;
setperfpolicy("low");
break;
+ case 'P':
+ use_powersaving = 1;
+ break;
case 'H':
if (doperf != PERF_NONE)
usage();
diff --git usr.sbin/apmd/apmsubr.c usr.sbin/apmd/apmsubr.c
index 6504fe823bb..a1dbcfb2c61 100644
--- usr.sbin/apmd/apmsubr.c
+++ usr.sbin/apmd/apmsubr.c
@@ -79,6 +79,8 @@ perf_mode(int mode)
return "manual";
case PERF_AUTO:
return "auto";
+ case PERF_POWERSAVING:
+ return "powersaving";
default:
return "invalid";
}
--
wbr, Kirill
powersave CPU policy