Download raw body.
powersave CPU policy
Hi,
On Sun, 05 May 2024 07:09:47 +0200,
"Lorenz (xha)" <me@xha.li> wrote:
>
> didn't actually review the diff completely, but maybe more content
> in the manual page would be useful. anyways, small comment inline.
>
Agreed, I've added some details about the difference between the auto and
powersaving policies in sysctl.2.
>
> does the above check enforce some invariants? if not, could it be
> removed? this could be useful for servers to use less power, even
> if they're always running on AC.
Yes, definitely.
I never thought about that use case, but I see that it can be used on
personal servers in a basement to reduce the electricity bill, for example.
Additionally, it can be used on a laptop instead of mixing it with auto mode.
So, here is an updated version of the diff which allows switching a machine
to powersaving mode in both ways: via sysctl.conf and via apmd.
diff --git lib/libc/sys/sysctl.2 lib/libc/sys/sysctl.2
index 78c69ff5813..f0f1b613a72 100644
--- lib/libc/sys/sysctl.2
+++ lib/libc/sys/sysctl.2
@@ -293,6 +293,7 @@ privileges may change the value.
.It Dv HW_PHYSMEM Ta "integer" Ta "no"
.It Dv HW_PHYSMEM64 Ta "int64_t" Ta "no"
.It Dv HW_POWER Ta "integer" Ta "no"
+.It Dv HW_POWERSAVELIMIT Ta "integer" Ta "yes"
.It Dv HW_PRODUCT Ta "string" Ta "no"
.It Dv HW_SENSORS Ta "node" Ta "not applicable"
.It Dv HW_SETPERF Ta "integer" Ta "yes"
@@ -384,8 +385,19 @@ The performance policy for power management.
Can be one of
.Dq manual ,
.Dq auto ,
+.Dq powersaving ,
or
.Dq high .
+.Pp
+The differences between
+.Dq auto
+and
+.Dq powersaving
+policies are that the latter trades off some system responsiveness for
+reduced power consumption. In addition, it has an upper limit on CPU
+performance that can be adjusted by
+.Dv HW_POWERSAVELIMIT ,
+and it offlines unused CPU cores.
.It Dv HW_PHYSMEM
The total physical memory, in bytes.
This variable is deprecated; use
@@ -395,6 +407,10 @@ instead.
The total physical memory, in bytes.
.It Dv HW_POWER Pq Va hw.power
Machine has wall-power.
+.It Dv HW_POWERSAVELIMIT Pq Va hw.powersavelimit
+An upper limit of CPU performance in
+.Dq powersaving
+policy.
.It Dv HW_PRODUCT Pq Va hw.product
The product name of the machine.
.It Dv HW_SENSORS Pq Va hw.sensors
diff --git sys/kern/kern_sysctl.c sys/kern/kern_sysctl.c
index 5d0d1a8a851..0c55bb3da67 100644
--- sys/kern/kern_sysctl.c
+++ sys/kern/kern_sysctl.c
@@ -733,6 +733,8 @@ hw_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
return (sysctl_hwsetperf(oldp, oldlenp, newp, newlen));
case HW_PERFPOLICY:
return (sysctl_hwperfpolicy(oldp, oldlenp, newp, newlen));
+ case HW_POWERSAVELIMIT:
+ return (sysctl_powersavelimit(oldp, oldlenp, newp, newlen));
#endif /* !SMALL_KERNEL */
case HW_VENDOR:
if (hw_vendor)
diff --git sys/kern/sched_bsd.c sys/kern/sched_bsd.c
index 25b221c1ee2..8e50cc421fd 100644
--- sys/kern/sched_bsd.c
+++ sys/kern/sched_bsd.c
@@ -65,8 +65,15 @@ void update_loadavg(void *);
void schedcpu(void *);
uint32_t decay_aftersleep(uint32_t, uint32_t);
+#ifndef SMALL_KERNEL
+extern struct cpuset sched_all_cpus;
+#endif
extern struct cpuset sched_idle_cpus;
+#ifdef __HAVE_CPU_TOPOLOGY
+extern int sched_smt;
+#endif
+
/*
* constants for averages over 1, 5, and 15 minutes when sampling at
* 5 second intervals.
@@ -573,8 +580,11 @@ void (*cpu_setperf)(int);
#define PERFPOL_MANUAL 0
#define PERFPOL_AUTO 1
#define PERFPOL_HIGH 2
+#define PERFPOL_POWERSAVING 4
int perflevel = 100;
int perfpolicy = PERFPOL_AUTO;
+/* avoid turbo frequency by default */
+int powersavelimit = 75;
#ifndef SMALL_KERNEL
/*
@@ -583,7 +593,9 @@ int perfpolicy = PERFPOL_AUTO;
#include <sys/sysctl.h>
void setperf_auto(void *);
+void setperf_powersaving(void *);
struct timeout setperf_to = TIMEOUT_INITIALIZER(setperf_auto, NULL);
+struct timeout setperf_to_powersaving = TIMEOUT_INITIALIZER(setperf_powersaving, NULL);
extern int hw_power;
void
@@ -653,6 +665,100 @@ faster:
timeout_add_msec(&setperf_to, 100);
}
+void
+setperf_powersaving(void *v)
+{
+ static uint64_t *idleticks, *totalticks;
+ static int downbeats;
+ int i, j = 0;
+ int speedup = 0;
+ CPU_INFO_ITERATOR cii;
+ struct cpu_info *ci, *firstoffline = NULL, *lastidle = NULL;
+ uint64_t idle, total, allidle = 0, alltotal = 0;
+
+ /* policy was changed, online all CPU cores */
+ if (perfpolicy != PERFPOL_POWERSAVING) {
+ CPU_INFO_FOREACH(cii, ci) {
+ if (!cpu_is_online(ci)) {
+#ifdef __HAVE_CPU_TOPOLOGY
+ if (!(sched_smt || ci->ci_smt_id == 0))
+ continue;
+#endif
+ atomic_clearbits_int(&ci->ci_schedstate.spc_schedflags,
+ SPCF_SHOULDHALT | SPCF_HALTED);
+ cpuset_add(&sched_all_cpus, ci);
+ need_resched(ci);
+ }
+ }
+ return;
+ }
+
+ if (!idleticks)
+ if (!(idleticks = mallocarray(ncpusfound, sizeof(*idleticks),
+ M_DEVBUF, M_NOWAIT | M_ZERO)))
+ return;
+ if (!totalticks)
+ if (!(totalticks = mallocarray(ncpusfound, sizeof(*totalticks),
+ M_DEVBUF, M_NOWAIT | M_ZERO))) {
+ free(idleticks, M_DEVBUF,
+ sizeof(*idleticks) * ncpusfound);
+ return;
+ }
+ CPU_INFO_FOREACH(cii, ci) {
+ if (!cpu_is_online(ci)) {
+ if (!firstoffline)
+#ifdef __HAVE_CPU_TOPOLOGY
+ if (sched_smt || ci->ci_smt_id == 0)
+#endif
+ firstoffline = ci;
+ continue;
+ }
+ total = 0;
+ for (i = 0; i < CPUSTATES; i++) {
+ total += ci->ci_schedstate.spc_cp_time[i];
+ }
+ total -= totalticks[j];
+ idle = ci->ci_schedstate.spc_cp_time[CP_IDLE] - idleticks[j];
+ if (idle < total / 3)
+ speedup = 1;
+ alltotal += total;
+ allidle += idle;
+ idleticks[j] += idle;
+ totalticks[j] += total;
+ /* it should keep at least one CPU online */
+ if (j++ && cpuset_isset(&sched_idle_cpus, ci))
+ lastidle = ci;
+ }
+ if (allidle < alltotal / 3)
+ speedup = 1;
+ if (speedup)
+ /* twice as long here because we check every 200ms */
+ downbeats = 1;
+
+ if (speedup && cpu_setperf && perflevel != powersavelimit) {
+ perflevel = powersavelimit;
+ cpu_setperf(perflevel);
+ } else if (speedup && firstoffline) {
+ atomic_clearbits_int(&firstoffline->ci_schedstate.spc_schedflags,
+ SPCF_SHOULDHALT | SPCF_HALTED);
+ cpuset_add(&sched_all_cpus, firstoffline);
+ need_resched(firstoffline);
+ } else if (!speedup && cpu_setperf && perflevel != 0 && --downbeats <= 0) {
+ perflevel = 0;
+ cpu_setperf(perflevel);
+ } else if (!speedup && lastidle) {
+ cpuset_del(&sched_all_cpus, lastidle);
+ atomic_setbits_int(&lastidle->ci_schedstate.spc_schedflags,
+ SPCF_SHOULDHALT);
+ need_resched(lastidle);
+ }
+
+ /* every 200ms to have a better resolution of the load */
+ timeout_add_msec(&setperf_to_powersaving, 200);
+ return;
+}
+
+
int
sysctl_hwsetperf(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
@@ -691,6 +797,9 @@ sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
case PERFPOL_AUTO:
strlcpy(policy, "auto", sizeof(policy));
break;
+ case PERFPOL_POWERSAVING:
+ strlcpy(policy, "powersaving", sizeof(policy));
+ break;
case PERFPOL_HIGH:
strlcpy(policy, "high", sizeof(policy));
break;
@@ -709,6 +818,8 @@ sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
perfpolicy = PERFPOL_MANUAL;
else if (strcmp(policy, "auto") == 0)
perfpolicy = PERFPOL_AUTO;
+ else if (strcmp(policy, "powersaving") == 0)
+ perfpolicy = PERFPOL_POWERSAVING;
else if (strcmp(policy, "high") == 0)
perfpolicy = PERFPOL_HIGH;
else
@@ -716,12 +827,24 @@ sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
if (perfpolicy == PERFPOL_AUTO) {
timeout_add_msec(&setperf_to, 200);
+ } else if (perfpolicy == PERFPOL_POWERSAVING) {
+ timeout_add_msec(&setperf_to_powersaving, 200);
} else if (perfpolicy == PERFPOL_HIGH) {
perflevel = 100;
cpu_setperf(perflevel);
}
return 0;
}
+
+int
+sysctl_powersavelimit(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
+{
+ if (!cpu_setperf)
+ return EOPNOTSUPP;
+
+ return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
+ &powersavelimit, 0, 100);
+}
#endif
/*
diff --git sys/sys/sched.h sys/sys/sched.h
index ac6dad2a8bb..a54f69a8233 100644
--- sys/sys/sched.h
+++ sys/sys/sched.h
@@ -173,6 +173,7 @@ void sched_barrier(struct cpu_info *ci);
int sysctl_hwsetperf(void *, size_t *, void *, size_t);
int sysctl_hwperfpolicy(void *, size_t *, void *, size_t);
+int sysctl_powersavelimit(void *, size_t *, void *, size_t);
int sysctl_hwsmt(void *, size_t *, void *, size_t);
int sysctl_hwncpuonline(void);
diff --git sys/sys/sysctl.h sys/sys/sysctl.h
index e695f1d95cd..4b8ec5d2487 100644
--- sys/sys/sysctl.h
+++ sys/sys/sysctl.h
@@ -950,6 +950,7 @@ struct kinfo_file {
#define HW_POWER 26 /* int: machine has wall-power */
#define HW_BATTERY 27 /* node: battery */
#define HW_UCOMNAMES 28 /* strings: ucom names */
+#define HW_POWERSAVELIMIT 29 /* set the limit for powersave policy */
#define HW_MAXID 30 /* number of valid hw ids */
#define CTL_HW_NAMES { \
@@ -982,6 +983,7 @@ struct kinfo_file {
{ "power", CTLTYPE_INT }, \
{ "battery", CTLTYPE_NODE }, \
{ "ucomnames", CTLTYPE_STRING }, \
+ { "powersavelimit", CTLTYPE_INT }, \
}
/*
diff --git usr.sbin/apmd/apm-proto.h usr.sbin/apmd/apm-proto.h
index 867d0afbd70..166618e996f 100644
--- usr.sbin/apmd/apm-proto.h
+++ usr.sbin/apmd/apm-proto.h
@@ -38,6 +38,7 @@ enum apm_action {
SETPERF_LOW,
SETPERF_HIGH,
SETPERF_AUTO,
+ SETPERF_POWERSAVING,
};
enum apm_state {
@@ -51,6 +52,7 @@ enum apm_perfmode {
PERF_NONE = -1,
PERF_MANUAL,
PERF_AUTO,
+ PERF_POWERSAVING,
};
struct apm_command {
diff --git usr.sbin/apmd/apmd.8 usr.sbin/apmd/apmd.8
index 7c02bb10c8a..8c54efc1a69 100644
--- usr.sbin/apmd/apmd.8
+++ usr.sbin/apmd/apmd.8
@@ -34,7 +34,7 @@
.Nd Advanced Power Management daemon
.Sh SYNOPSIS
.Nm apmd
-.Op Fl AadHLs
+.Op Fl AadHLPs
.Op Fl f Ar devname
.Op Fl S Ar sockname
.Op Fl t Ar seconds
@@ -94,6 +94,12 @@ Start
in manual performance adjustment mode, initialising
.Va hw.setperf
to 0.
+.It Fl P
+Start
+.Nm
+in powersaving mode. This flag can be used with
+.Fl A
+to use auto mode on AC power, and powersaving on battery.
.It Fl S Ar sockname
Specify an alternate socket name,
.Ar sockname .
diff --git usr.sbin/apmd/apmd.c usr.sbin/apmd/apmd.c
index f29d5c9a081..af01acc7a0b 100644
--- usr.sbin/apmd/apmd.c
+++ usr.sbin/apmd/apmd.c
@@ -58,6 +58,7 @@
#define AUTO_HIBERNATE 2
int debug = 0;
+int use_powersaving = 0;
extern char *__progname;
@@ -100,7 +101,7 @@ void
usage(void)
{
fprintf(stderr,
- "usage: %s [-AadHLs] [-f devname] [-S sockname] [-t seconds] "
+ "usage: %s [-AadHLPs] [-f devname] [-S sockname] [-t seconds] "
"[-Z percent] [-z percent]\n", __progname);
exit(1);
}
@@ -139,6 +140,10 @@ power_status(int fd, int force, struct apm_power_info *pinfo)
static struct apm_power_info last;
int acon = 0, priority = LOG_NOTICE;
+ int perfpol_mib[] = { CTL_HW, HW_PERFPOLICY };
+ char perfpol[32];
+ size_t perfpol_sz = sizeof(perfpol);
+
if (fd == -1) {
if (pinfo) {
bstate.battery_state = 255;
@@ -151,11 +156,19 @@ power_status(int fd, int force, struct apm_power_info *pinfo)
return 0;
}
+ if (sysctl(perfpol_mib, 2, perfpol, &perfpol_sz, NULL, 0) == -1)
+ logmsg(LOG_INFO, "cannot read hw.perfpolicy");
+
if (ioctl(fd, APM_IOC_GETPOWER, &bstate) == 0) {
/* various conditions under which we report status: something changed
* enough since last report, or asked to force a print */
- if (bstate.ac_state == APM_AC_ON)
+ if (bstate.ac_state == APM_AC_ON) {
acon = 1;
+ if(use_powersaving && strcmp(perfpol, "powersaving") == 0)
+ setperfpolicy("auto");
+ } else if(use_powersaving && strcmp(perfpol, "auto") == 0)
+ setperfpolicy("powersaving");
+
if (bstate.battery_state == APM_BATT_CRITICAL &&
bstate.battery_state != last.battery_state)
priority = LOG_EMERG;
@@ -282,6 +295,11 @@ handle_client(int sock_fd, int ctl_fd)
logmsg(LOG_NOTICE, "setting hw.perfpolicy to high");
setperfpolicy("high");
break;
+ case SETPERF_POWERSAVING:
+ reply.newstate = NORMAL;
+ logmsg(LOG_NOTICE, "setting hw.perfpolicy to powersaving");
+ setperfpolicy("powersaving");
+ break;
case SETPERF_AUTO:
reply.newstate = NORMAL;
logmsg(LOG_NOTICE, "setting hw.perfpolicy to auto");
@@ -299,8 +317,10 @@ handle_client(int sock_fd, int ctl_fd)
if (strcmp(perfpol, "manual") == 0 ||
strcmp(perfpol, "high") == 0) {
reply.perfmode = PERF_MANUAL;
- } else if (strcmp(perfpol, "auto") == 0)
+ } else if (strcmp(perfpol, "auto") == 0) {
reply.perfmode = PERF_AUTO;
+ } else if (strcmp(perfpol, "powersaving") == 0)
+ reply.perfmode = PERF_POWERSAVING;
}
if (sysctl(cpuspeed_mib, 2, &cpuspeed, &cpuspeed_sz, NULL, 0) == -1) {
@@ -423,7 +443,7 @@ main(int argc, char *argv[])
struct kevent ev[2];
int doperf = PERF_NONE;
- while ((ch = getopt(argc, argv, "aACdHLsf:t:S:z:Z:")) != -1)
+ while ((ch = getopt(argc, argv, "aACdHLPsf:t:S:z:Z:")) != -1)
switch(ch) {
case 'a':
noacsleep = 1;
@@ -448,7 +468,9 @@ main(int argc, char *argv[])
break;
case 'A':
case 'C':
- if (doperf != PERF_NONE)
+ if (doperf == PERF_POWERSAVING)
+ use_powersaving = 1;
+ else if (doperf != PERF_NONE)
usage();
doperf = PERF_AUTO;
setperfpolicy("auto");
@@ -459,6 +481,15 @@ main(int argc, char *argv[])
doperf = PERF_MANUAL;
setperfpolicy("low");
break;
+ case 'P':
+ if (doperf == PERF_NONE) {
+ doperf = PERF_POWERSAVING;
+ setperfpolicy("powersaving");
+ } else if (doperf != PERF_AUTO)
+ usage();
+ else
+ use_powersaving = 1;
+ break;
case 'H':
if (doperf != PERF_NONE)
usage();
diff --git usr.sbin/apmd/apmsubr.c usr.sbin/apmd/apmsubr.c
index 6504fe823bb..a1dbcfb2c61 100644
--- usr.sbin/apmd/apmsubr.c
+++ usr.sbin/apmd/apmsubr.c
@@ -79,6 +79,8 @@ perf_mode(int mode)
return "manual";
case PERF_AUTO:
return "auto";
+ case PERF_POWERSAVING:
+ return "powersaving";
default:
return "invalid";
}
--
wbr, Kirill
powersave CPU policy