Index | Thread | Search

From:
Kirill A. Korinsky <kirill@korins.ky>
Subject:
Re: powersave CPU policy
To:
Philip Guenther <guenther@gmail.com>
Cc:
"Lorenz (xha)" <me@xha.li>, OpenBSD tech <tech@openbsd.org>
Date:
Mon, 03 Jun 2024 13:18:38 +0100

Download raw body.

Thread
On Wed, 15 May 2024 08:19:12 +0100,
Philip Guenther <guenther@gmail.com> wrote:
> 
> acpicpi_idle() has nothing to tell it why the cpu is being idle so it will
> use the same logic for this as it does for when a CPU goes idle because
> your web browser is waiting for a response from a web-server...which is
> just a really simple (aka dumb) decaying average of how long it stayed idle
> when this function was previously called compared to the C-state latencies,
> such that it was idling longer recently then it'll idle deeper now.
> 
> (With dynamic clock interrupts, that may actually result in it going to
> deep C-states only after some activity so it can wake up and see that it
> was idle for a long time.  :-|  Indeed, it really needs to calculate a
> function of energy use and latency choice vs how long to the next clock
> alarm to see which state is best to drop into...and by how much it should
> ask for an early alarm so that it can come out of the C-state in time for
> when the *real* target time is.)
> 

Here is an updated version of the diff, which I have been running for about the last two weeks.

The main difference is adding to acpicpu.c the enforcement of the deepest
C-state for a shut-down CPU.

I've tested that this logic works by inverting it (using the lightest C-state)
and confirming how fast the battery drained on my machine.

Anyway, this code seems stable; I've tested it for more than a week without a
reboot, with tons of ZZZ and zzz, and it works well. No crashes, no issues.

And the system responsiveness that I've traded off is not noticeable after a
few hours (or days) of use, and right now I just use it.


diff --git lib/libc/sys/sysctl.2 lib/libc/sys/sysctl.2
index 78c69ff5813..f0f1b613a72 100644
--- lib/libc/sys/sysctl.2
+++ lib/libc/sys/sysctl.2
@@ -293,6 +293,7 @@ privileges may change the value.
 .It Dv HW_PHYSMEM Ta "integer" Ta "no"
 .It Dv HW_PHYSMEM64 Ta "int64_t" Ta "no"
 .It Dv HW_POWER Ta "integer" Ta "no"
+.It Dv HW_POWERSAVELIMIT Ta "integer" Ta "yes"
 .It Dv HW_PRODUCT Ta "string" Ta "no"
 .It Dv HW_SENSORS Ta "node" Ta "not applicable"
 .It Dv HW_SETPERF Ta "integer" Ta "yes"
@@ -384,8 +385,19 @@ The performance policy for power management.
 Can be one of
 .Dq manual ,
 .Dq auto ,
+.Dq powersaving ,
 or
 .Dq high .
+.Pp
+The differences between
+.Dq auto
+and
+.Dq powersaving
+policies are that the latter trades off some system responsiveness for
+reduced power consumption. In addition, it has an upper limit on CPU
+performance that can be adjusted by
+.Dv HW_POWERSAVELIMIT ,
+and it offlines unused CPU cores.
 .It Dv HW_PHYSMEM
 The total physical memory, in bytes.
 This variable is deprecated; use
@@ -395,6 +407,10 @@ instead.
 The total physical memory, in bytes.
 .It Dv HW_POWER Pq Va hw.power
 Machine has wall-power.
+.It Dv HW_POWERSAVELIMIT Pq Va hw.powersavelimit
+An upper limit of CPU performance in
+.Dq powersaving
+policy.
 .It Dv HW_PRODUCT Pq Va hw.product
 The product name of the machine.
 .It Dv HW_SENSORS Pq Va hw.sensors
diff --git sys/dev/acpi/acpicpu.c sys/dev/acpi/acpicpu.c
index 0a5ee1fea7d..aa581c3c467 100644
--- sys/dev/acpi/acpicpu.c
+++ sys/dev/acpi/acpicpu.c
@@ -1155,6 +1155,7 @@ acpicpu_idle(void)
 	struct acpicpu_softc *sc = (struct acpicpu_softc *)ci->ci_acpicpudev;
 	struct acpi_cstate *best, *cx;
 	unsigned long itime;
+	int is_halted;
 
 	if (sc == NULL) {
 		__asm volatile("sti");
@@ -1168,11 +1169,27 @@ acpicpu_idle(void)
 	} else if (ci->ci_mwait & MWAIT_ONLY)
 		atomic_clearbits_int(&ci->ci_mwait, MWAIT_ONLY);
 
+#ifdef MULTIPROCESSOR
+	is_halted = ci->ci_schedstate.spc_schedflags & SPCF_HALTED;
+#else
+	is_halted = 0;
+#endif
+
+	best = cx = SLIST_FIRST(&sc->sc_cstates);
+
+	/* When CPU is halted use the deepest supported sleep state. */
+	if (is_halted) {
+		while ((cx = SLIST_NEXT(cx, link)) != NULL) {
+			if (cx->state > best->state)
+				best = cx;
+		}
+		goto bypass_latency_search;
+	}
+
 	/*
 	 * Find the first state with a latency we'll accept, ignoring
-	 * states marked skippable
+	 * states marked skippable.
 	 */
-	best = cx = SLIST_FIRST(&sc->sc_cstates);
 	while ((cx->flags & CST_FLAG_SKIP) ||
 	    cx->latency * 3 > sc->sc_prev_sleep) {
 		if ((cx = SLIST_NEXT(cx, link)) == NULL)
@@ -1196,7 +1213,7 @@ acpicpu_idle(void)
 		best = cx;
 	}
 
-
+bypass_latency_search:
 	atomic_inc_long(&cst_stats[best->state]);
 
 	itime = tick / 2;
@@ -1272,8 +1289,11 @@ acpicpu_idle(void)
 
 	}
 
-	sc->sc_last_itime = itime;
-	itime >>= 1;
-	sc->sc_prev_sleep = (sc->sc_prev_sleep + (sc->sc_prev_sleep >> 1)
-	    + itime) >> 1;
+	/* ignore latency statistics for halted CPU */
+	if (!is_halted) {
+		sc->sc_last_itime = itime;
+		itime >>= 1;
+		sc->sc_prev_sleep = (sc->sc_prev_sleep + (sc->sc_prev_sleep >> 1)
+			+ itime) >> 1;
+	}
 }
diff --git sys/kern/kern_sched.c sys/kern/kern_sched.c
index dd54f98d4b8..ec2ad9eb7ce 100644
--- sys/kern/kern_sched.c
+++ sys/kern/kern_sched.c
@@ -639,6 +639,36 @@ sched_peg_curproc(struct cpu_info *ci)
 
 #ifdef MULTIPROCESSOR
 
+void
+sched_start_cpu(struct cpu_info *ci)
+{
+	struct schedstate_percpu *spc = &ci->ci_schedstate;
+
+	atomic_clearbits_int(&spc->spc_schedflags,
+		SPCF_SHOULDHALT | SPCF_HALTED);
+	cpuset_add(&sched_all_cpus, ci);
+}
+
+void sched_stop_cpu(struct cpu_info *ci)
+{
+	struct schedstate_percpu *spc = &ci->ci_schedstate;
+
+	cpuset_del(&sched_all_cpus, ci);
+	atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
+
+	need_resched(ci);
+}
+
+void sched_wait_halted_cpu(struct cpu_info *ci)
+{
+	struct schedstate_percpu *spc = &ci->ci_schedstate;
+
+	while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
+		sleep_setup(spc, PZERO, "schedstate");
+		sleep_finish(0, (spc->spc_schedflags & SPCF_HALTED) == 0);
+	}
+}
+
 void
 sched_start_secondary_cpus(void)
 {
@@ -646,17 +676,13 @@ sched_start_secondary_cpus(void)
 	struct cpu_info *ci;
 
 	CPU_INFO_FOREACH(cii, ci) {
-		struct schedstate_percpu *spc = &ci->ci_schedstate;
-
 		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
 			continue;
-		atomic_clearbits_int(&spc->spc_schedflags,
-		    SPCF_SHOULDHALT | SPCF_HALTED);
 #ifdef __HAVE_CPU_TOPOLOGY
 		if (!sched_smt && ci->ci_smt_id > 0)
 			continue;
 #endif
-		cpuset_add(&sched_all_cpus, ci);
+		sched_start_cpu(ci);
 	}
 }
 
@@ -670,23 +696,14 @@ sched_stop_secondary_cpus(void)
 	 * Make sure we stop the secondary CPUs.
 	 */
 	CPU_INFO_FOREACH(cii, ci) {
-		struct schedstate_percpu *spc = &ci->ci_schedstate;
-
 		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
 			continue;
-		cpuset_del(&sched_all_cpus, ci);
-		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
+		sched_stop_cpu(ci);
 	}
 	CPU_INFO_FOREACH(cii, ci) {
-		struct schedstate_percpu *spc = &ci->ci_schedstate;
-
 		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
 			continue;
-		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
-			sleep_setup(spc, PZERO, "schedstate");
-			sleep_finish(0,
-			    (spc->spc_schedflags & SPCF_HALTED) == 0);
-		}
+		sched_wait_halted_cpu(ci);
 	}
 }
 
@@ -883,9 +900,9 @@ sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 		if (ci->ci_smt_id == 0)
 			continue;
 		if (sched_smt)
-			cpuset_add(&sched_all_cpus, ci);
+			sched_start_cpu(ci);
 		else
-			cpuset_del(&sched_all_cpus, ci);
+			sched_stop_cpu(ci);
 	}
 
 	return 0;
diff --git sys/kern/kern_sysctl.c sys/kern/kern_sysctl.c
index 5d0d1a8a851..0c55bb3da67 100644
--- sys/kern/kern_sysctl.c
+++ sys/kern/kern_sysctl.c
@@ -733,6 +733,8 @@ hw_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
 		return (sysctl_hwsetperf(oldp, oldlenp, newp, newlen));
 	case HW_PERFPOLICY:
 		return (sysctl_hwperfpolicy(oldp, oldlenp, newp, newlen));
+	case HW_POWERSAVELIMIT:
+		return (sysctl_powersavelimit(oldp, oldlenp, newp, newlen));
 #endif /* !SMALL_KERNEL */
 	case HW_VENDOR:
 		if (hw_vendor)
diff --git sys/kern/sched_bsd.c sys/kern/sched_bsd.c
index 1cfd32746f0..4e3af63e924 100644
--- sys/kern/sched_bsd.c
+++ sys/kern/sched_bsd.c
@@ -65,6 +65,10 @@ uint32_t		decay_aftersleep(uint32_t, uint32_t);
 
 extern struct cpuset sched_idle_cpus;
 
+#ifdef __HAVE_CPU_TOPOLOGY
+extern int sched_smt;
+#endif
+
 /*
  * constants for averages over 1, 5, and 15 minutes when sampling at
  * 5 second intervals.
@@ -567,8 +571,11 @@ void (*cpu_setperf)(int);
 #define PERFPOL_MANUAL 0
 #define PERFPOL_AUTO 1
 #define PERFPOL_HIGH 2
+#define PERFPOL_POWERSAVING 4
 int perflevel = 100;
 int perfpolicy = PERFPOL_AUTO;
+/* avoid turbo frequency by default */
+int powersavelimit = 75;
 
 #ifndef SMALL_KERNEL
 /*
@@ -577,7 +584,9 @@ int perfpolicy = PERFPOL_AUTO;
 #include <sys/sysctl.h>
 
 void setperf_auto(void *);
+void setperf_powersaving(void *);
 struct timeout setperf_to = TIMEOUT_INITIALIZER(setperf_auto, NULL);
+struct timeout setperf_to_powersaving = TIMEOUT_INITIALIZER(setperf_powersaving, NULL);
 extern int hw_power;
 
 void
@@ -647,6 +656,91 @@ faster:
 	timeout_add_msec(&setperf_to, 100);
 }
 
+void
+setperf_powersaving(void *v)
+{
+	static uint64_t *idleticks, *totalticks;
+	static int downbeats;
+	int i, j = 0;
+	int speedup = 0;
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci, *firstoffline = NULL, *lastidle = NULL;
+	uint64_t idle, total, allidle = 0, alltotal = 0;
+
+	/* policy was changed, online all CPU cores */
+	if (perfpolicy != PERFPOL_POWERSAVING) {
+		CPU_INFO_FOREACH(cii, ci) {
+			if (!cpu_is_online(ci)) {
+#ifdef __HAVE_CPU_TOPOLOGY
+				if (!(sched_smt || ci->ci_smt_id == 0))
+					continue;
+#endif
+				sched_start_cpu(ci);
+			}
+		}
+		return;
+	}
+
+	if (!idleticks)
+		if (!(idleticks = mallocarray(ncpusfound, sizeof(*idleticks),
+		    M_DEVBUF, M_NOWAIT | M_ZERO)))
+			return;
+	if (!totalticks)
+		if (!(totalticks = mallocarray(ncpusfound, sizeof(*totalticks),
+		    M_DEVBUF, M_NOWAIT | M_ZERO))) {
+			free(idleticks, M_DEVBUF,
+			    sizeof(*idleticks) * ncpusfound);
+			idleticks = NULL;
+		}
+	CPU_INFO_FOREACH(cii, ci) {
+		if (!cpu_is_online(ci)) {
+			if (!firstoffline)
+#ifdef __HAVE_CPU_TOPOLOGY
+				if (sched_smt || ci->ci_smt_id == 0)
+#endif
+					firstoffline = ci;
+			continue;
+		}
+		total = 0;
+		for (i = 0; i < CPUSTATES; i++) {
+			total += ci->ci_schedstate.spc_cp_time[i];
+		}
+		total -= totalticks[j];
+		idle = ci->ci_schedstate.spc_cp_time[CP_IDLE] - idleticks[j];
+		if (idle < total / 3)
+			speedup = 1;
+		alltotal += total;
+		allidle += idle;
+		idleticks[j] += idle;
+		totalticks[j] += total;
+		/* it should keep at least one CPU online */
+		if (j++ && cpuset_isset(&sched_idle_cpus, ci))
+			lastidle = ci;
+	}
+	if (allidle < alltotal / 3)
+		speedup = 1;
+	if (speedup)
+		/* twice as long here because we check every 200ms */
+		downbeats = 1;
+
+	if (speedup && cpu_setperf && perflevel != powersavelimit) {
+		perflevel = powersavelimit;
+		cpu_setperf(perflevel);
+	} else if (speedup && firstoffline) {
+		sched_start_cpu(firstoffline);
+	} else if (!speedup && cpu_setperf && perflevel != 0 && --downbeats <= 0) {
+		perflevel = 0;
+		cpu_setperf(perflevel);
+	} else if (!speedup && lastidle) {
+		sched_stop_cpu(lastidle);
+	}
+
+	/* every 200ms to have a better resolution of the load */
+	timeout_add_msec(&setperf_to_powersaving, 200);
+	return;
+}
+
+
 int
 sysctl_hwsetperf(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 {
@@ -685,6 +779,9 @@ sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 	case PERFPOL_AUTO:
 		strlcpy(policy, "auto", sizeof(policy));
 		break;
+	case PERFPOL_POWERSAVING:
+		strlcpy(policy, "powersaving", sizeof(policy));
+		break;
 	case PERFPOL_HIGH:
 		strlcpy(policy, "high", sizeof(policy));
 		break;
@@ -703,6 +800,8 @@ sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 		perfpolicy = PERFPOL_MANUAL;
 	else if (strcmp(policy, "auto") == 0)
 		perfpolicy = PERFPOL_AUTO;
+	else if (strcmp(policy, "powersaving") == 0)
+		perfpolicy = PERFPOL_POWERSAVING;
 	else if (strcmp(policy, "high") == 0)
 		perfpolicy = PERFPOL_HIGH;
 	else
@@ -710,12 +809,24 @@ sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
 
 	if (perfpolicy == PERFPOL_AUTO) {
 		timeout_add_msec(&setperf_to, 200);
+	} else if (perfpolicy == PERFPOL_POWERSAVING) {
+		timeout_add_msec(&setperf_to_powersaving, 200);
 	} else if (perfpolicy == PERFPOL_HIGH) {
 		perflevel = 100;
 		cpu_setperf(perflevel);
 	}
 	return 0;
 }
+
+int
+sysctl_powersavelimit(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
+{
+	if (!cpu_setperf)
+		return EOPNOTSUPP;
+
+	return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
+	    &powersavelimit, 0, 100);
+}
 #endif
 
 /*
diff --git sys/sys/sched.h sys/sys/sched.h
index 593bd3e60ae..fb2cc7ea2dc 100644
--- sys/sys/sched.h
+++ sys/sys/sched.h
@@ -173,10 +173,14 @@ void sched_barrier(struct cpu_info *ci);
 
 int sysctl_hwsetperf(void *, size_t *, void *, size_t);
 int sysctl_hwperfpolicy(void *, size_t *, void *, size_t);
+int sysctl_powersavelimit(void *, size_t *, void *, size_t);
 int sysctl_hwsmt(void *, size_t *, void *, size_t);
 int sysctl_hwncpuonline(void);
 
 #ifdef MULTIPROCESSOR
+void sched_start_cpu(struct cpu_info *ci);
+void sched_stop_cpu(struct cpu_info *ci);
+void sched_wait_halted_cpu(struct cpu_info *ci);
 void sched_start_secondary_cpus(void);
 void sched_stop_secondary_cpus(void);
 #endif
diff --git sys/sys/sysctl.h sys/sys/sysctl.h
index e695f1d95cd..4b8ec5d2487 100644
--- sys/sys/sysctl.h
+++ sys/sys/sysctl.h
@@ -950,6 +950,7 @@ struct kinfo_file {
 #define	HW_POWER		26	/* int: machine has wall-power */
 #define	HW_BATTERY		27	/* node: battery */
 #define	HW_UCOMNAMES		28	/* strings: ucom names */
+#define	HW_POWERSAVELIMIT	29	/* set the limit for powersave policy */
 #define	HW_MAXID		30	/* number of valid hw ids */
 
 #define	CTL_HW_NAMES { \
@@ -982,6 +983,7 @@ struct kinfo_file {
 	{ "power", CTLTYPE_INT }, \
 	{ "battery", CTLTYPE_NODE }, \
 	{ "ucomnames", CTLTYPE_STRING }, \
+	{ "powersavelimit", CTLTYPE_INT }, \
 }
 
 /*
diff --git usr.sbin/apm/apm.8 usr.sbin/apm/apm.8
index ed110e11f14..6b25228bfd1 100644
--- usr.sbin/apm/apm.8
+++ usr.sbin/apm/apm.8
@@ -90,6 +90,7 @@ If charging, the estimated time to fully charge is displayed instead.
 Display the performance adjustment mode.
 0 means manual mode.
 1 means automatic mode.
+2 means powersaving mode.
 .It Fl S
 Put the system into stand-by (light sleep) state.
 .It Fl v
diff --git usr.sbin/apmd/apm-proto.h usr.sbin/apmd/apm-proto.h
index 867d0afbd70..166618e996f 100644
--- usr.sbin/apmd/apm-proto.h
+++ usr.sbin/apmd/apm-proto.h
@@ -38,6 +38,7 @@ enum apm_action {
 	SETPERF_LOW,
 	SETPERF_HIGH,
 	SETPERF_AUTO,
+	SETPERF_POWERSAVING,
 };
 
 enum apm_state {
@@ -51,6 +52,7 @@ enum apm_perfmode {
 	PERF_NONE = -1,
 	PERF_MANUAL,
 	PERF_AUTO,
+	PERF_POWERSAVING,
 };
 
 struct apm_command {
diff --git usr.sbin/apmd/apmd.8 usr.sbin/apmd/apmd.8
index 7c02bb10c8a..8c54efc1a69 100644
--- usr.sbin/apmd/apmd.8
+++ usr.sbin/apmd/apmd.8
@@ -34,7 +34,7 @@
 .Nd Advanced Power Management daemon
 .Sh SYNOPSIS
 .Nm apmd
-.Op Fl AadHLs
+.Op Fl AadHLPs
 .Op Fl f Ar devname
 .Op Fl S Ar sockname
 .Op Fl t Ar seconds
@@ -94,6 +94,12 @@ Start
 in manual performance adjustment mode, initialising
 .Va hw.setperf
 to 0.
+.It Fl P
+Start
+.Nm
+in powersaving mode. This flag can be used with
+.Fl A
+to use auto mode on AC power, and powersaving on battery.
 .It Fl S Ar sockname
 Specify an alternate socket name,
 .Ar sockname .
diff --git usr.sbin/apmd/apmd.c usr.sbin/apmd/apmd.c
index f29d5c9a081..af01acc7a0b 100644
--- usr.sbin/apmd/apmd.c
+++ usr.sbin/apmd/apmd.c
@@ -58,6 +58,7 @@
 #define AUTO_HIBERNATE 2
 
 int debug = 0;
+int use_powersaving = 0;
 
 extern char *__progname;
 
@@ -100,7 +101,7 @@ void
 usage(void)
 {
 	fprintf(stderr,
-	    "usage: %s [-AadHLs] [-f devname] [-S sockname] [-t seconds] "
+	    "usage: %s [-AadHLPs] [-f devname] [-S sockname] [-t seconds] "
 		"[-Z percent] [-z percent]\n", __progname);
 	exit(1);
 }
@@ -139,6 +140,10 @@ power_status(int fd, int force, struct apm_power_info *pinfo)
 	static struct apm_power_info last;
 	int acon = 0, priority = LOG_NOTICE;
 
+	int perfpol_mib[] = { CTL_HW, HW_PERFPOLICY };
+	char perfpol[32];
+	size_t perfpol_sz = sizeof(perfpol);
+
 	if (fd == -1) {
 		if (pinfo) {
 			bstate.battery_state = 255;
@@ -151,11 +156,19 @@ power_status(int fd, int force, struct apm_power_info *pinfo)
 		return 0;
 	}
 
+	if (sysctl(perfpol_mib, 2, perfpol, &perfpol_sz, NULL, 0) == -1)
+		logmsg(LOG_INFO, "cannot read hw.perfpolicy");
+
 	if (ioctl(fd, APM_IOC_GETPOWER, &bstate) == 0) {
 	/* various conditions under which we report status:  something changed
 	 * enough since last report, or asked to force a print */
-		if (bstate.ac_state == APM_AC_ON)
+		if (bstate.ac_state == APM_AC_ON) {
 			acon = 1;
+			if (use_powersaving && strcmp(perfpol, "powersaving") == 0)
+				setperfpolicy("auto");
+		} else if (use_powersaving && strcmp(perfpol, "auto") == 0)
+			setperfpolicy("powersaving");
+
 		if (bstate.battery_state == APM_BATT_CRITICAL &&
 		    bstate.battery_state != last.battery_state)
 			priority = LOG_EMERG;
@@ -282,6 +295,11 @@ handle_client(int sock_fd, int ctl_fd)
 		logmsg(LOG_NOTICE, "setting hw.perfpolicy to high");
 		setperfpolicy("high");
 		break;
+	case SETPERF_POWERSAVING:
+		reply.newstate = NORMAL;
+		logmsg(LOG_NOTICE, "setting hw.perfpolicy to powersaving");
+		setperfpolicy("powersaving");
+		break;
 	case SETPERF_AUTO:
 		reply.newstate = NORMAL;
 		logmsg(LOG_NOTICE, "setting hw.perfpolicy to auto");
@@ -299,8 +317,10 @@ handle_client(int sock_fd, int ctl_fd)
 		if (strcmp(perfpol, "manual") == 0 ||
 		    strcmp(perfpol, "high") == 0) {
 			reply.perfmode = PERF_MANUAL;
-		} else if (strcmp(perfpol, "auto") == 0)
+		} else if (strcmp(perfpol, "auto") == 0) {
 			reply.perfmode = PERF_AUTO;
+		} else if (strcmp(perfpol, "powersaving") == 0)
+			reply.perfmode = PERF_POWERSAVING;
 	}
 
 	if (sysctl(cpuspeed_mib, 2, &cpuspeed, &cpuspeed_sz, NULL, 0) == -1) {
@@ -423,7 +443,7 @@ main(int argc, char *argv[])
 	struct kevent ev[2];
 	int doperf = PERF_NONE;
 
-	while ((ch = getopt(argc, argv, "aACdHLsf:t:S:z:Z:")) != -1)
+	while ((ch = getopt(argc, argv, "aACdHLPsf:t:S:z:Z:")) != -1)
 		switch(ch) {
 		case 'a':
 			noacsleep = 1;
@@ -448,7 +468,9 @@ main(int argc, char *argv[])
 			break;
 		case 'A':
 		case 'C':
-			if (doperf != PERF_NONE)
+			if (doperf == PERF_POWERSAVING)
+				use_powersaving = 1;
+			else if (doperf != PERF_NONE)
 				usage();
 			doperf = PERF_AUTO;
 			setperfpolicy("auto");
@@ -459,6 +481,15 @@ main(int argc, char *argv[])
 			doperf = PERF_MANUAL;
 			setperfpolicy("low");
 			break;
+		case 'P':
+			if (doperf == PERF_NONE) {
+				doperf = PERF_POWERSAVING;
+				setperfpolicy("powersaving");
+			} else if (doperf != PERF_AUTO)
+				usage();
+			else
+				use_powersaving = 1;
+			break;
 		case 'H':
 			if (doperf != PERF_NONE)
 				usage();
diff --git usr.sbin/apmd/apmsubr.c usr.sbin/apmd/apmsubr.c
index 6504fe823bb..a1dbcfb2c61 100644
--- usr.sbin/apmd/apmsubr.c
+++ usr.sbin/apmd/apmsubr.c
@@ -79,6 +79,8 @@ perf_mode(int mode)
 		return "manual";
 	case PERF_AUTO:
 		return "auto";
+	case PERF_POWERSAVING:
+		return "powersaving";
 	default:
 		return "invalid";
 	}


-- 
wbr, Kirill