summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/acpi/processor_idle.c48
-rw-r--r--drivers/cpuidle/cpuidle.c94
-rw-r--r--drivers/idle/intel_idle.c179
-rw-r--r--include/linux/cpuidle.h13
-rw-r--r--include/linux/suspend.h16
-rw-r--r--include/linux/tick.h6
-rw-r--r--kernel/power/suspend.c43
-rw-r--r--kernel/sched/idle.c16
-rw-r--r--kernel/time/tick-common.c50
-rw-r--r--kernel/time/timekeeping.c48
-rw-r--r--kernel/time/timekeeping.h2
11 files changed, 407 insertions, 108 deletions
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index c256bd7fbd78..c6bb9f1257c9 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -732,9 +732,8 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
static bool acpi_idle_fallback_to_c1(struct acpi_processor *pr)
{
- return IS_ENABLED(CONFIG_HOTPLUG_CPU) && num_online_cpus() > 1 &&
- !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED) &&
- !pr->flags.has_cst;
+ return IS_ENABLED(CONFIG_HOTPLUG_CPU) && !pr->flags.has_cst &&
+ !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED);
}
static int c3_cpu_count;
@@ -744,9 +743,10 @@ static DEFINE_RAW_SPINLOCK(c3_lock);
* acpi_idle_enter_bm - enters C3 with proper BM handling
* @pr: Target processor
* @cx: Target state context
+ * @timer_bc: Whether or not to change timer mode to broadcast
*/
static void acpi_idle_enter_bm(struct acpi_processor *pr,
- struct acpi_processor_cx *cx)
+ struct acpi_processor_cx *cx, bool timer_bc)
{
acpi_unlazy_tlb(smp_processor_id());
@@ -754,7 +754,8 @@ static void acpi_idle_enter_bm(struct acpi_processor *pr,
* Must be done before busmaster disable as we might need to
* access HPET !
*/
- lapic_timer_state_broadcast(pr, cx, 1);
+ if (timer_bc)
+ lapic_timer_state_broadcast(pr, cx, 1);
/*
* disable bus master
@@ -784,7 +785,8 @@ static void acpi_idle_enter_bm(struct acpi_processor *pr,
raw_spin_unlock(&c3_lock);
}
- lapic_timer_state_broadcast(pr, cx, 0);
+ if (timer_bc)
+ lapic_timer_state_broadcast(pr, cx, 0);
}
static int acpi_idle_enter(struct cpuidle_device *dev,
@@ -798,12 +800,12 @@ static int acpi_idle_enter(struct cpuidle_device *dev,
return -EINVAL;
if (cx->type != ACPI_STATE_C1) {
- if (acpi_idle_fallback_to_c1(pr)) {
+ if (acpi_idle_fallback_to_c1(pr) && num_online_cpus() > 1) {
index = CPUIDLE_DRIVER_STATE_START;
cx = per_cpu(acpi_cstate[index], dev->cpu);
} else if (cx->type == ACPI_STATE_C3 && pr->flags.bm_check) {
if (cx->bm_sts_skip || !acpi_idle_bm_check()) {
- acpi_idle_enter_bm(pr, cx);
+ acpi_idle_enter_bm(pr, cx, true);
return index;
} else if (drv->safe_state_index >= 0) {
index = drv->safe_state_index;
@@ -827,6 +829,27 @@ static int acpi_idle_enter(struct cpuidle_device *dev,
return index;
}
+static void acpi_idle_enter_freeze(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu);
+
+ if (cx->type == ACPI_STATE_C3) {
+ struct acpi_processor *pr = __this_cpu_read(processors);
+
+ if (unlikely(!pr))
+ return;
+
+ if (pr->flags.bm_check) {
+ acpi_idle_enter_bm(pr, cx, false);
+ return;
+ } else {
+ ACPI_FLUSH_CPU_CACHE();
+ }
+ }
+ acpi_idle_do_entry(cx);
+}
+
struct cpuidle_driver acpi_idle_driver = {
.name = "acpi_idle",
.owner = THIS_MODULE,
@@ -925,6 +948,15 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr)
state->enter_dead = acpi_idle_play_dead;
drv->safe_state_index = count;
}
+ /*
+ * Halt-induced C1 is not good for ->enter_freeze, because it
+ * re-enables interrupts on exit. Moreover, C1 is generally not
+ * particularly interesting from the suspend-to-idle angle, so
+ * avoid C1 and the situations in which we may need to fall back
+ * to it altogether.
+ */
+ if (cx->type != ACPI_STATE_C1 && !acpi_idle_fallback_to_c1(pr))
+ state->enter_freeze = acpi_idle_enter_freeze;
count++;
if (count == CPUIDLE_STATE_MAX)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 125150dc6e81..4d534582514e 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -19,6 +19,8 @@
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/module.h>
+#include <linux/suspend.h>
+#include <linux/tick.h>
#include <trace/events/power.h>
#include "cpuidle.h"
@@ -32,7 +34,6 @@ LIST_HEAD(cpuidle_detected_devices);
static int enabled_devices;
static int off __read_mostly;
static int initialized __read_mostly;
-static bool use_deepest_state __read_mostly;
int cpuidle_disabled(void)
{
@@ -66,36 +67,23 @@ int cpuidle_play_dead(void)
}
/**
- * cpuidle_use_deepest_state - Enable/disable the "deepest idle" mode.
- * @enable: Whether enable or disable the feature.
- *
- * If the "deepest idle" mode is enabled, cpuidle will ignore the governor and
- * always use the state with the greatest exit latency (out of the states that
- * are not disabled).
- *
- * This function can only be called after cpuidle_pause() to avoid races.
- */
-void cpuidle_use_deepest_state(bool enable)
-{
- use_deepest_state = enable;
-}
-
-/**
- * cpuidle_find_deepest_state - Find the state of the greatest exit latency.
- * @drv: cpuidle driver for a given CPU.
- * @dev: cpuidle device for a given CPU.
+ * cpuidle_find_deepest_state - Find deepest state meeting specific conditions.
+ * @drv: cpuidle driver for the given CPU.
+ * @dev: cpuidle device for the given CPU.
+ * @freeze: Whether or not the state should be suitable for suspend-to-idle.
*/
static int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
- struct cpuidle_device *dev)
+ struct cpuidle_device *dev, bool freeze)
{
unsigned int latency_req = 0;
- int i, ret = CPUIDLE_DRIVER_STATE_START - 1;
+ int i, ret = freeze ? -1 : CPUIDLE_DRIVER_STATE_START - 1;
for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
struct cpuidle_state_usage *su = &dev->states_usage[i];
- if (s->disabled || su->disable || s->exit_latency <= latency_req)
+ if (s->disabled || su->disable || s->exit_latency <= latency_req
+ || (freeze && !s->enter_freeze))
continue;
latency_req = s->exit_latency;
@@ -104,6 +92,63 @@ static int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
return ret;
}
+static void enter_freeze_proper(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev, int index)
+{
+ tick_freeze();
+ /*
+ * The state used here cannot be a "coupled" one, because the "coupled"
+ * cpuidle mechanism enables interrupts and doing that with timekeeping
+ * suspended is generally unsafe.
+ */
+ drv->states[index].enter_freeze(dev, drv, index);
+ WARN_ON(!irqs_disabled());
+ /*
+ * timekeeping_resume() that will be called by tick_unfreeze() for the
+ * last CPU executing it calls functions containing RCU read-side
+ * critical sections, so tell RCU about that.
+ */
+ RCU_NONIDLE(tick_unfreeze());
+}
+
+/**
+ * cpuidle_enter_freeze - Enter an idle state suitable for suspend-to-idle.
+ *
+ * If there are states with the ->enter_freeze callback, find the deepest of
+ * them and enter it with frozen tick. Otherwise, find the deepest state
+ * available and enter it normally.
+ */
+void cpuidle_enter_freeze(void)
+{
+ struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+ int index;
+
+ /*
+ * Find the deepest state with ->enter_freeze present, which guarantees
+ * that interrupts won't be enabled when it exits and allows the tick to
+ * be frozen safely.
+ */
+ index = cpuidle_find_deepest_state(drv, dev, true);
+ if (index >= 0) {
+ enter_freeze_proper(drv, dev, index);
+ return;
+ }
+
+ /*
+ * It is not safe to freeze the tick, find the deepest state available
+ * at all and try to enter it normally.
+ */
+ index = cpuidle_find_deepest_state(drv, dev, false);
+ if (index >= 0)
+ cpuidle_enter(drv, dev, index);
+ else
+ arch_cpu_idle();
+
+ /* Interrupts are enabled again here. */
+ local_irq_disable();
+}
+
/**
* cpuidle_enter_state - enter the state and update stats
* @dev: cpuidle device for this cpu
@@ -166,9 +211,6 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
if (!drv || !dev || !dev->enabled)
return -EBUSY;
- if (unlikely(use_deepest_state))
- return cpuidle_find_deepest_state(drv, dev);
-
return cpuidle_curr_governor->select(drv, dev);
}
@@ -200,7 +242,7 @@ int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev,
*/
void cpuidle_reflect(struct cpuidle_device *dev, int index)
{
- if (cpuidle_curr_governor->reflect && !unlikely(use_deepest_state))
+ if (cpuidle_curr_governor->reflect)
cpuidle_curr_governor->reflect(dev, index);
}
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 1bc0c170f12a..b0e58522780d 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -97,6 +97,8 @@ static const struct idle_cpu *icpu;
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
static int intel_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index);
+static void intel_idle_freeze(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index);
static int intel_idle_cpu_init(int cpu);
static struct cpuidle_state *cpuidle_state_table;
@@ -131,28 +133,32 @@ static struct cpuidle_state nehalem_cstates[] = {
.flags = MWAIT2flg(0x00),
.exit_latency = 3,
.target_residency = 6,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C1E-NHM",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
.target_residency = 20,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C3-NHM",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 20,
.target_residency = 80,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C6-NHM",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 200,
.target_residency = 800,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.enter = NULL }
};
@@ -164,35 +170,40 @@ static struct cpuidle_state snb_cstates[] = {
.flags = MWAIT2flg(0x00),
.exit_latency = 2,
.target_residency = 2,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C1E-SNB",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
.target_residency = 20,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C3-SNB",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 80,
.target_residency = 211,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C6-SNB",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 104,
.target_residency = 345,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C7-SNB",
.desc = "MWAIT 0x30",
.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 109,
.target_residency = 345,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.enter = NULL }
};
@@ -204,42 +215,48 @@ static struct cpuidle_state byt_cstates[] = {
.flags = MWAIT2flg(0x00),
.exit_latency = 1,
.target_residency = 1,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C1E-BYT",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 15,
.target_residency = 30,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C6N-BYT",
.desc = "MWAIT 0x58",
.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 40,
.target_residency = 275,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C6S-BYT",
.desc = "MWAIT 0x52",
.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 140,
.target_residency = 560,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C7-BYT",
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 1200,
.target_residency = 1500,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C7S-BYT",
.desc = "MWAIT 0x64",
.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 10000,
.target_residency = 20000,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.enter = NULL }
};
@@ -251,35 +268,40 @@ static struct cpuidle_state ivb_cstates[] = {
.flags = MWAIT2flg(0x00),
.exit_latency = 1,
.target_residency = 1,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C1E-IVB",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
.target_residency = 20,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C3-IVB",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 59,
.target_residency = 156,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C6-IVB",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 80,
.target_residency = 300,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C7-IVB",
.desc = "MWAIT 0x30",
.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 87,
.target_residency = 300,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.enter = NULL }
};
@@ -291,28 +313,32 @@ static struct cpuidle_state ivt_cstates[] = {
.flags = MWAIT2flg(0x00),
.exit_latency = 1,
.target_residency = 1,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C1E-IVT",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
.target_residency = 80,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C3-IVT",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 59,
.target_residency = 156,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C6-IVT",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 82,
.target_residency = 300,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.enter = NULL }
};
@@ -324,28 +350,32 @@ static struct cpuidle_state ivt_cstates_4s[] = {
.flags = MWAIT2flg(0x00),
.exit_latency = 1,
.target_residency = 1,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C1E-IVT-4S",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
.target_residency = 250,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C3-IVT-4S",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 59,
.target_residency = 300,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C6-IVT-4S",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 84,
.target_residency = 400,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.enter = NULL }
};
@@ -357,28 +387,32 @@ static struct cpuidle_state ivt_cstates_8s[] = {
.flags = MWAIT2flg(0x00),
.exit_latency = 1,
.target_residency = 1,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C1E-IVT-8S",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
.target_residency = 500,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C3-IVT-8S",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 59,
.target_residency = 600,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C6-IVT-8S",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 88,
.target_residency = 700,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.enter = NULL }
};
@@ -390,56 +424,64 @@ static struct cpuidle_state hsw_cstates[] = {
.flags = MWAIT2flg(0x00),
.exit_latency = 2,
.target_residency = 2,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C1E-HSW",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
.target_residency = 20,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C3-HSW",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 33,
.target_residency = 100,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C6-HSW",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
.target_residency = 400,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C7s-HSW",
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
.target_residency = 500,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C8-HSW",
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 300,
.target_residency = 900,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C9-HSW",
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 600,
.target_residency = 1800,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C10-HSW",
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 2600,
.target_residency = 7700,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.enter = NULL }
};
@@ -450,56 +492,64 @@ static struct cpuidle_state bdw_cstates[] = {
.flags = MWAIT2flg(0x00),
.exit_latency = 2,
.target_residency = 2,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C1E-BDW",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01),
.exit_latency = 10,
.target_residency = 20,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C3-BDW",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 40,
.target_residency = 100,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C6-BDW",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
.target_residency = 400,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C7s-BDW",
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
.target_residency = 500,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C8-BDW",
.desc = "MWAIT 0x40",
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 300,
.target_residency = 900,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C9-BDW",
.desc = "MWAIT 0x50",
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 600,
.target_residency = 1800,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C10-BDW",
.desc = "MWAIT 0x60",
.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 2600,
.target_residency = 7700,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.enter = NULL }
};
@@ -511,28 +561,32 @@ static struct cpuidle_state atom_cstates[] = {
.flags = MWAIT2flg(0x00),
.exit_latency = 10,
.target_residency = 20,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C2-ATM",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10),
.exit_latency = 20,
.target_residency = 80,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C4-ATM",
.desc = "MWAIT 0x30",
.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 100,
.target_residency = 400,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C6-ATM",
.desc = "MWAIT 0x52",
.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 140,
.target_residency = 560,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.enter = NULL }
};
@@ -543,14 +597,16 @@ static struct cpuidle_state avn_cstates[] = {
.flags = MWAIT2flg(0x00),
.exit_latency = 2,
.target_residency = 2,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.name = "C6-AVN",
.desc = "MWAIT 0x51",
.flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 15,
.target_residency = 45,
- .enter = &intel_idle },
+ .enter = &intel_idle,
+ .enter_freeze = intel_idle_freeze, },
{
.enter = NULL }
};
@@ -592,6 +648,21 @@ static int intel_idle(struct cpuidle_device *dev,
return index;
}
+/**
+ * intel_idle_freeze - simplified "enter" callback routine for suspend-to-idle
+ * @dev: cpuidle_device
+ * @drv: cpuidle driver
+ * @index: state index
+ */
+static void intel_idle_freeze(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ unsigned long ecx = 1; /* break on interrupt flag */
+ unsigned long eax = flg2MWAIT(drv->states[index].flags);
+
+ mwait_idle_with_hints(eax, ecx);
+}
+
static void __setup_broadcast_timer(void *arg)
{
unsigned long reason = (unsigned long)arg;
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index ab70f3bc44ad..f551a9299ac9 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -50,6 +50,15 @@ struct cpuidle_state {
int index);
int (*enter_dead) (struct cpuidle_device *dev, int index);
+
+ /*
+ * CPUs execute ->enter_freeze with the local tick or entire timekeeping
+ * suspended, so it must not re-enable interrupts at any point (even
+ * temporarily) or attempt to change states of clock event devices.
+ */
+ void (*enter_freeze) (struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index);
};
/* Idle State Flags */
@@ -141,7 +150,7 @@ extern void cpuidle_resume(void);
extern int cpuidle_enable_device(struct cpuidle_device *dev);
extern void cpuidle_disable_device(struct cpuidle_device *dev);
extern int cpuidle_play_dead(void);
-extern void cpuidle_use_deepest_state(bool enable);
+extern void cpuidle_enter_freeze(void);
extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
#else
@@ -174,7 +183,7 @@ static inline int cpuidle_enable_device(struct cpuidle_device *dev)
{return -ENODEV; }
static inline void cpuidle_disable_device(struct cpuidle_device *dev) { }
static inline int cpuidle_play_dead(void) {return -ENODEV; }
-static inline void cpuidle_use_deepest_state(bool enable) {}
+static inline void cpuidle_enter_freeze(void) { }
static inline struct cpuidle_driver *cpuidle_get_cpu_driver(
struct cpuidle_device *dev) {return NULL; }
#endif
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 3388c1b6f7d8..5efe743ce1e8 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -201,6 +201,21 @@ struct platform_freeze_ops {
*/
extern void suspend_set_ops(const struct platform_suspend_ops *ops);
extern int suspend_valid_only_mem(suspend_state_t state);
+
+/* Suspend-to-idle state machnine. */
+enum freeze_state {
+ FREEZE_STATE_NONE, /* Not suspended/suspending. */
+ FREEZE_STATE_ENTER, /* Enter suspend-to-idle. */
+ FREEZE_STATE_WAKE, /* Wake up from suspend-to-idle. */
+};
+
+extern enum freeze_state __read_mostly suspend_freeze_state;
+
+static inline bool idle_should_freeze(void)
+{
+ return unlikely(suspend_freeze_state == FREEZE_STATE_ENTER);
+}
+
extern void freeze_set_ops(const struct platform_freeze_ops *ops);
extern void freeze_wake(void);
@@ -228,6 +243,7 @@ extern int pm_suspend(suspend_state_t state);
static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {}
static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; }
+static inline bool idle_should_freeze(void) { return false; }
static inline void freeze_set_ops(const struct platform_freeze_ops *ops) {}
static inline void freeze_wake(void) {}
#endif /* !CONFIG_SUSPEND */
diff --git a/include/linux/tick.h b/include/linux/tick.h
index eda850ca757a..9c085dc12ae9 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -79,6 +79,9 @@ extern void __init tick_init(void);
extern int tick_is_oneshot_available(void);
extern struct tick_device *tick_get_device(int cpu);
+extern void tick_freeze(void);
+extern void tick_unfreeze(void);
+
# ifdef CONFIG_HIGH_RES_TIMERS
extern int tick_init_highres(void);
extern int tick_program_event(ktime_t expires, int force);
@@ -119,6 +122,8 @@ static inline int tick_oneshot_mode_active(void) { return 0; }
#else /* CONFIG_GENERIC_CLOCKEVENTS */
static inline void tick_init(void) { }
+static inline void tick_freeze(void) { }
+static inline void tick_unfreeze(void) { }
static inline void tick_cancel_sched_timer(int cpu) { }
static inline void tick_clock_notify(void) { }
static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
@@ -226,5 +231,4 @@ static inline void tick_nohz_task_switch(struct task_struct *tsk)
__tick_nohz_task_switch(tsk);
}
-
#endif
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index c347e3ce3a55..b7d6b3a721b1 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -37,7 +37,9 @@ const char *pm_states[PM_SUSPEND_MAX];
static const struct platform_suspend_ops *suspend_ops;
static const struct platform_freeze_ops *freeze_ops;
static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head);
-static bool suspend_freeze_wake;
+
+enum freeze_state __read_mostly suspend_freeze_state;
+static DEFINE_SPINLOCK(suspend_freeze_lock);
void freeze_set_ops(const struct platform_freeze_ops *ops)
{
@@ -48,22 +50,49 @@ void freeze_set_ops(const struct platform_freeze_ops *ops)
static void freeze_begin(void)
{
- suspend_freeze_wake = false;
+ suspend_freeze_state = FREEZE_STATE_NONE;
}
static void freeze_enter(void)
{
- cpuidle_use_deepest_state(true);
+ spin_lock_irq(&suspend_freeze_lock);
+ if (pm_wakeup_pending())
+ goto out;
+
+ suspend_freeze_state = FREEZE_STATE_ENTER;
+ spin_unlock_irq(&suspend_freeze_lock);
+
+ get_online_cpus();
cpuidle_resume();
- wait_event(suspend_freeze_wait_head, suspend_freeze_wake);
+
+ /* Push all the CPUs into the idle loop. */
+ wake_up_all_idle_cpus();
+ pr_debug("PM: suspend-to-idle\n");
+ /* Make the current CPU wait so it can enter the idle loop too. */
+ wait_event(suspend_freeze_wait_head,
+ suspend_freeze_state == FREEZE_STATE_WAKE);
+ pr_debug("PM: resume from suspend-to-idle\n");
+
cpuidle_pause();
- cpuidle_use_deepest_state(false);
+ put_online_cpus();
+
+ spin_lock_irq(&suspend_freeze_lock);
+
+ out:
+ suspend_freeze_state = FREEZE_STATE_NONE;
+ spin_unlock_irq(&suspend_freeze_lock);
}
void freeze_wake(void)
{
- suspend_freeze_wake = true;
- wake_up(&suspend_freeze_wait_head);
+ unsigned long flags;
+
+ spin_lock_irqsave(&suspend_freeze_lock, flags);
+ if (suspend_freeze_state > FREEZE_STATE_NONE) {
+ suspend_freeze_state = FREEZE_STATE_WAKE;
+ wake_up(&suspend_freeze_wait_head);
+ }
+ spin_unlock_irqrestore(&suspend_freeze_lock, flags);
}
EXPORT_SYMBOL_GPL(freeze_wake);
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index aaf1c1d5cf5d..94b2d7b88a27 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -7,6 +7,7 @@
#include <linux/tick.h>
#include <linux/mm.h>
#include <linux/stackprotector.h>
+#include <linux/suspend.h>
#include <asm/tlb.h>
@@ -105,6 +106,21 @@ static void cpuidle_idle_call(void)
rcu_idle_enter();
/*
+ * Suspend-to-idle ("freeze") is a system state in which all user space
+ * has been frozen, all I/O devices have been suspended and the only
+ * activity happens here and in iterrupts (if any). In that case bypass
+ * the cpuidle governor and go stratight for the deepest idle state
+ * available. Possibly also suspend the local tick and the entire
+ * timekeeping to prevent timer interrupts from kicking us out of idle
+ * until a proper wakeup interrupt happens.
+ */
+ if (idle_should_freeze()) {
+ cpuidle_enter_freeze();
+ local_irq_enable();
+ goto exit_idle;
+ }
+
+ /*
* Ask the cpuidle framework to choose a convenient idle state.
* Fall back to the default arch idle method on errors.
*/
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 7efeedf53ebd..f7c515595b42 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -394,6 +394,56 @@ void tick_resume(void)
}
}
+static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
+static unsigned int tick_freeze_depth;
+
+/**
+ * tick_freeze - Suspend the local tick and (possibly) timekeeping.
+ *
+ * Check if this is the last online CPU executing the function and if so,
+ * suspend timekeeping. Otherwise suspend the local tick.
+ *
+ * Call with interrupts disabled. Must be balanced with %tick_unfreeze().
+ * Interrupts must not be enabled before the subsequent %tick_unfreeze().
+ */
+void tick_freeze(void)
+{
+ raw_spin_lock(&tick_freeze_lock);
+
+ tick_freeze_depth++;
+ if (tick_freeze_depth == num_online_cpus()) {
+ timekeeping_suspend();
+ } else {
+ tick_suspend();
+ tick_suspend_broadcast();
+ }
+
+ raw_spin_unlock(&tick_freeze_lock);
+}
+
+/**
+ * tick_unfreeze - Resume the local tick and (possibly) timekeeping.
+ *
+ * Check if this is the first CPU executing the function and if so, resume
+ * timekeeping. Otherwise resume the local tick.
+ *
+ * Call with interrupts disabled. Must be balanced with %tick_freeze().
+ * Interrupts must not be enabled after the preceding %tick_freeze().
+ */
+void tick_unfreeze(void)
+{
+ raw_spin_lock(&tick_freeze_lock);
+
+ if (tick_freeze_depth == num_online_cpus())
+ timekeeping_resume();
+ else
+ tick_resume();
+
+ tick_freeze_depth--;
+
+ raw_spin_unlock(&tick_freeze_lock);
+}
+
/**
* tick_init - initialize the tick control
*/
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index b124af259800..91db94136c10 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -230,9 +230,7 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
/**
* update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
- * @tk: The timekeeper from which we take the update
- * @tkf: The fast timekeeper to update
- * @tbase: The time base for the fast timekeeper (mono/raw)
+ * @tkr: Timekeeping readout base from which we take the update
*
* We want to use this from any context including NMI and tracing /
* instrumenting the timekeeping code itself.
@@ -244,11 +242,11 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
* smp_wmb(); <- Ensure that the last base[1] update is visible
* tkf->seq++;
* smp_wmb(); <- Ensure that the seqcount update is visible
- * update(tkf->base[0], tk);
+ * update(tkf->base[0], tkr);
* smp_wmb(); <- Ensure that the base[0] update is visible
* tkf->seq++;
* smp_wmb(); <- Ensure that the seqcount update is visible
- * update(tkf->base[1], tk);
+ * update(tkf->base[1], tkr);
*
* The reader side does:
*
@@ -269,7 +267,7 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
* slightly wrong timestamp (a few nanoseconds). See
* @ktime_get_mono_fast_ns.
*/
-static void update_fast_timekeeper(struct timekeeper *tk)
+static void update_fast_timekeeper(struct tk_read_base *tkr)
{
struct tk_read_base *base = tk_fast_mono.base;
@@ -277,7 +275,7 @@ static void update_fast_timekeeper(struct timekeeper *tk)
raw_write_seqcount_latch(&tk_fast_mono.seq);
/* Update base[0] */
- memcpy(base, &tk->tkr, sizeof(*base));
+ memcpy(base, tkr, sizeof(*base));
/* Force readers back to base[0] */
raw_write_seqcount_latch(&tk_fast_mono.seq);
@@ -334,6 +332,35 @@ u64 notrace ktime_get_mono_fast_ns(void)
}
EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
+/* Suspend-time cycles value for halted fast timekeeper. */
+static cycle_t cycles_at_suspend;
+
+static cycle_t dummy_clock_read(struct clocksource *cs)
+{
+ return cycles_at_suspend;
+}
+
+/**
+ * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
+ * @tk: Timekeeper to snapshot.
+ *
+ * It generally is unsafe to access the clocksource after timekeeping has been
+ * suspended, so take a snapshot of the readout base of @tk and use it as the
+ * fast timekeeper's readout base while suspended. It will return the same
+ * number of cycles every time until timekeeping is resumed at which time the
+ * proper readout base for the fast timekeeper will be restored automatically.
+ */
+static void halt_fast_timekeeper(struct timekeeper *tk)
+{
+ static struct tk_read_base tkr_dummy;
+ struct tk_read_base *tkr = &tk->tkr;
+
+ memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
+ cycles_at_suspend = tkr->read(tkr->clock);
+ tkr_dummy.read = dummy_clock_read;
+ update_fast_timekeeper(&tkr_dummy);
+}
+
#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
static inline void update_vsyscall(struct timekeeper *tk)
@@ -462,7 +489,7 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
memcpy(&shadow_timekeeper, &tk_core.timekeeper,
sizeof(tk_core.timekeeper));
- update_fast_timekeeper(tk);
+ update_fast_timekeeper(&tk->tkr);
}
/**
@@ -1170,7 +1197,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
* xtime/wall_to_monotonic/jiffies/etc are
* still managed by arch specific suspend/resume code.
*/
-static void timekeeping_resume(void)
+void timekeeping_resume(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct clocksource *clock = tk->tkr.clock;
@@ -1251,7 +1278,7 @@ static void timekeeping_resume(void)
hrtimers_resume();
}
-static int timekeeping_suspend(void)
+int timekeeping_suspend(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned long flags;
@@ -1296,6 +1323,7 @@ static int timekeeping_suspend(void)
}
timekeeping_update(tk, TK_MIRROR);
+ halt_fast_timekeeper(tk);
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index adc1fc98bde3..1d91416055d5 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -16,5 +16,7 @@ extern int timekeeping_inject_offset(struct timespec *ts);
extern s32 timekeeping_get_tai_offset(void);
extern void timekeeping_set_tai_offset(s32 tai_offset);
extern void timekeeping_clocktai(struct timespec *ts);
+extern int timekeeping_suspend(void);
+extern void timekeeping_resume(void);
#endif