summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/base/arch_topology.c89
-rw-r--r--drivers/base/power/domain.c36
-rw-r--r--drivers/base/power/wakeup_stats.c4
-rw-r--r--drivers/clk/mvebu/armada-37xx-periph.c83
-rw-r--r--drivers/cpufreq/Kconfig23
-rw-r--r--drivers/cpufreq/Kconfig.arm10
-rw-r--r--drivers/cpufreq/armada-37xx-cpufreq.c111
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c259
-rw-r--r--drivers/cpufreq/cpufreq-dt.c9
-rw-r--r--drivers/cpufreq/cpufreq.c3
-rw-r--r--drivers/cpufreq/ia64-acpi-cpufreq.c4
-rw-r--r--drivers/cpufreq/intel_pstate.c107
-rw-r--r--drivers/cpufreq/s5pv210-cpufreq.c14
-rw-r--r--drivers/cpuidle/Kconfig.arm2
-rw-r--r--drivers/cpuidle/cpuidle-tegra.c19
-rw-r--r--drivers/cpuidle/driver.c4
-rw-r--r--drivers/cpuidle/governors/menu.c17
-rw-r--r--drivers/cpuidle/governors/teo.c54
-rw-r--r--drivers/idle/intel_idle.c5
-rw-r--r--drivers/pci/pci.c16
-rw-r--r--drivers/powercap/intel_rapl_common.c1
-rw-r--r--drivers/powercap/intel_rapl_msr.c1
22 files changed, 586 insertions, 285 deletions
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index de8587cc119e..c1179edc0f3b 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -21,17 +21,94 @@
#include <linux/sched.h>
#include <linux/smp.h>
+static DEFINE_PER_CPU(struct scale_freq_data *, sft_data);
+static struct cpumask scale_freq_counters_mask;
+static bool scale_freq_invariant;
+
+static bool supports_scale_freq_counters(const struct cpumask *cpus)
+{
+ return cpumask_subset(cpus, &scale_freq_counters_mask);
+}
+
bool topology_scale_freq_invariant(void)
{
return cpufreq_supports_freq_invariance() ||
- arch_freq_counters_available(cpu_online_mask);
+ supports_scale_freq_counters(cpu_online_mask);
}
-__weak bool arch_freq_counters_available(const struct cpumask *cpus)
+static void update_scale_freq_invariant(bool status)
{
- return false;
+ if (scale_freq_invariant == status)
+ return;
+
+ /*
+ * Task scheduler behavior depends on frequency invariance support,
+ * either cpufreq or counter driven. If the support status changes as
+ * a result of counter initialisation and use, retrigger the build of
+ * scheduling domains to ensure the information is propagated properly.
+ */
+ if (topology_scale_freq_invariant() == status) {
+ scale_freq_invariant = status;
+ rebuild_sched_domains_energy();
+ }
}
-DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
+
+void topology_set_scale_freq_source(struct scale_freq_data *data,
+ const struct cpumask *cpus)
+{
+ struct scale_freq_data *sfd;
+ int cpu;
+
+ /*
+ * Avoid calling rebuild_sched_domains() unnecessarily if FIE is
+ * supported by cpufreq.
+ */
+ if (cpumask_empty(&scale_freq_counters_mask))
+ scale_freq_invariant = topology_scale_freq_invariant();
+
+ for_each_cpu(cpu, cpus) {
+ sfd = per_cpu(sft_data, cpu);
+
+ /* Use ARCH provided counters whenever possible */
+ if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
+ per_cpu(sft_data, cpu) = data;
+ cpumask_set_cpu(cpu, &scale_freq_counters_mask);
+ }
+ }
+
+ update_scale_freq_invariant(true);
+}
+EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);
+
+void topology_clear_scale_freq_source(enum scale_freq_source source,
+ const struct cpumask *cpus)
+{
+ struct scale_freq_data *sfd;
+ int cpu;
+
+ for_each_cpu(cpu, cpus) {
+ sfd = per_cpu(sft_data, cpu);
+
+ if (sfd && sfd->source == source) {
+ per_cpu(sft_data, cpu) = NULL;
+ cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
+ }
+ }
+
+ update_scale_freq_invariant(false);
+}
+EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source);
+
+void topology_scale_freq_tick(void)
+{
+ struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data);
+
+ if (sfd)
+ sfd->set_freq_scale();
+}
+
+DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
+EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);
void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
unsigned long max_freq)
@@ -47,13 +124,13 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
* want to update the scale factor with information from CPUFREQ.
* Instead the scale factor will be updated from arch_scale_freq_tick.
*/
- if (arch_freq_counters_available(cpus))
+ if (supports_scale_freq_counters(cpus))
return;
scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;
for_each_cpu(i, cpus)
- per_cpu(freq_scale, i) = scale;
+ per_cpu(arch_freq_scale, i) = scale;
}
DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 78c310d3179d..b6a782c31613 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1088,34 +1088,6 @@ static void genpd_sync_power_on(struct generic_pm_domain *genpd, bool use_lock,
}
/**
- * resume_needed - Check whether to resume a device before system suspend.
- * @dev: Device to check.
- * @genpd: PM domain the device belongs to.
- *
- * There are two cases in which a device that can wake up the system from sleep
- * states should be resumed by genpd_prepare(): (1) if the device is enabled
- * to wake up the system and it has to remain active for this purpose while the
- * system is in the sleep state and (2) if the device is not enabled to wake up
- * the system from sleep states and it generally doesn't generate wakeup signals
- * by itself (those signals are generated on its behalf by other parts of the
- * system). In the latter case it may be necessary to reconfigure the device's
- * wakeup settings during system suspend, because it may have been set up to
- * signal remote wakeup from the system's working state as needed by runtime PM.
- * Return 'true' in either of the above cases.
- */
-static bool resume_needed(struct device *dev,
- const struct generic_pm_domain *genpd)
-{
- bool active_wakeup;
-
- if (!device_can_wakeup(dev))
- return false;
-
- active_wakeup = genpd_is_active_wakeup(genpd);
- return device_may_wakeup(dev) ? active_wakeup : !active_wakeup;
-}
-
-/**
* genpd_prepare - Start power transition of a device in a PM domain.
* @dev: Device to start the transition of.
*
@@ -1135,14 +1107,6 @@ static int genpd_prepare(struct device *dev)
if (IS_ERR(genpd))
return -EINVAL;
- /*
- * If a wakeup request is pending for the device, it should be woken up
- * at this point and a system wakeup event should be reported if it's
- * set up to wake up the system from sleep states.
- */
- if (resume_needed(dev, genpd))
- pm_runtime_resume(dev);
-
genpd_lock(genpd);
if (genpd->prepared_count++ == 0)
diff --git a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c
index d638259b829a..924fac493c4f 100644
--- a/drivers/base/power/wakeup_stats.c
+++ b/drivers/base/power/wakeup_stats.c
@@ -137,7 +137,7 @@ static struct device *wakeup_source_device_create(struct device *parent,
struct wakeup_source *ws)
{
struct device *dev = NULL;
- int retval = -ENODEV;
+ int retval;
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev) {
@@ -154,7 +154,7 @@ static struct device *wakeup_source_device_create(struct device *parent,
dev_set_drvdata(dev, ws);
device_set_pm_not_required(dev);
- retval = kobject_set_name(&dev->kobj, "wakeup%d", ws->id);
+ retval = dev_set_name(dev, "wakeup%d", ws->id);
if (retval)
goto error;
diff --git a/drivers/clk/mvebu/armada-37xx-periph.c b/drivers/clk/mvebu/armada-37xx-periph.c
index f5746f9ea929..32ac6b6b7530 100644
--- a/drivers/clk/mvebu/armada-37xx-periph.c
+++ b/drivers/clk/mvebu/armada-37xx-periph.c
@@ -84,6 +84,7 @@ struct clk_pm_cpu {
void __iomem *reg_div;
u8 shift_div;
struct regmap *nb_pm_base;
+ unsigned long l1_expiration;
};
#define to_clk_double_div(_hw) container_of(_hw, struct clk_double_div, hw)
@@ -440,33 +441,6 @@ static u8 clk_pm_cpu_get_parent(struct clk_hw *hw)
return val;
}
-static int clk_pm_cpu_set_parent(struct clk_hw *hw, u8 index)
-{
- struct clk_pm_cpu *pm_cpu = to_clk_pm_cpu(hw);
- struct regmap *base = pm_cpu->nb_pm_base;
- int load_level;
-
- /*
- * We set the clock parent only if the DVFS is available but
- * not enabled.
- */
- if (IS_ERR(base) || armada_3700_pm_dvfs_is_enabled(base))
- return -EINVAL;
-
- /* Set the parent clock for all the load level */
- for (load_level = 0; load_level < LOAD_LEVEL_NR; load_level++) {
- unsigned int reg, mask, val,
- offset = ARMADA_37XX_NB_TBG_SEL_OFF;
-
- armada_3700_pm_dvfs_update_regs(load_level, &reg, &offset);
-
- val = index << offset;
- mask = ARMADA_37XX_NB_TBG_SEL_MASK << offset;
- regmap_update_bits(base, reg, mask, val);
- }
- return 0;
-}
-
static unsigned long clk_pm_cpu_recalc_rate(struct clk_hw *hw,
unsigned long parent_rate)
{
@@ -514,8 +488,10 @@ static long clk_pm_cpu_round_rate(struct clk_hw *hw, unsigned long rate,
}
/*
- * Switching the CPU from the L2 or L3 frequencies (300 and 200 Mhz
- * respectively) to L0 frequency (1.2 Ghz) requires a significant
+ * Workaround when base CPU frequnecy is 1000 or 1200 MHz
+ *
+ * Switching the CPU from the L2 or L3 frequencies (250/300 or 200 MHz
+ * respectively) to L0 frequency (1/1.2 GHz) requires a significant
* amount of time to let VDD stabilize to the appropriate
* voltage. This amount of time is large enough that it cannot be
* covered by the hardware countdown register. Due to this, the CPU
@@ -525,26 +501,56 @@ static long clk_pm_cpu_round_rate(struct clk_hw *hw, unsigned long rate,
* To work around this problem, we prevent switching directly from the
* L2/L3 frequencies to the L0 frequency, and instead switch to the L1
* frequency in-between. The sequence therefore becomes:
- * 1. First switch from L2/L3(200/300MHz) to L1(600MHZ)
+ * 1. First switch from L2/L3 (200/250/300 MHz) to L1 (500/600 MHz)
* 2. Sleep 20ms for stabling VDD voltage
- * 3. Then switch from L1(600MHZ) to L0(1200Mhz).
+ * 3. Then switch from L1 (500/600 MHz) to L0 (1000/1200 MHz).
*/
-static void clk_pm_cpu_set_rate_wa(unsigned long rate, struct regmap *base)
+static void clk_pm_cpu_set_rate_wa(struct clk_pm_cpu *pm_cpu,
+ unsigned int new_level, unsigned long rate,
+ struct regmap *base)
{
unsigned int cur_level;
- if (rate != 1200 * 1000 * 1000)
- return;
-
regmap_read(base, ARMADA_37XX_NB_CPU_LOAD, &cur_level);
cur_level &= ARMADA_37XX_NB_CPU_LOAD_MASK;
- if (cur_level <= ARMADA_37XX_DVFS_LOAD_1)
+
+ if (cur_level == new_level)
+ return;
+
+ /*
+ * System wants to go to L1 on its own. If we are going from L2/L3,
+ * remember when 20ms will expire. If from L0, set the value so that
+ * next switch to L0 won't have to wait.
+ */
+ if (new_level == ARMADA_37XX_DVFS_LOAD_1) {
+ if (cur_level == ARMADA_37XX_DVFS_LOAD_0)
+ pm_cpu->l1_expiration = jiffies;
+ else
+ pm_cpu->l1_expiration = jiffies + msecs_to_jiffies(20);
return;
+ }
+
+ /*
+ * If we are setting to L2/L3, just invalidate L1 expiration time,
+ * sleeping is not needed.
+ */
+ if (rate < 1000*1000*1000)
+ goto invalidate_l1_exp;
+
+ /*
+ * We are going to L0 with rate >= 1GHz. Check whether we have been at
+ * L1 for long enough time. If not, go to L1 for 20ms.
+ */
+ if (pm_cpu->l1_expiration && jiffies >= pm_cpu->l1_expiration)
+ goto invalidate_l1_exp;
regmap_update_bits(base, ARMADA_37XX_NB_CPU_LOAD,
ARMADA_37XX_NB_CPU_LOAD_MASK,
ARMADA_37XX_DVFS_LOAD_1);
msleep(20);
+
+invalidate_l1_exp:
+ pm_cpu->l1_expiration = 0;
}
static int clk_pm_cpu_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -578,7 +584,9 @@ static int clk_pm_cpu_set_rate(struct clk_hw *hw, unsigned long rate,
reg = ARMADA_37XX_NB_CPU_LOAD;
mask = ARMADA_37XX_NB_CPU_LOAD_MASK;
- clk_pm_cpu_set_rate_wa(rate, base);
+ /* Apply workaround when base CPU frequency is 1000 or 1200 MHz */
+ if (parent_rate >= 1000*1000*1000)
+ clk_pm_cpu_set_rate_wa(pm_cpu, load_level, rate, base);
regmap_update_bits(base, reg, mask, load_level);
@@ -592,7 +600,6 @@ static int clk_pm_cpu_set_rate(struct clk_hw *hw, unsigned long rate,
static const struct clk_ops clk_pm_cpu_ops = {
.get_parent = clk_pm_cpu_get_parent,
- .set_parent = clk_pm_cpu_set_parent,
.round_rate = clk_pm_cpu_round_rate,
.set_rate = clk_pm_cpu_set_rate,
.recalc_rate = clk_pm_cpu_recalc_rate,
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 85de313ddec2..c3038cdc6865 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -13,7 +13,8 @@ config CPU_FREQ
clock speed, you need to either enable a dynamic cpufreq governor
(see below) after boot, or use a userspace tool.
- For details, take a look at <file:Documentation/cpu-freq>.
+ For details, take a look at
+ <file:Documentation/admin-guide/pm/cpufreq.rst>.
If in doubt, say N.
@@ -140,8 +141,6 @@ config CPU_FREQ_GOV_USERSPACE
To compile this driver as a module, choose M here: the
module will be called cpufreq_userspace.
- For details, take a look at <file:Documentation/cpu-freq/>.
-
If in doubt, say Y.
config CPU_FREQ_GOV_ONDEMAND
@@ -158,7 +157,8 @@ config CPU_FREQ_GOV_ONDEMAND
To compile this driver as a module, choose M here: the
module will be called cpufreq_ondemand.
- For details, take a look at linux/Documentation/cpu-freq.
+ For details, take a look at
+ <file:Documentation/admin-guide/pm/cpufreq.rst>.
If in doubt, say N.
@@ -182,7 +182,8 @@ config CPU_FREQ_GOV_CONSERVATIVE
To compile this driver as a module, choose M here: the
module will be called cpufreq_conservative.
- For details, take a look at linux/Documentation/cpu-freq.
+ For details, take a look at
+ <file:Documentation/admin-guide/pm/cpufreq.rst>.
If in doubt, say N.
@@ -246,8 +247,6 @@ config IA64_ACPI_CPUFREQ
This driver adds a CPUFreq driver which utilizes the ACPI
Processor Performance States.
- For details, take a look at <file:Documentation/cpu-freq/>.
-
If in doubt, say N.
endif
@@ -271,8 +270,6 @@ config LOONGSON2_CPUFREQ
Loongson2F and it's successors support this feature.
- For details, take a look at <file:Documentation/cpu-freq/>.
-
If in doubt, say N.
config LOONGSON1_CPUFREQ
@@ -282,8 +279,6 @@ config LOONGSON1_CPUFREQ
This option adds a CPUFreq driver for loongson1 processors which
support software configurable cpu frequency.
- For details, take a look at <file:Documentation/cpu-freq/>.
-
If in doubt, say N.
endif
@@ -293,8 +288,6 @@ config SPARC_US3_CPUFREQ
help
This adds the CPUFreq driver for UltraSPARC-III processors.
- For details, take a look at <file:Documentation/cpu-freq>.
-
If in doubt, say N.
config SPARC_US2E_CPUFREQ
@@ -302,8 +295,6 @@ config SPARC_US2E_CPUFREQ
help
This adds the CPUFreq driver for UltraSPARC-IIe processors.
- For details, take a look at <file:Documentation/cpu-freq>.
-
If in doubt, say N.
endif
@@ -318,8 +309,6 @@ config SH_CPU_FREQ
will also generate a notice in the boot log before disabling
itself if the CPU in question is not capable of rate rounding.
- For details, take a look at <file:Documentation/cpu-freq>.
-
If unsure, say N.
endif
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index e65e0a43be64..a5c5f70acfc9 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -19,6 +19,16 @@ config ACPI_CPPC_CPUFREQ
If in doubt, say N.
+config ACPI_CPPC_CPUFREQ_FIE
+ bool "Frequency Invariance support for CPPC cpufreq driver"
+ depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY
+ default y
+ help
+ This extends frequency invariance support in the CPPC cpufreq driver,
+ by using CPPC delivered and reference performance counters.
+
+ If in doubt, say N.
+
config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM
tristate "Allwinner nvmem based SUN50I CPUFreq driver"
depends on ARCH_SUNXI
diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c
index b4af4094309b..3fc98a3ffd91 100644
--- a/drivers/cpufreq/armada-37xx-cpufreq.c
+++ b/drivers/cpufreq/armada-37xx-cpufreq.c
@@ -25,6 +25,10 @@
#include "cpufreq-dt.h"
+/* Clk register set */
+#define ARMADA_37XX_CLK_TBG_SEL 0
+#define ARMADA_37XX_CLK_TBG_SEL_CPU_OFF 22
+
/* Power management in North Bridge register set */
#define ARMADA_37XX_NB_L0L1 0x18
#define ARMADA_37XX_NB_L2L3 0x1C
@@ -69,6 +73,8 @@
#define LOAD_LEVEL_NR 4
#define MIN_VOLT_MV 1000
+#define MIN_VOLT_MV_FOR_L1_1000MHZ 1108
+#define MIN_VOLT_MV_FOR_L1_1200MHZ 1155
/* AVS value for the corresponding voltage (in mV) */
static int avs_map[] = {
@@ -80,6 +86,8 @@ static int avs_map[] = {
};
struct armada37xx_cpufreq_state {
+ struct platform_device *pdev;
+ struct device *cpu_dev;
struct regmap *regmap;
u32 nb_l0l1;
u32 nb_l2l3;
@@ -120,10 +128,15 @@ static struct armada_37xx_dvfs *armada_37xx_cpu_freq_info_get(u32 freq)
* will be configured then the DVFS will be enabled.
*/
static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base,
- struct clk *clk, u8 *divider)
+ struct regmap *clk_base, u8 *divider)
{
+ u32 cpu_tbg_sel;
int load_lvl;
- struct clk *parent;
+
+ /* Determine to which TBG clock is CPU connected */
+ regmap_read(clk_base, ARMADA_37XX_CLK_TBG_SEL, &cpu_tbg_sel);
+ cpu_tbg_sel >>= ARMADA_37XX_CLK_TBG_SEL_CPU_OFF;
+ cpu_tbg_sel &= ARMADA_37XX_NB_TBG_SEL_MASK;
for (load_lvl = 0; load_lvl < LOAD_LEVEL_NR; load_lvl++) {
unsigned int reg, mask, val, offset = 0;
@@ -142,6 +155,11 @@ static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base,
mask = (ARMADA_37XX_NB_CLK_SEL_MASK
<< ARMADA_37XX_NB_CLK_SEL_OFF);
+ /* Set TBG index, for all levels we use the same TBG */
+ val = cpu_tbg_sel << ARMADA_37XX_NB_TBG_SEL_OFF;
+ mask = (ARMADA_37XX_NB_TBG_SEL_MASK
+ << ARMADA_37XX_NB_TBG_SEL_OFF);
+
/*
* Set cpu divider based on the pre-computed array in
* order to have balanced step.
@@ -160,14 +178,6 @@ static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base,
regmap_update_bits(base, reg, mask, val);
}
-
- /*
- * Set cpu clock source, for all the level we keep the same
- * clock source that the one already configured. For this one
- * we need to use the clock framework
- */
- parent = clk_get_parent(clk);
- clk_set_parent(clk, parent);
}
/*
@@ -202,6 +212,8 @@ static u32 armada_37xx_avs_val_match(int target_vm)
* - L2 & L3 voltage should be about 150mv smaller than L0 voltage.
* This function calculates L1 & L2 & L3 AVS values dynamically based
* on L0 voltage and fill all AVS values to the AVS value table.
+ * When base CPU frequency is 1000 or 1200 MHz then there is additional
+ * minimal avs value for load L1.
*/
static void __init armada37xx_cpufreq_avs_configure(struct regmap *base,
struct armada_37xx_dvfs *dvfs)
@@ -233,6 +245,19 @@ static void __init armada37xx_cpufreq_avs_configure(struct regmap *base,
for (load_level = 1; load_level < LOAD_LEVEL_NR; load_level++)
dvfs->avs[load_level] = avs_min;
+ /*
+ * Set the avs values for load L0 and L1 when base CPU frequency
+ * is 1000/1200 MHz to its typical initial values according to
+ * the Armada 3700 Hardware Specifications.
+ */
+ if (dvfs->cpu_freq_max >= 1000*1000*1000) {
+ if (dvfs->cpu_freq_max >= 1200*1000*1000)
+ avs_min = armada_37xx_avs_val_match(MIN_VOLT_MV_FOR_L1_1200MHZ);
+ else
+ avs_min = armada_37xx_avs_val_match(MIN_VOLT_MV_FOR_L1_1000MHZ);
+ dvfs->avs[0] = dvfs->avs[1] = avs_min;
+ }
+
return;
}
@@ -252,6 +277,26 @@ static void __init armada37xx_cpufreq_avs_configure(struct regmap *base,
target_vm = avs_map[l0_vdd_min] - 150;
target_vm = target_vm > MIN_VOLT_MV ? target_vm : MIN_VOLT_MV;
dvfs->avs[2] = dvfs->avs[3] = armada_37xx_avs_val_match(target_vm);
+
+ /*
+ * Fix the avs value for load L1 when base CPU frequency is 1000/1200 MHz,
+ * otherwise the CPU gets stuck when switching from load L1 to load L0.
+ * Also ensure that avs value for load L1 is not higher than for L0.
+ */
+ if (dvfs->cpu_freq_max >= 1000*1000*1000) {
+ u32 avs_min_l1;
+
+ if (dvfs->cpu_freq_max >= 1200*1000*1000)
+ avs_min_l1 = armada_37xx_avs_val_match(MIN_VOLT_MV_FOR_L1_1200MHZ);
+ else
+ avs_min_l1 = armada_37xx_avs_val_match(MIN_VOLT_MV_FOR_L1_1000MHZ);
+
+ if (avs_min_l1 > dvfs->avs[0])
+ avs_min_l1 = dvfs->avs[0];
+
+ if (dvfs->avs[1] < avs_min_l1)
+ dvfs->avs[1] = avs_min_l1;
+ }
}
static void __init armada37xx_cpufreq_avs_setup(struct regmap *base,
@@ -357,12 +402,17 @@ static int __init armada37xx_cpufreq_driver_init(void)
struct armada_37xx_dvfs *dvfs;
struct platform_device *pdev;
unsigned long freq;
- unsigned int cur_frequency, base_frequency;
- struct regmap *nb_pm_base, *avs_base;
+ unsigned int base_frequency;
+ struct regmap *nb_clk_base, *nb_pm_base, *avs_base;
struct device *cpu_dev;
int load_lvl, ret;
struct clk *clk, *parent;
+ nb_clk_base =
+ syscon_regmap_lookup_by_compatible("marvell,armada-3700-periph-clock-nb");
+ if (IS_ERR(nb_clk_base))
+ return -ENODEV;
+
nb_pm_base =
syscon_regmap_lookup_by_compatible("marvell,armada-3700-nb-pm");
@@ -413,15 +463,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
return -EINVAL;
}
- /* Get nominal (current) CPU frequency */
- cur_frequency = clk_get_rate(clk);
- if (!cur_frequency) {
- dev_err(cpu_dev, "Failed to get clock rate for CPU\n");
- clk_put(clk);
- return -EINVAL;
- }
-
- dvfs = armada_37xx_cpu_freq_info_get(cur_frequency);
+ dvfs = armada_37xx_cpu_freq_info_get(base_frequency);
if (!dvfs) {
clk_put(clk);
return -EINVAL;
@@ -439,7 +481,7 @@ static int __init armada37xx_cpufreq_driver_init(void)
armada37xx_cpufreq_avs_configure(avs_base, dvfs);
armada37xx_cpufreq_avs_setup(avs_base, dvfs);
- armada37xx_cpufreq_dvfs_setup(nb_pm_base, clk, dvfs->divider);
+ armada37xx_cpufreq_dvfs_setup(nb_pm_base, nb_clk_base, dvfs->divider);
clk_put(clk);
for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR;
@@ -466,6 +508,9 @@ static int __init armada37xx_cpufreq_driver_init(void)
if (ret)
goto disable_dvfs;
+ armada37xx_cpufreq_state->cpu_dev = cpu_dev;
+ armada37xx_cpufreq_state->pdev = pdev;
+ platform_set_drvdata(pdev, dvfs);
return 0;
disable_dvfs:
@@ -473,7 +518,7 @@ disable_dvfs:
remove_opp:
/* clean-up the already added opp before leaving */
while (load_lvl-- > ARMADA_37XX_DVFS_LOAD_0) {
- freq = cur_frequency / dvfs->divider[load_lvl];
+ freq = base_frequency / dvfs->divider[load_lvl];
dev_pm_opp_remove(cpu_dev, freq);
}
@@ -484,6 +529,26 @@ remove_opp:
/* late_initcall, to guarantee the driver is loaded after A37xx clock driver */
late_initcall(armada37xx_cpufreq_driver_init);
+static void __exit armada37xx_cpufreq_driver_exit(void)
+{
+ struct platform_device *pdev = armada37xx_cpufreq_state->pdev;
+ struct armada_37xx_dvfs *dvfs = platform_get_drvdata(pdev);
+ unsigned long freq;
+ int load_lvl;
+
+ platform_device_unregister(pdev);
+
+ armada37xx_cpufreq_disable_dvfs(armada37xx_cpufreq_state->regmap);
+
+ for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR; load_lvl++) {
+ freq = dvfs->cpu_freq_max / dvfs->divider[load_lvl];
+ dev_pm_opp_remove(armada37xx_cpufreq_state->cpu_dev, freq);
+ }
+
+ kfree(armada37xx_cpufreq_state);
+}
+module_exit(armada37xx_cpufreq_driver_exit);
+
static const struct of_device_id __maybe_unused armada37xx_cpufreq_of_match[] = {
{ .compatible = "marvell,armada-3700-nb-pm" },
{ },
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 8a482c434ea6..3848b4c222e1 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -10,14 +10,18 @@
#define pr_fmt(fmt) "CPPC Cpufreq:" fmt
+#include <linux/arch_topology.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/dmi.h>
+#include <linux/irq_work.h>
+#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/vmalloc.h>
+#include <uapi/linux/sched/types.h>
#include <asm/unaligned.h>
@@ -57,6 +61,204 @@ static struct cppc_workaround_oem_info wa_info[] = {
}
};
+#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
+
+/* Frequency invariance support */
+struct cppc_freq_invariance {
+ int cpu;
+ struct irq_work irq_work;
+ struct kthread_work work;
+ struct cppc_perf_fb_ctrs prev_perf_fb_ctrs;
+ struct cppc_cpudata *cpu_data;
+};
+
+static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
+static struct kthread_worker *kworker_fie;
+static bool fie_disabled;
+
+static struct cpufreq_driver cppc_cpufreq_driver;
+static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
+static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
+ struct cppc_perf_fb_ctrs fb_ctrs_t0,
+ struct cppc_perf_fb_ctrs fb_ctrs_t1);
+
+/**
+ * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
+ * @work: The work item.
+ *
+ * The CPPC driver register itself with the topology core to provide its own
+ * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which
+ * gets called by the scheduler on every tick.
+ *
+ * Note that the arch specific counters have higher priority than CPPC counters,
+ * if available, though the CPPC driver doesn't need to have any special
+ * handling for that.
+ *
+ * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we
+ * reach here from hard-irq context), which then schedules a normal work item
+ * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable
+ * based on the counter updates since the last tick.
+ */
+static void cppc_scale_freq_workfn(struct kthread_work *work)
+{
+ struct cppc_freq_invariance *cppc_fi;
+ struct cppc_perf_fb_ctrs fb_ctrs = {0};
+ struct cppc_cpudata *cpu_data;
+ unsigned long local_freq_scale;
+ u64 perf;
+
+ cppc_fi = container_of(work, struct cppc_freq_invariance, work);
+ cpu_data = cppc_fi->cpu_data;
+
+ if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) {
+ pr_warn("%s: failed to read perf counters\n", __func__);
+ return;
+ }
+
+ cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
+ perf = cppc_perf_from_fbctrs(cpu_data, cppc_fi->prev_perf_fb_ctrs,
+ fb_ctrs);
+
+ perf <<= SCHED_CAPACITY_SHIFT;
+ local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf);
+ if (WARN_ON(local_freq_scale > 1024))
+ local_freq_scale = 1024;
+
+ per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale;
+}
+
+static void cppc_irq_work(struct irq_work *irq_work)
+{
+ struct cppc_freq_invariance *cppc_fi;
+
+ cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work);
+ kthread_queue_work(kworker_fie, &cppc_fi->work);
+}
+
+static void cppc_scale_freq_tick(void)
+{
+ struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id());
+
+ /*
+ * cppc_get_perf_ctrs() can potentially sleep, call that from the right
+ * context.
+ */
+ irq_work_queue(&cppc_fi->irq_work);
+}
+
+static struct scale_freq_data cppc_sftd = {
+ .source = SCALE_FREQ_SOURCE_CPPC,
+ .set_freq_scale = cppc_scale_freq_tick,
+};
+
+static void cppc_freq_invariance_policy_init(struct cpufreq_policy *policy,
+ struct cppc_cpudata *cpu_data)
+{
+ struct cppc_perf_fb_ctrs fb_ctrs = {0};
+ struct cppc_freq_invariance *cppc_fi;
+ int i, ret;
+
+ if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
+ return;
+
+ if (fie_disabled)
+ return;
+
+ for_each_cpu(i, policy->cpus) {
+ cppc_fi = &per_cpu(cppc_freq_inv, i);
+ cppc_fi->cpu = i;
+ cppc_fi->cpu_data = cpu_data;
+ kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn);
+ init_irq_work(&cppc_fi->irq_work, cppc_irq_work);
+
+ ret = cppc_get_perf_ctrs(i, &fb_ctrs);
+ if (ret) {
+ pr_warn("%s: failed to read perf counters: %d\n",
+ __func__, ret);
+ fie_disabled = true;
+ } else {
+ cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
+ }
+ }
+}
+
+static void __init cppc_freq_invariance_init(void)
+{
+ struct sched_attr attr = {
+ .size = sizeof(struct sched_attr),
+ .sched_policy = SCHED_DEADLINE,
+ .sched_nice = 0,
+ .sched_priority = 0,
+ /*
+ * Fake (unused) bandwidth; workaround to "fix"
+ * priority inheritance.
+ */
+ .sched_runtime = 1000000,
+ .sched_deadline = 10000000,
+ .sched_period = 10000000,
+ };
+ int ret;
+
+ if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
+ return;
+
+ if (fie_disabled)
+ return;
+
+ kworker_fie = kthread_create_worker(0, "cppc_fie");
+ if (IS_ERR(kworker_fie))
+ return;
+
+ ret = sched_setattr_nocheck(kworker_fie->task, &attr);
+ if (ret) {
+ pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__,
+ ret);
+ kthread_destroy_worker(kworker_fie);
+ return;
+ }
+
+ /* Register for freq-invariance */
+ topology_set_scale_freq_source(&cppc_sftd, cpu_present_mask);
+}
+
+static void cppc_freq_invariance_exit(void)
+{
+ struct cppc_freq_invariance *cppc_fi;
+ int i;
+
+ if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
+ return;
+
+ if (fie_disabled)
+ return;
+
+ topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, cpu_present_mask);
+
+ for_each_possible_cpu(i) {
+ cppc_fi = &per_cpu(cppc_freq_inv, i);
+ irq_work_sync(&cppc_fi->irq_work);
+ }
+
+ kthread_destroy_worker(kworker_fie);
+ kworker_fie = NULL;
+}
+
+#else
+static inline void
+cppc_freq_invariance_policy_init(struct cpufreq_policy *policy,
+ struct cppc_cpudata *cpu_data)
+{
+}
+
+static inline void cppc_freq_invariance_init(void)
+{
+}
+
+static inline void cppc_freq_invariance_exit(void)
+{
+}
+#endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */
+
/* Callback function used to retrieve the max frequency from DMI */
static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
{
@@ -216,26 +418,16 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
{
unsigned long implementor = read_cpuid_implementor();
unsigned long part_num = read_cpuid_part_number();
- unsigned int delay_us = 0;
switch (implementor) {
case ARM_CPU_IMP_QCOM:
switch (part_num) {
case QCOM_CPU_PART_FALKOR_V1:
case QCOM_CPU_PART_FALKOR:
- delay_us = 10000;
- break;
- default:
- delay_us = cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
- break;
+ return 10000;
}
- break;
- default:
- delay_us = cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
- break;
}
-
- return delay_us;
+ return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
}
#else
@@ -355,9 +547,12 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
cpu_data->perf_ctrls.desired_perf = caps->highest_perf;
ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
- if (ret)
+ if (ret) {
pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
caps->highest_perf, cpu, ret);
+ } else {
+ cppc_freq_invariance_policy_init(policy, cpu_data);
+ }
return ret;
}
@@ -370,12 +565,12 @@ static inline u64 get_delta(u64 t1, u64 t0)
return (u32)t1 - (u32)t0;
}
-static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
- struct cppc_perf_fb_ctrs fb_ctrs_t0,
- struct cppc_perf_fb_ctrs fb_ctrs_t1)
+static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
+ struct cppc_perf_fb_ctrs fb_ctrs_t0,
+ struct cppc_perf_fb_ctrs fb_ctrs_t1)
{
u64 delta_reference, delta_delivered;
- u64 reference_perf, delivered_perf;
+ u64 reference_perf;
reference_perf = fb_ctrs_t0.reference_perf;
@@ -384,12 +579,21 @@ static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
delta_delivered = get_delta(fb_ctrs_t1.delivered,
fb_ctrs_t0.delivered);
- /* Check to avoid divide-by zero */
- if (delta_reference || delta_delivered)
- delivered_perf = (reference_perf * delta_delivered) /
- delta_reference;
- else
- delivered_perf = cpu_data->perf_ctrls.desired_perf;
+ /* Check to avoid divide-by zero and invalid delivered_perf */
+ if (!delta_reference || !delta_delivered)
+ return cpu_data->perf_ctrls.desired_perf;
+
+ return (reference_perf * delta_delivered) / delta_reference;
+}
+
+static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
+ struct cppc_perf_fb_ctrs fb_ctrs_t0,
+ struct cppc_perf_fb_ctrs fb_ctrs_t1)
+{
+ u64 delivered_perf;
+
+ delivered_perf = cppc_perf_from_fbctrs(cpu_data, fb_ctrs_t0,
+ fb_ctrs_t1);
return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
}
@@ -514,6 +718,8 @@ static void cppc_check_hisi_workaround(void)
static int __init cppc_cpufreq_init(void)
{
+ int ret;
+
if ((acpi_disabled) || !acpi_cpc_valid())
return -ENODEV;
@@ -521,7 +727,11 @@ static int __init cppc_cpufreq_init(void)
cppc_check_hisi_workaround();
- return cpufreq_register_driver(&cppc_cpufreq_driver);
+ ret = cpufreq_register_driver(&cppc_cpufreq_driver);
+ if (!ret)
+ cppc_freq_invariance_init();
+
+ return ret;
}
static inline void free_cpu_data(void)
@@ -538,6 +748,7 @@ static inline void free_cpu_data(void)
static void __exit cppc_cpufreq_exit(void)
{
+ cppc_freq_invariance_exit();
cpufreq_unregister_driver(&cppc_cpufreq_driver);
free_cpu_data();
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index b1e1bdc63b01..ece52863ba62 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -255,10 +255,15 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu)
* before updating priv->cpus. Otherwise, we will end up creating
* duplicate OPPs for the CPUs.
*
- * OPPs might be populated at runtime, don't check for error here.
+ * OPPs might be populated at runtime, don't fail for error here unless
+ * it is -EPROBE_DEFER.
*/
- if (!dev_pm_opp_of_cpumask_add_table(priv->cpus))
+ ret = dev_pm_opp_of_cpumask_add_table(priv->cpus);
+ if (!ret) {
priv->have_static_opps = true;
+ } else if (ret == -EPROBE_DEFER) {
+ goto out;
+ }
/*
* The OPP table must be initialized, statically or dynamically, by this
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 1d1b563cea4b..802abc925b2a 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -42,9 +42,6 @@ static LIST_HEAD(cpufreq_policy_list);
#define for_each_inactive_policy(__policy) \
for_each_suitable_policy(__policy, false)
-#define for_each_policy(__policy) \
- list_for_each_entry(__policy, &cpufreq_policy_list, policy_list)
-
/* Iterate over governors */
static LIST_HEAD(cpufreq_governor_list);
#define for_each_governor(__governor) \
diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c
index 2efe7189ccc4..c6bdc455517f 100644
--- a/drivers/cpufreq/ia64-acpi-cpufreq.c
+++ b/drivers/cpufreq/ia64-acpi-cpufreq.c
@@ -54,7 +54,7 @@ processor_set_pstate (
retval = ia64_pal_set_pstate((u64)value);
if (retval) {
- pr_debug("Failed to set freq to 0x%x, with error 0x%lx\n",
+ pr_debug("Failed to set freq to 0x%x, with error 0x%llx\n",
value, retval);
return -ENODEV;
}
@@ -77,7 +77,7 @@ processor_get_pstate (
if (retval)
pr_debug("Failed to get current freq with "
- "error 0x%lx, idx 0x%x\n", retval, *value);
+ "error 0x%llx, idx 0x%x\n", retval, *value);
return (int)retval;
}
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 5175ae3cac44..f0401064d7aa 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -819,19 +819,21 @@ static struct freq_attr *hwp_cpufreq_attrs[] = {
NULL,
};
-static void intel_pstate_get_hwp_max(struct cpudata *cpu, int *phy_max,
- int *current_max)
+static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
{
u64 cap;
rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap);
WRITE_ONCE(cpu->hwp_cap_cached, cap);
- if (global.no_turbo || global.turbo_disabled)
- *current_max = HWP_GUARANTEED_PERF(cap);
- else
- *current_max = HWP_HIGHEST_PERF(cap);
+ cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(cap);
+ cpu->pstate.turbo_pstate = HWP_HIGHEST_PERF(cap);
+}
- *phy_max = HWP_HIGHEST_PERF(cap);
+static void intel_pstate_get_hwp_cap(struct cpudata *cpu)
+{
+ __intel_pstate_get_hwp_cap(cpu);
+ cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
+ cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
}
static void intel_pstate_hwp_set(unsigned int cpu)
@@ -1195,12 +1197,13 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
static void update_qos_request(enum freq_qos_req_type type)
{
- int max_state, turbo_max, freq, i, perf_pct;
struct freq_qos_request *req;
struct cpufreq_policy *policy;
+ int i;
for_each_possible_cpu(i) {
struct cpudata *cpu = all_cpu_data[i];
+ unsigned int freq, perf_pct;
policy = cpufreq_cpu_get(i);
if (!policy)
@@ -1213,9 +1216,7 @@ static void update_qos_request(enum freq_qos_req_type type)
continue;
if (hwp_active)
- intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
- else
- turbo_max = cpu->pstate.turbo_pstate;
+ intel_pstate_get_hwp_cap(cpu);
if (type == FREQ_QOS_MIN) {
perf_pct = global.min_perf_pct;
@@ -1224,8 +1225,7 @@ static void update_qos_request(enum freq_qos_req_type type)
perf_pct = global.max_perf_pct;
}
- freq = DIV_ROUND_UP(turbo_max * perf_pct, 100);
- freq *= cpu->pstate.scaling;
+ freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * perf_pct, 100);
if (freq_qos_update_request(req, freq) < 0)
pr_warn("Failed to update freq constraint: CPU%d\n", i);
@@ -1715,21 +1715,17 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
{
cpu->pstate.min_pstate = pstate_funcs.get_min();
cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
- cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
cpu->pstate.scaling = pstate_funcs.get_scaling();
if (hwp_active && !hwp_mode_bdw) {
- unsigned int phy_max, current_max;
-
- intel_pstate_get_hwp_max(cpu, &phy_max, &current_max);
- cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
- cpu->pstate.turbo_pstate = phy_max;
- cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(READ_ONCE(cpu->hwp_cap_cached));
+ __intel_pstate_get_hwp_cap(cpu);
} else {
- cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
cpu->pstate.max_pstate = pstate_funcs.get_max();
+ cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
}
+
cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
+ cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
if (pstate_funcs.get_aperf_mperf_shift)
cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
@@ -2199,41 +2195,34 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu,
unsigned int policy_min,
unsigned int policy_max)
{
+ int scaling = cpu->pstate.scaling;
int32_t max_policy_perf, min_policy_perf;
- int max_state, turbo_max;
- int max_freq;
/*
- * HWP needs some special consideration, because on BDX the
- * HWP_REQUEST uses abstract value to represent performance
- * rather than pure ratios.
+ * HWP needs some special consideration, because HWP_REQUEST uses
+ * abstract values to represent performance rather than pure ratios.
*/
- if (hwp_active) {
- intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
- } else {
- max_state = global.no_turbo || global.turbo_disabled ?
- cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
- turbo_max = cpu->pstate.turbo_pstate;
- }
- max_freq = max_state * cpu->pstate.scaling;
+ if (hwp_active)
+ intel_pstate_get_hwp_cap(cpu);
- max_policy_perf = max_state * policy_max / max_freq;
+ max_policy_perf = policy_max / scaling;
if (policy_max == policy_min) {
min_policy_perf = max_policy_perf;
} else {
- min_policy_perf = max_state * policy_min / max_freq;
+ min_policy_perf = policy_min / scaling;
min_policy_perf = clamp_t(int32_t, min_policy_perf,
0, max_policy_perf);
}
- pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n",
- cpu->cpu, max_state, min_policy_perf, max_policy_perf);
+ pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n",
+ cpu->cpu, min_policy_perf, max_policy_perf);
/* Normalize user input to [min_perf, max_perf] */
if (per_cpu_limits) {
cpu->min_perf_ratio = min_policy_perf;
cpu->max_perf_ratio = max_policy_perf;
} else {
+ int turbo_max = cpu->pstate.turbo_pstate;
int32_t global_min, global_max;
/* Global limits are in percent of the maximum turbo P-state. */
@@ -2322,10 +2311,9 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu,
update_turbo_state();
if (hwp_active) {
- int max_state, turbo_max;
-
- intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
- max_freq = max_state * cpu->pstate.scaling;
+ intel_pstate_get_hwp_cap(cpu);
+ max_freq = global.no_turbo || global.turbo_disabled ?
+ cpu->pstate.max_freq : cpu->pstate.turbo_freq;
} else {
max_freq = intel_pstate_get_max_freq(cpu);
}
@@ -2416,25 +2404,15 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
cpu->max_perf_ratio = 0xFF;
cpu->min_perf_ratio = 0;
- policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
- policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
-
/* cpuinfo and default policy values */
policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
update_turbo_state();
global.turbo_disabled_mf = global.turbo_disabled;
policy->cpuinfo.max_freq = global.turbo_disabled ?
- cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
- policy->cpuinfo.max_freq *= cpu->pstate.scaling;
-
- if (hwp_active) {
- unsigned int max_freq;
-
- max_freq = global.turbo_disabled ?
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
- if (max_freq < policy->cpuinfo.max_freq)
- policy->cpuinfo.max_freq = max_freq;
- }
+
+ policy->min = policy->cpuinfo.min_freq;
+ policy->max = policy->cpuinfo.max_freq;
intel_pstate_init_acpi_perf_limits(policy);
@@ -2683,10 +2661,10 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum,
static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
- int max_state, turbo_max, min_freq, max_freq, ret;
struct freq_qos_request *req;
struct cpudata *cpu;
struct device *dev;
+ int ret, freq;
dev = get_cpu_device(policy->cpu);
if (!dev)
@@ -2711,30 +2689,31 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (hwp_active) {
u64 value;
- intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state);
policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP;
+
+ intel_pstate_get_hwp_cap(cpu);
+
rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
WRITE_ONCE(cpu->hwp_req_cached, value);
+
cpu->epp_cached = intel_pstate_get_epp(cpu, value);
} else {
- turbo_max = cpu->pstate.turbo_pstate;
policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY;
}
- min_freq = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100);
- min_freq *= cpu->pstate.scaling;
- max_freq = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
- max_freq *= cpu->pstate.scaling;
+ freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * global.min_perf_pct, 100);
ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MIN,
- min_freq);
+ freq);
if (ret < 0) {
dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
goto free_req;
}
+ freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * global.max_perf_pct, 100);
+
ret = freq_qos_add_request(&policy->constraints, req + 1, FREQ_QOS_MAX,
- max_freq);
+ freq);
if (ret < 0) {
dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
goto remove_min_req;
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index 69786e5bbf05..ad7d4f272ddc 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -91,7 +91,7 @@ static DEFINE_MUTEX(set_freq_lock);
/* Use 800MHz when entering sleep mode */
#define SLEEP_FREQ (800 * 1000)
-/* Tracks if cpu freqency can be updated anymore */
+/* Tracks if CPU frequency can be updated anymore */
static bool no_cpufreq_access;
/*
@@ -190,7 +190,7 @@ static u32 clkdiv_val[5][11] = {
/*
* This function set DRAM refresh counter
- * accoriding to operating frequency of DRAM
+ * according to operating frequency of DRAM
* ch: DMC port number 0 or 1
* freq: Operating frequency of DRAM(KHz)
*/
@@ -320,7 +320,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index)
/*
* 3. DMC1 refresh count for 133Mhz if (index == L4) is
- * true refresh counter is already programed in upper
+ * true refresh counter is already programmed in upper
* code. 0x287@83Mhz
*/
if (!bus_speed_changing)
@@ -378,7 +378,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index)
/*
* 6. Turn on APLL
* 6-1. Set PMS values
- * 6-2. Wait untile the PLL is locked
+ * 6-2. Wait until the PLL is locked
*/
if (index == L0)
writel_relaxed(APLL_VAL_1000, S5P_APLL_CON);
@@ -390,7 +390,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index)
} while (!(reg & (0x1 << 29)));
/*
- * 7. Change souce clock from SCLKMPLL(667Mhz)
+ * 7. Change source clock from SCLKMPLL(667Mhz)
* to SCLKA2M(200Mhz) in MFC_MUX and G3D MUX
* (667/4=166)->(200/4=50)Mhz
*/
@@ -439,8 +439,8 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index)
}
/*
- * L4 level need to change memory bus speed, hence onedram clock divier
- * and memory refresh parameter should be changed
+ * L4 level needs to change memory bus speed, hence ONEDRAM clock
+ * divider and memory refresh parameter should be changed
*/
if (bus_speed_changing) {
reg = readl_relaxed(S5P_CLK_DIV6);
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
index 0844fadc4be8..334f83e56120 100644
--- a/drivers/cpuidle/Kconfig.arm
+++ b/drivers/cpuidle/Kconfig.arm
@@ -107,7 +107,7 @@ config ARM_TEGRA_CPUIDLE
config ARM_QCOM_SPM_CPUIDLE
bool "CPU Idle Driver for Qualcomm Subsystem Power Manager (SPM)"
- depends on (ARCH_QCOM || COMPILE_TEST) && !ARM64
+ depends on (ARCH_QCOM || COMPILE_TEST) && !ARM64 && MMU
select ARM_CPU_SUSPEND
select CPU_IDLE_MULTIPLE_DRIVERS
select DT_IDLE_STATES
diff --git a/drivers/cpuidle/cpuidle-tegra.c b/drivers/cpuidle/cpuidle-tegra.c
index 191966dc8d02..508bd9f23792 100644
--- a/drivers/cpuidle/cpuidle-tegra.c
+++ b/drivers/cpuidle/cpuidle-tegra.c
@@ -48,11 +48,6 @@ enum tegra_state {
static atomic_t tegra_idle_barrier;
static atomic_t tegra_abort_flag;
-static inline bool tegra_cpuidle_using_firmware(void)
-{
- return firmware_ops->prepare_idle && firmware_ops->do_idle;
-}
-
static void tegra_cpuidle_report_cpus_state(void)
{
unsigned long cpu, lcpu, csr;
@@ -135,13 +130,9 @@ static int tegra_cpuidle_c7_enter(void)
{
int err;
- if (tegra_cpuidle_using_firmware()) {
- err = call_firmware_op(prepare_idle, TF_PM_MODE_LP2_NOFLUSH_L2);
- if (err)
- return err;
-
- return call_firmware_op(do_idle, 0);
- }
+ err = call_firmware_op(prepare_idle, TF_PM_MODE_LP2_NOFLUSH_L2);
+ if (err && err != -ENOSYS)
+ return err;
return cpu_suspend(0, tegra30_pm_secondary_cpu_suspend);
}
@@ -356,9 +347,7 @@ static int tegra_cpuidle_probe(struct platform_device *pdev)
* is disabled.
*/
if (!IS_ENABLED(CONFIG_PM_SLEEP)) {
- if (!tegra_cpuidle_using_firmware())
- tegra_cpuidle_disable_state(TEGRA_C7);
-
+ tegra_cpuidle_disable_state(TEGRA_C7);
tegra_cpuidle_disable_state(TEGRA_CC6);
}
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 4070e573bf43..f70aa17e2a8e 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -181,9 +181,13 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
*/
if (s->target_residency > 0)
s->target_residency_ns = s->target_residency * NSEC_PER_USEC;
+ else if (s->target_residency_ns < 0)
+ s->target_residency_ns = 0;
if (s->exit_latency > 0)
s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
+ else if (s->exit_latency_ns < 0)
+ s->exit_latency_ns = 0;
}
}
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index b0a7ad566081..c3aa8d6ccee3 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -271,7 +271,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
u64 predicted_ns;
u64 interactivity_req;
unsigned long nr_iowaiters;
- ktime_t delta_next;
+ ktime_t delta, delta_tick;
int i, idx;
if (data->needs_update) {
@@ -280,7 +280,12 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
}
/* determine the expected residency time, round up */
- data->next_timer_ns = tick_nohz_get_sleep_length(&delta_next);
+ delta = tick_nohz_get_sleep_length(&delta_tick);
+ if (unlikely(delta < 0)) {
+ delta = 0;
+ delta_tick = 0;
+ }
+ data->next_timer_ns = delta;
nr_iowaiters = nr_iowait_cpu(dev->cpu);
data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
@@ -318,7 +323,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* state selection.
*/
if (predicted_ns < TICK_NSEC)
- predicted_ns = delta_next;
+ predicted_ns = data->next_timer_ns;
} else {
/*
* Use the performance multiplier and the user-configurable
@@ -377,7 +382,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* stuck in the shallow one for too long.
*/
if (drv->states[idx].target_residency_ns < TICK_NSEC &&
- s->target_residency_ns <= delta_next)
+ s->target_residency_ns <= delta_tick)
idx = i;
return idx;
@@ -399,7 +404,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
predicted_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
*stop_tick = false;
- if (idx > 0 && drv->states[idx].target_residency_ns > delta_next) {
+ if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick) {
/*
* The tick is not going to be stopped and the target
* residency of the state to be returned is not within
@@ -411,7 +416,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
continue;
idx = i;
- if (drv->states[i].target_residency_ns <= delta_next)
+ if (drv->states[i].target_residency_ns <= delta_tick)
break;
}
}
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 6deaaf5f05b5..ac4bb27d69b0 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -100,8 +100,8 @@ struct teo_idle_state {
* @intervals: Saved idle duration values.
*/
struct teo_cpu {
- u64 time_span_ns;
- u64 sleep_length_ns;
+ s64 time_span_ns;
+ s64 sleep_length_ns;
struct teo_idle_state states[CPUIDLE_STATE_MAX];
int interval_idx;
u64 intervals[INTERVALS];
@@ -117,7 +117,8 @@ static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
- int i, idx_hit = -1, idx_timer = -1;
+ int i, idx_hit = 0, idx_timer = 0;
+ unsigned int hits, misses;
u64 measured_ns;
if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
@@ -174,25 +175,22 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* also increase the "early hits" metric for the state that actually
* matches the measured idle duration.
*/
- if (idx_timer >= 0) {
- unsigned int hits = cpu_data->states[idx_timer].hits;
- unsigned int misses = cpu_data->states[idx_timer].misses;
-
- hits -= hits >> DECAY_SHIFT;
- misses -= misses >> DECAY_SHIFT;
-
- if (idx_timer > idx_hit) {
- misses += PULSE;
- if (idx_hit >= 0)
- cpu_data->states[idx_hit].early_hits += PULSE;
- } else {
- hits += PULSE;
- }
+ hits = cpu_data->states[idx_timer].hits;
+ hits -= hits >> DECAY_SHIFT;
+
+ misses = cpu_data->states[idx_timer].misses;
+ misses -= misses >> DECAY_SHIFT;
- cpu_data->states[idx_timer].misses = misses;
- cpu_data->states[idx_timer].hits = hits;
+ if (idx_timer == idx_hit) {
+ hits += PULSE;
+ } else {
+ misses += PULSE;
+ cpu_data->states[idx_hit].early_hits += PULSE;
}
+ cpu_data->states[idx_timer].misses = misses;
+ cpu_data->states[idx_timer].hits = hits;
+
/*
* Save idle duration values corresponding to non-timer wakeups for
* pattern detection.
@@ -216,7 +214,7 @@ static bool teo_time_ok(u64 interval_ns)
*/
static int teo_find_shallower_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int state_idx,
- u64 duration_ns)
+ s64 duration_ns)
{
int i;
@@ -242,10 +240,10 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
- u64 duration_ns;
+ int max_early_idx, prev_max_early_idx, constraint_idx, idx0, idx, i;
unsigned int hits, misses, early_hits;
- int max_early_idx, prev_max_early_idx, constraint_idx, idx, i;
ktime_t delta_tick;
+ s64 duration_ns;
if (dev->last_state_idx >= 0) {
teo_update(drv, dev);
@@ -264,6 +262,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
prev_max_early_idx = -1;
constraint_idx = drv->state_count;
idx = -1;
+ idx0 = idx;
for (i = 0; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
@@ -324,6 +323,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
idx = i; /* first enabled state */
hits = cpu_data->states[i].hits;
misses = cpu_data->states[i].misses;
+ idx0 = i;
}
if (s->target_residency_ns > duration_ns)
@@ -376,11 +376,16 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (idx < 0) {
idx = 0; /* No states enabled. Must use 0. */
- } else if (idx > 0) {
+ } else if (idx > idx0) {
unsigned int count = 0;
u64 sum = 0;
/*
+ * The target residencies of at least two different enabled idle
+ * states are less than or equal to the current expected idle
+ * duration. Try to refine the selection using the most recent
+ * measured idle duration values.
+ *
* Count and sum the most recent idle duration values less than
* the current expected idle duration value.
*/
@@ -428,7 +433,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* till the closest timer including the tick, try to correct
* that.
*/
- if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick)
+ if (idx > idx0 &&
+ drv->states[idx].target_residency_ns > delta_tick)
idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
}
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 3273360f30f7..ec1b9d306ba6 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -744,8 +744,8 @@ static struct cpuidle_state icx_cstates[] __initdata = {
.name = "C6",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
- .exit_latency = 128,
- .target_residency = 384,
+ .exit_latency = 170,
+ .target_residency = 600,
.enter = &intel_idle,
.enter_s2idle = intel_idle_s2idle, },
{
@@ -1156,6 +1156,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl),
X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx),
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt),
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 16a17215f633..e4d4e399004b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1870,20 +1870,10 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
int err;
int i, bars = 0;
- /*
- * Power state could be unknown at this point, either due to a fresh
- * boot or a device removal call. So get the current power state
- * so that things like MSI message writing will behave as expected
- * (e.g. if the device really is in D0 at enable time).
- */
- if (dev->pm_cap) {
- u16 pmcsr;
- pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
- dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
- }
-
- if (atomic_inc_return(&dev->enable_cnt) > 1)
+ if (atomic_inc_return(&dev->enable_cnt) > 1) {
+ pci_update_current_state(dev, dev->current_state);
return 0; /* already enabled */
+ }
bridge = pci_upstream_bridge(dev);
if (bridge)
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index fdda2a737186..73cf68af9770 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -1069,6 +1069,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd),
X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd),
+ X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd),
{}
};
MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index 78213d4b5b16..cc3b22881bfe 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -150,6 +150,7 @@ static int rapl_msr_probe(struct platform_device *pdev)
case X86_VENDOR_INTEL:
rapl_msr_priv = &rapl_msr_priv_intel;
break;
+ case X86_VENDOR_HYGON:
case X86_VENDOR_AMD:
rapl_msr_priv = &rapl_msr_priv_amd;
break;