diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/base/arch_topology.c | 89 | ||||
-rw-r--r-- | drivers/base/power/domain.c | 36 | ||||
-rw-r--r-- | drivers/base/power/wakeup_stats.c | 4 | ||||
-rw-r--r-- | drivers/clk/mvebu/armada-37xx-periph.c | 83 | ||||
-rw-r--r-- | drivers/cpufreq/Kconfig | 23 | ||||
-rw-r--r-- | drivers/cpufreq/Kconfig.arm | 10 | ||||
-rw-r--r-- | drivers/cpufreq/armada-37xx-cpufreq.c | 111 | ||||
-rw-r--r-- | drivers/cpufreq/cppc_cpufreq.c | 259 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq-dt.c | 9 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 3 | ||||
-rw-r--r-- | drivers/cpufreq/ia64-acpi-cpufreq.c | 4 | ||||
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 107 | ||||
-rw-r--r-- | drivers/cpufreq/s5pv210-cpufreq.c | 14 | ||||
-rw-r--r-- | drivers/cpuidle/Kconfig.arm | 2 | ||||
-rw-r--r-- | drivers/cpuidle/cpuidle-tegra.c | 19 | ||||
-rw-r--r-- | drivers/cpuidle/driver.c | 4 | ||||
-rw-r--r-- | drivers/cpuidle/governors/menu.c | 17 | ||||
-rw-r--r-- | drivers/cpuidle/governors/teo.c | 54 | ||||
-rw-r--r-- | drivers/idle/intel_idle.c | 5 | ||||
-rw-r--r-- | drivers/pci/pci.c | 16 | ||||
-rw-r--r-- | drivers/powercap/intel_rapl_common.c | 1 | ||||
-rw-r--r-- | drivers/powercap/intel_rapl_msr.c | 1 |
22 files changed, 586 insertions, 285 deletions
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index de8587cc119e..c1179edc0f3b 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -21,17 +21,94 @@ #include <linux/sched.h> #include <linux/smp.h> +static DEFINE_PER_CPU(struct scale_freq_data *, sft_data); +static struct cpumask scale_freq_counters_mask; +static bool scale_freq_invariant; + +static bool supports_scale_freq_counters(const struct cpumask *cpus) +{ + return cpumask_subset(cpus, &scale_freq_counters_mask); +} + bool topology_scale_freq_invariant(void) { return cpufreq_supports_freq_invariance() || - arch_freq_counters_available(cpu_online_mask); + supports_scale_freq_counters(cpu_online_mask); } -__weak bool arch_freq_counters_available(const struct cpumask *cpus) +static void update_scale_freq_invariant(bool status) { - return false; + if (scale_freq_invariant == status) + return; + + /* + * Task scheduler behavior depends on frequency invariance support, + * either cpufreq or counter driven. If the support status changes as + * a result of counter initialisation and use, retrigger the build of + * scheduling domains to ensure the information is propagated properly. + */ + if (topology_scale_freq_invariant() == status) { + scale_freq_invariant = status; + rebuild_sched_domains_energy(); + } } -DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE; + +void topology_set_scale_freq_source(struct scale_freq_data *data, + const struct cpumask *cpus) +{ + struct scale_freq_data *sfd; + int cpu; + + /* + * Avoid calling rebuild_sched_domains() unnecessarily if FIE is + * supported by cpufreq. + */ + if (cpumask_empty(&scale_freq_counters_mask)) + scale_freq_invariant = topology_scale_freq_invariant(); + + for_each_cpu(cpu, cpus) { + sfd = per_cpu(sft_data, cpu); + + /* Use ARCH provided counters whenever possible */ + if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) { + per_cpu(sft_data, cpu) = data; + cpumask_set_cpu(cpu, &scale_freq_counters_mask); + } + } + + update_scale_freq_invariant(true); +} +EXPORT_SYMBOL_GPL(topology_set_scale_freq_source); + +void topology_clear_scale_freq_source(enum scale_freq_source source, + const struct cpumask *cpus) +{ + struct scale_freq_data *sfd; + int cpu; + + for_each_cpu(cpu, cpus) { + sfd = per_cpu(sft_data, cpu); + + if (sfd && sfd->source == source) { + per_cpu(sft_data, cpu) = NULL; + cpumask_clear_cpu(cpu, &scale_freq_counters_mask); + } + } + + update_scale_freq_invariant(false); +} +EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source); + +void topology_scale_freq_tick(void) +{ + struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data); + + if (sfd) + sfd->set_freq_scale(); +} + +DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; +EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale); void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, unsigned long max_freq) @@ -47,13 +124,13 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq, * want to update the scale factor with information from CPUFREQ. * Instead the scale factor will be updated from arch_scale_freq_tick. */ - if (arch_freq_counters_available(cpus)) + if (supports_scale_freq_counters(cpus)) return; scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq; for_each_cpu(i, cpus) - per_cpu(freq_scale, i) = scale; + per_cpu(arch_freq_scale, i) = scale; } DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE; diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 78c310d3179d..b6a782c31613 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1088,34 +1088,6 @@ static void genpd_sync_power_on(struct generic_pm_domain *genpd, bool use_lock, } /** - * resume_needed - Check whether to resume a device before system suspend. - * @dev: Device to check. - * @genpd: PM domain the device belongs to. - * - * There are two cases in which a device that can wake up the system from sleep - * states should be resumed by genpd_prepare(): (1) if the device is enabled - * to wake up the system and it has to remain active for this purpose while the - * system is in the sleep state and (2) if the device is not enabled to wake up - * the system from sleep states and it generally doesn't generate wakeup signals - * by itself (those signals are generated on its behalf by other parts of the - * system). In the latter case it may be necessary to reconfigure the device's - * wakeup settings during system suspend, because it may have been set up to - * signal remote wakeup from the system's working state as needed by runtime PM. - * Return 'true' in either of the above cases. - */ -static bool resume_needed(struct device *dev, - const struct generic_pm_domain *genpd) -{ - bool active_wakeup; - - if (!device_can_wakeup(dev)) - return false; - - active_wakeup = genpd_is_active_wakeup(genpd); - return device_may_wakeup(dev) ? active_wakeup : !active_wakeup; -} - -/** * genpd_prepare - Start power transition of a device in a PM domain. * @dev: Device to start the transition of. * @@ -1135,14 +1107,6 @@ static int genpd_prepare(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - /* - * If a wakeup request is pending for the device, it should be woken up - * at this point and a system wakeup event should be reported if it's - * set up to wake up the system from sleep states. - */ - if (resume_needed(dev, genpd)) - pm_runtime_resume(dev); - genpd_lock(genpd); if (genpd->prepared_count++ == 0) diff --git a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c index d638259b829a..924fac493c4f 100644 --- a/drivers/base/power/wakeup_stats.c +++ b/drivers/base/power/wakeup_stats.c @@ -137,7 +137,7 @@ static struct device *wakeup_source_device_create(struct device *parent, struct wakeup_source *ws) { struct device *dev = NULL; - int retval = -ENODEV; + int retval; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { @@ -154,7 +154,7 @@ static struct device *wakeup_source_device_create(struct device *parent, dev_set_drvdata(dev, ws); device_set_pm_not_required(dev); - retval = kobject_set_name(&dev->kobj, "wakeup%d", ws->id); + retval = dev_set_name(dev, "wakeup%d", ws->id); if (retval) goto error; diff --git a/drivers/clk/mvebu/armada-37xx-periph.c b/drivers/clk/mvebu/armada-37xx-periph.c index f5746f9ea929..32ac6b6b7530 100644 --- a/drivers/clk/mvebu/armada-37xx-periph.c +++ b/drivers/clk/mvebu/armada-37xx-periph.c @@ -84,6 +84,7 @@ struct clk_pm_cpu { void __iomem *reg_div; u8 shift_div; struct regmap *nb_pm_base; + unsigned long l1_expiration; }; #define to_clk_double_div(_hw) container_of(_hw, struct clk_double_div, hw) @@ -440,33 +441,6 @@ static u8 clk_pm_cpu_get_parent(struct clk_hw *hw) return val; } -static int clk_pm_cpu_set_parent(struct clk_hw *hw, u8 index) -{ - struct clk_pm_cpu *pm_cpu = to_clk_pm_cpu(hw); - struct regmap *base = pm_cpu->nb_pm_base; - int load_level; - - /* - * We set the clock parent only if the DVFS is available but - * not enabled. - */ - if (IS_ERR(base) || armada_3700_pm_dvfs_is_enabled(base)) - return -EINVAL; - - /* Set the parent clock for all the load level */ - for (load_level = 0; load_level < LOAD_LEVEL_NR; load_level++) { - unsigned int reg, mask, val, - offset = ARMADA_37XX_NB_TBG_SEL_OFF; - - armada_3700_pm_dvfs_update_regs(load_level, ®, &offset); - - val = index << offset; - mask = ARMADA_37XX_NB_TBG_SEL_MASK << offset; - regmap_update_bits(base, reg, mask, val); - } - return 0; -} - static unsigned long clk_pm_cpu_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) { @@ -514,8 +488,10 @@ static long clk_pm_cpu_round_rate(struct clk_hw *hw, unsigned long rate, } /* - * Switching the CPU from the L2 or L3 frequencies (300 and 200 Mhz - * respectively) to L0 frequency (1.2 Ghz) requires a significant + * Workaround when base CPU frequnecy is 1000 or 1200 MHz + * + * Switching the CPU from the L2 or L3 frequencies (250/300 or 200 MHz + * respectively) to L0 frequency (1/1.2 GHz) requires a significant * amount of time to let VDD stabilize to the appropriate * voltage. This amount of time is large enough that it cannot be * covered by the hardware countdown register. Due to this, the CPU @@ -525,26 +501,56 @@ static long clk_pm_cpu_round_rate(struct clk_hw *hw, unsigned long rate, * To work around this problem, we prevent switching directly from the * L2/L3 frequencies to the L0 frequency, and instead switch to the L1 * frequency in-between. The sequence therefore becomes: - * 1. First switch from L2/L3(200/300MHz) to L1(600MHZ) + * 1. First switch from L2/L3 (200/250/300 MHz) to L1 (500/600 MHz) * 2. Sleep 20ms for stabling VDD voltage - * 3. Then switch from L1(600MHZ) to L0(1200Mhz). + * 3. Then switch from L1 (500/600 MHz) to L0 (1000/1200 MHz). */ -static void clk_pm_cpu_set_rate_wa(unsigned long rate, struct regmap *base) +static void clk_pm_cpu_set_rate_wa(struct clk_pm_cpu *pm_cpu, + unsigned int new_level, unsigned long rate, + struct regmap *base) { unsigned int cur_level; - if (rate != 1200 * 1000 * 1000) - return; - regmap_read(base, ARMADA_37XX_NB_CPU_LOAD, &cur_level); cur_level &= ARMADA_37XX_NB_CPU_LOAD_MASK; - if (cur_level <= ARMADA_37XX_DVFS_LOAD_1) + + if (cur_level == new_level) + return; + + /* + * System wants to go to L1 on its own. If we are going from L2/L3, + * remember when 20ms will expire. If from L0, set the value so that + * next switch to L0 won't have to wait. + */ + if (new_level == ARMADA_37XX_DVFS_LOAD_1) { + if (cur_level == ARMADA_37XX_DVFS_LOAD_0) + pm_cpu->l1_expiration = jiffies; + else + pm_cpu->l1_expiration = jiffies + msecs_to_jiffies(20); return; + } + + /* + * If we are setting to L2/L3, just invalidate L1 expiration time, + * sleeping is not needed. + */ + if (rate < 1000*1000*1000) + goto invalidate_l1_exp; + + /* + * We are going to L0 with rate >= 1GHz. Check whether we have been at + * L1 for long enough time. If not, go to L1 for 20ms. + */ + if (pm_cpu->l1_expiration && jiffies >= pm_cpu->l1_expiration) + goto invalidate_l1_exp; regmap_update_bits(base, ARMADA_37XX_NB_CPU_LOAD, ARMADA_37XX_NB_CPU_LOAD_MASK, ARMADA_37XX_DVFS_LOAD_1); msleep(20); + +invalidate_l1_exp: + pm_cpu->l1_expiration = 0; } static int clk_pm_cpu_set_rate(struct clk_hw *hw, unsigned long rate, @@ -578,7 +584,9 @@ static int clk_pm_cpu_set_rate(struct clk_hw *hw, unsigned long rate, reg = ARMADA_37XX_NB_CPU_LOAD; mask = ARMADA_37XX_NB_CPU_LOAD_MASK; - clk_pm_cpu_set_rate_wa(rate, base); + /* Apply workaround when base CPU frequency is 1000 or 1200 MHz */ + if (parent_rate >= 1000*1000*1000) + clk_pm_cpu_set_rate_wa(pm_cpu, load_level, rate, base); regmap_update_bits(base, reg, mask, load_level); @@ -592,7 +600,6 @@ static int clk_pm_cpu_set_rate(struct clk_hw *hw, unsigned long rate, static const struct clk_ops clk_pm_cpu_ops = { .get_parent = clk_pm_cpu_get_parent, - .set_parent = clk_pm_cpu_set_parent, .round_rate = clk_pm_cpu_round_rate, .set_rate = clk_pm_cpu_set_rate, .recalc_rate = clk_pm_cpu_recalc_rate, diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 85de313ddec2..c3038cdc6865 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -13,7 +13,8 @@ config CPU_FREQ clock speed, you need to either enable a dynamic cpufreq governor (see below) after boot, or use a userspace tool. - For details, take a look at <file:Documentation/cpu-freq>. + For details, take a look at + <file:Documentation/admin-guide/pm/cpufreq.rst>. If in doubt, say N. @@ -140,8 +141,6 @@ config CPU_FREQ_GOV_USERSPACE To compile this driver as a module, choose M here: the module will be called cpufreq_userspace. - For details, take a look at <file:Documentation/cpu-freq/>. - If in doubt, say Y. config CPU_FREQ_GOV_ONDEMAND @@ -158,7 +157,8 @@ config CPU_FREQ_GOV_ONDEMAND To compile this driver as a module, choose M here: the module will be called cpufreq_ondemand. - For details, take a look at linux/Documentation/cpu-freq. + For details, take a look at + <file:Documentation/admin-guide/pm/cpufreq.rst>. If in doubt, say N. @@ -182,7 +182,8 @@ config CPU_FREQ_GOV_CONSERVATIVE To compile this driver as a module, choose M here: the module will be called cpufreq_conservative. - For details, take a look at linux/Documentation/cpu-freq. + For details, take a look at + <file:Documentation/admin-guide/pm/cpufreq.rst>. If in doubt, say N. @@ -246,8 +247,6 @@ config IA64_ACPI_CPUFREQ This driver adds a CPUFreq driver which utilizes the ACPI Processor Performance States. - For details, take a look at <file:Documentation/cpu-freq/>. - If in doubt, say N. endif @@ -271,8 +270,6 @@ config LOONGSON2_CPUFREQ Loongson2F and it's successors support this feature. - For details, take a look at <file:Documentation/cpu-freq/>. - If in doubt, say N. config LOONGSON1_CPUFREQ @@ -282,8 +279,6 @@ config LOONGSON1_CPUFREQ This option adds a CPUFreq driver for loongson1 processors which support software configurable cpu frequency. - For details, take a look at <file:Documentation/cpu-freq/>. - If in doubt, say N. endif @@ -293,8 +288,6 @@ config SPARC_US3_CPUFREQ help This adds the CPUFreq driver for UltraSPARC-III processors. - For details, take a look at <file:Documentation/cpu-freq>. - If in doubt, say N. config SPARC_US2E_CPUFREQ @@ -302,8 +295,6 @@ config SPARC_US2E_CPUFREQ help This adds the CPUFreq driver for UltraSPARC-IIe processors. - For details, take a look at <file:Documentation/cpu-freq>. - If in doubt, say N. endif @@ -318,8 +309,6 @@ config SH_CPU_FREQ will also generate a notice in the boot log before disabling itself if the CPU in question is not capable of rate rounding. - For details, take a look at <file:Documentation/cpu-freq>. - If unsure, say N. endif diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index e65e0a43be64..a5c5f70acfc9 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -19,6 +19,16 @@ config ACPI_CPPC_CPUFREQ If in doubt, say N. +config ACPI_CPPC_CPUFREQ_FIE + bool "Frequency Invariance support for CPPC cpufreq driver" + depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY + default y + help + This extends frequency invariance support in the CPPC cpufreq driver, + by using CPPC delivered and reference performance counters. + + If in doubt, say N. + config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM tristate "Allwinner nvmem based SUN50I CPUFreq driver" depends on ARCH_SUNXI diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index b4af4094309b..3fc98a3ffd91 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -25,6 +25,10 @@ #include "cpufreq-dt.h" +/* Clk register set */ +#define ARMADA_37XX_CLK_TBG_SEL 0 +#define ARMADA_37XX_CLK_TBG_SEL_CPU_OFF 22 + /* Power management in North Bridge register set */ #define ARMADA_37XX_NB_L0L1 0x18 #define ARMADA_37XX_NB_L2L3 0x1C @@ -69,6 +73,8 @@ #define LOAD_LEVEL_NR 4 #define MIN_VOLT_MV 1000 +#define MIN_VOLT_MV_FOR_L1_1000MHZ 1108 +#define MIN_VOLT_MV_FOR_L1_1200MHZ 1155 /* AVS value for the corresponding voltage (in mV) */ static int avs_map[] = { @@ -80,6 +86,8 @@ static int avs_map[] = { }; struct armada37xx_cpufreq_state { + struct platform_device *pdev; + struct device *cpu_dev; struct regmap *regmap; u32 nb_l0l1; u32 nb_l2l3; @@ -120,10 +128,15 @@ static struct armada_37xx_dvfs *armada_37xx_cpu_freq_info_get(u32 freq) * will be configured then the DVFS will be enabled. */ static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base, - struct clk *clk, u8 *divider) + struct regmap *clk_base, u8 *divider) { + u32 cpu_tbg_sel; int load_lvl; - struct clk *parent; + + /* Determine to which TBG clock is CPU connected */ + regmap_read(clk_base, ARMADA_37XX_CLK_TBG_SEL, &cpu_tbg_sel); + cpu_tbg_sel >>= ARMADA_37XX_CLK_TBG_SEL_CPU_OFF; + cpu_tbg_sel &= ARMADA_37XX_NB_TBG_SEL_MASK; for (load_lvl = 0; load_lvl < LOAD_LEVEL_NR; load_lvl++) { unsigned int reg, mask, val, offset = 0; @@ -142,6 +155,11 @@ static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base, mask = (ARMADA_37XX_NB_CLK_SEL_MASK << ARMADA_37XX_NB_CLK_SEL_OFF); + /* Set TBG index, for all levels we use the same TBG */ + val = cpu_tbg_sel << ARMADA_37XX_NB_TBG_SEL_OFF; + mask = (ARMADA_37XX_NB_TBG_SEL_MASK + << ARMADA_37XX_NB_TBG_SEL_OFF); + /* * Set cpu divider based on the pre-computed array in * order to have balanced step. @@ -160,14 +178,6 @@ static void __init armada37xx_cpufreq_dvfs_setup(struct regmap *base, regmap_update_bits(base, reg, mask, val); } - - /* - * Set cpu clock source, for all the level we keep the same - * clock source that the one already configured. For this one - * we need to use the clock framework - */ - parent = clk_get_parent(clk); - clk_set_parent(clk, parent); } /* @@ -202,6 +212,8 @@ static u32 armada_37xx_avs_val_match(int target_vm) * - L2 & L3 voltage should be about 150mv smaller than L0 voltage. * This function calculates L1 & L2 & L3 AVS values dynamically based * on L0 voltage and fill all AVS values to the AVS value table. + * When base CPU frequency is 1000 or 1200 MHz then there is additional + * minimal avs value for load L1. */ static void __init armada37xx_cpufreq_avs_configure(struct regmap *base, struct armada_37xx_dvfs *dvfs) @@ -233,6 +245,19 @@ static void __init armada37xx_cpufreq_avs_configure(struct regmap *base, for (load_level = 1; load_level < LOAD_LEVEL_NR; load_level++) dvfs->avs[load_level] = avs_min; + /* + * Set the avs values for load L0 and L1 when base CPU frequency + * is 1000/1200 MHz to its typical initial values according to + * the Armada 3700 Hardware Specifications. + */ + if (dvfs->cpu_freq_max >= 1000*1000*1000) { + if (dvfs->cpu_freq_max >= 1200*1000*1000) + avs_min = armada_37xx_avs_val_match(MIN_VOLT_MV_FOR_L1_1200MHZ); + else + avs_min = armada_37xx_avs_val_match(MIN_VOLT_MV_FOR_L1_1000MHZ); + dvfs->avs[0] = dvfs->avs[1] = avs_min; + } + return; } @@ -252,6 +277,26 @@ static void __init armada37xx_cpufreq_avs_configure(struct regmap *base, target_vm = avs_map[l0_vdd_min] - 150; target_vm = target_vm > MIN_VOLT_MV ? target_vm : MIN_VOLT_MV; dvfs->avs[2] = dvfs->avs[3] = armada_37xx_avs_val_match(target_vm); + + /* + * Fix the avs value for load L1 when base CPU frequency is 1000/1200 MHz, + * otherwise the CPU gets stuck when switching from load L1 to load L0. + * Also ensure that avs value for load L1 is not higher than for L0. + */ + if (dvfs->cpu_freq_max >= 1000*1000*1000) { + u32 avs_min_l1; + + if (dvfs->cpu_freq_max >= 1200*1000*1000) + avs_min_l1 = armada_37xx_avs_val_match(MIN_VOLT_MV_FOR_L1_1200MHZ); + else + avs_min_l1 = armada_37xx_avs_val_match(MIN_VOLT_MV_FOR_L1_1000MHZ); + + if (avs_min_l1 > dvfs->avs[0]) + avs_min_l1 = dvfs->avs[0]; + + if (dvfs->avs[1] < avs_min_l1) + dvfs->avs[1] = avs_min_l1; + } } static void __init armada37xx_cpufreq_avs_setup(struct regmap *base, @@ -357,12 +402,17 @@ static int __init armada37xx_cpufreq_driver_init(void) struct armada_37xx_dvfs *dvfs; struct platform_device *pdev; unsigned long freq; - unsigned int cur_frequency, base_frequency; - struct regmap *nb_pm_base, *avs_base; + unsigned int base_frequency; + struct regmap *nb_clk_base, *nb_pm_base, *avs_base; struct device *cpu_dev; int load_lvl, ret; struct clk *clk, *parent; + nb_clk_base = + syscon_regmap_lookup_by_compatible("marvell,armada-3700-periph-clock-nb"); + if (IS_ERR(nb_clk_base)) + return -ENODEV; + nb_pm_base = syscon_regmap_lookup_by_compatible("marvell,armada-3700-nb-pm"); @@ -413,15 +463,7 @@ static int __init armada37xx_cpufreq_driver_init(void) return -EINVAL; } - /* Get nominal (current) CPU frequency */ - cur_frequency = clk_get_rate(clk); - if (!cur_frequency) { - dev_err(cpu_dev, "Failed to get clock rate for CPU\n"); - clk_put(clk); - return -EINVAL; - } - - dvfs = armada_37xx_cpu_freq_info_get(cur_frequency); + dvfs = armada_37xx_cpu_freq_info_get(base_frequency); if (!dvfs) { clk_put(clk); return -EINVAL; @@ -439,7 +481,7 @@ static int __init armada37xx_cpufreq_driver_init(void) armada37xx_cpufreq_avs_configure(avs_base, dvfs); armada37xx_cpufreq_avs_setup(avs_base, dvfs); - armada37xx_cpufreq_dvfs_setup(nb_pm_base, clk, dvfs->divider); + armada37xx_cpufreq_dvfs_setup(nb_pm_base, nb_clk_base, dvfs->divider); clk_put(clk); for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR; @@ -466,6 +508,9 @@ static int __init armada37xx_cpufreq_driver_init(void) if (ret) goto disable_dvfs; + armada37xx_cpufreq_state->cpu_dev = cpu_dev; + armada37xx_cpufreq_state->pdev = pdev; + platform_set_drvdata(pdev, dvfs); return 0; disable_dvfs: @@ -473,7 +518,7 @@ disable_dvfs: remove_opp: /* clean-up the already added opp before leaving */ while (load_lvl-- > ARMADA_37XX_DVFS_LOAD_0) { - freq = cur_frequency / dvfs->divider[load_lvl]; + freq = base_frequency / dvfs->divider[load_lvl]; dev_pm_opp_remove(cpu_dev, freq); } @@ -484,6 +529,26 @@ remove_opp: /* late_initcall, to guarantee the driver is loaded after A37xx clock driver */ late_initcall(armada37xx_cpufreq_driver_init); +static void __exit armada37xx_cpufreq_driver_exit(void) +{ + struct platform_device *pdev = armada37xx_cpufreq_state->pdev; + struct armada_37xx_dvfs *dvfs = platform_get_drvdata(pdev); + unsigned long freq; + int load_lvl; + + platform_device_unregister(pdev); + + armada37xx_cpufreq_disable_dvfs(armada37xx_cpufreq_state->regmap); + + for (load_lvl = ARMADA_37XX_DVFS_LOAD_0; load_lvl < LOAD_LEVEL_NR; load_lvl++) { + freq = dvfs->cpu_freq_max / dvfs->divider[load_lvl]; + dev_pm_opp_remove(armada37xx_cpufreq_state->cpu_dev, freq); + } + + kfree(armada37xx_cpufreq_state); +} +module_exit(armada37xx_cpufreq_driver_exit); + static const struct of_device_id __maybe_unused armada37xx_cpufreq_of_match[] = { { .compatible = "marvell,armada-3700-nb-pm" }, { }, diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c index 8a482c434ea6..3848b4c222e1 100644 --- a/drivers/cpufreq/cppc_cpufreq.c +++ b/drivers/cpufreq/cppc_cpufreq.c @@ -10,14 +10,18 @@ #define pr_fmt(fmt) "CPPC Cpufreq:" fmt +#include <linux/arch_topology.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/delay.h> #include <linux/cpu.h> #include <linux/cpufreq.h> #include <linux/dmi.h> +#include <linux/irq_work.h> +#include <linux/kthread.h> #include <linux/time.h> #include <linux/vmalloc.h> +#include <uapi/linux/sched/types.h> #include <asm/unaligned.h> @@ -57,6 +61,204 @@ static struct cppc_workaround_oem_info wa_info[] = { } }; +#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE + +/* Frequency invariance support */ +struct cppc_freq_invariance { + int cpu; + struct irq_work irq_work; + struct kthread_work work; + struct cppc_perf_fb_ctrs prev_perf_fb_ctrs; + struct cppc_cpudata *cpu_data; +}; + +static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv); +static struct kthread_worker *kworker_fie; +static bool fie_disabled; + +static struct cpufreq_driver cppc_cpufreq_driver; +static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu); +static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, + struct cppc_perf_fb_ctrs fb_ctrs_t0, + struct cppc_perf_fb_ctrs fb_ctrs_t1); + +/** + * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance + * @work: The work item. + * + * The CPPC driver register itself with the topology core to provide its own + * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which + * gets called by the scheduler on every tick. + * + * Note that the arch specific counters have higher priority than CPPC counters, + * if available, though the CPPC driver doesn't need to have any special + * handling for that. + * + * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we + * reach here from hard-irq context), which then schedules a normal work item + * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable + * based on the counter updates since the last tick. + */ +static void cppc_scale_freq_workfn(struct kthread_work *work) +{ + struct cppc_freq_invariance *cppc_fi; + struct cppc_perf_fb_ctrs fb_ctrs = {0}; + struct cppc_cpudata *cpu_data; + unsigned long local_freq_scale; + u64 perf; + + cppc_fi = container_of(work, struct cppc_freq_invariance, work); + cpu_data = cppc_fi->cpu_data; + + if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) { + pr_warn("%s: failed to read perf counters\n", __func__); + return; + } + + cppc_fi->prev_perf_fb_ctrs = fb_ctrs; + perf = cppc_perf_from_fbctrs(cpu_data, cppc_fi->prev_perf_fb_ctrs, + fb_ctrs); + + perf <<= SCHED_CAPACITY_SHIFT; + local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf); + if (WARN_ON(local_freq_scale > 1024)) + local_freq_scale = 1024; + + per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale; +} + +static void cppc_irq_work(struct irq_work *irq_work) +{ + struct cppc_freq_invariance *cppc_fi; + + cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work); + kthread_queue_work(kworker_fie, &cppc_fi->work); +} + +static void cppc_scale_freq_tick(void) +{ + struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id()); + + /* + * cppc_get_perf_ctrs() can potentially sleep, call that from the right + * context. + */ + irq_work_queue(&cppc_fi->irq_work); +} + +static struct scale_freq_data cppc_sftd = { + .source = SCALE_FREQ_SOURCE_CPPC, + .set_freq_scale = cppc_scale_freq_tick, +}; + +static void cppc_freq_invariance_policy_init(struct cpufreq_policy *policy, + struct cppc_cpudata *cpu_data) +{ + struct cppc_perf_fb_ctrs fb_ctrs = {0}; + struct cppc_freq_invariance *cppc_fi; + int i, ret; + + if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + return; + + if (fie_disabled) + return; + + for_each_cpu(i, policy->cpus) { + cppc_fi = &per_cpu(cppc_freq_inv, i); + cppc_fi->cpu = i; + cppc_fi->cpu_data = cpu_data; + kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn); + init_irq_work(&cppc_fi->irq_work, cppc_irq_work); + + ret = cppc_get_perf_ctrs(i, &fb_ctrs); + if (ret) { + pr_warn("%s: failed to read perf counters: %d\n", + __func__, ret); + fie_disabled = true; + } else { + cppc_fi->prev_perf_fb_ctrs = fb_ctrs; + } + } +} + +static void __init cppc_freq_invariance_init(void) +{ + struct sched_attr attr = { + .size = sizeof(struct sched_attr), + .sched_policy = SCHED_DEADLINE, + .sched_nice = 0, + .sched_priority = 0, + /* + * Fake (unused) bandwidth; workaround to "fix" + * priority inheritance. + */ + .sched_runtime = 1000000, + .sched_deadline = 10000000, + .sched_period = 10000000, + }; + int ret; + + if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + return; + + if (fie_disabled) + return; + + kworker_fie = kthread_create_worker(0, "cppc_fie"); + if (IS_ERR(kworker_fie)) + return; + + ret = sched_setattr_nocheck(kworker_fie->task, &attr); + if (ret) { + pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__, + ret); + kthread_destroy_worker(kworker_fie); + return; + } + + /* Register for freq-invariance */ + topology_set_scale_freq_source(&cppc_sftd, cpu_present_mask); +} + +static void cppc_freq_invariance_exit(void) +{ + struct cppc_freq_invariance *cppc_fi; + int i; + + if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) + return; + + if (fie_disabled) + return; + + topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, cpu_present_mask); + + for_each_possible_cpu(i) { + cppc_fi = &per_cpu(cppc_freq_inv, i); + irq_work_sync(&cppc_fi->irq_work); + } + + kthread_destroy_worker(kworker_fie); + kworker_fie = NULL; +} + +#else +static inline void +cppc_freq_invariance_policy_init(struct cpufreq_policy *policy, + struct cppc_cpudata *cpu_data) +{ +} + +static inline void cppc_freq_invariance_init(void) +{ +} + +static inline void cppc_freq_invariance_exit(void) +{ +} +#endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */ + /* Callback function used to retrieve the max frequency from DMI */ static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private) { @@ -216,26 +418,16 @@ static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu) { unsigned long implementor = read_cpuid_implementor(); unsigned long part_num = read_cpuid_part_number(); - unsigned int delay_us = 0; switch (implementor) { case ARM_CPU_IMP_QCOM: switch (part_num) { case QCOM_CPU_PART_FALKOR_V1: case QCOM_CPU_PART_FALKOR: - delay_us = 10000; - break; - default: - delay_us = cppc_get_transition_latency(cpu) / NSEC_PER_USEC; - break; + return 10000; } - break; - default: - delay_us = cppc_get_transition_latency(cpu) / NSEC_PER_USEC; - break; } - - return delay_us; + return cppc_get_transition_latency(cpu) / NSEC_PER_USEC; } #else @@ -355,9 +547,12 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy) cpu_data->perf_ctrls.desired_perf = caps->highest_perf; ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); - if (ret) + if (ret) { pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", caps->highest_perf, cpu, ret); + } else { + cppc_freq_invariance_policy_init(policy, cpu_data); + } return ret; } @@ -370,12 +565,12 @@ static inline u64 get_delta(u64 t1, u64 t0) return (u32)t1 - (u32)t0; } -static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, - struct cppc_perf_fb_ctrs fb_ctrs_t0, - struct cppc_perf_fb_ctrs fb_ctrs_t1) +static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, + struct cppc_perf_fb_ctrs fb_ctrs_t0, + struct cppc_perf_fb_ctrs fb_ctrs_t1) { u64 delta_reference, delta_delivered; - u64 reference_perf, delivered_perf; + u64 reference_perf; reference_perf = fb_ctrs_t0.reference_perf; @@ -384,12 +579,21 @@ static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, delta_delivered = get_delta(fb_ctrs_t1.delivered, fb_ctrs_t0.delivered); - /* Check to avoid divide-by zero */ - if (delta_reference || delta_delivered) - delivered_perf = (reference_perf * delta_delivered) / - delta_reference; - else - delivered_perf = cpu_data->perf_ctrls.desired_perf; + /* Check to avoid divide-by zero and invalid delivered_perf */ + if (!delta_reference || !delta_delivered) + return cpu_data->perf_ctrls.desired_perf; + + return (reference_perf * delta_delivered) / delta_reference; +} + +static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, + struct cppc_perf_fb_ctrs fb_ctrs_t0, + struct cppc_perf_fb_ctrs fb_ctrs_t1) +{ + u64 delivered_perf; + + delivered_perf = cppc_perf_from_fbctrs(cpu_data, fb_ctrs_t0, + fb_ctrs_t1); return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf); } @@ -514,6 +718,8 @@ static void cppc_check_hisi_workaround(void) static int __init cppc_cpufreq_init(void) { + int ret; + if ((acpi_disabled) || !acpi_cpc_valid()) return -ENODEV; @@ -521,7 +727,11 @@ static int __init cppc_cpufreq_init(void) cppc_check_hisi_workaround(); - return cpufreq_register_driver(&cppc_cpufreq_driver); + ret = cpufreq_register_driver(&cppc_cpufreq_driver); + if (!ret) + cppc_freq_invariance_init(); + + return ret; } static inline void free_cpu_data(void) @@ -538,6 +748,7 @@ static inline void free_cpu_data(void) static void __exit cppc_cpufreq_exit(void) { + cppc_freq_invariance_exit(); cpufreq_unregister_driver(&cppc_cpufreq_driver); free_cpu_data(); diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index b1e1bdc63b01..ece52863ba62 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -255,10 +255,15 @@ static int dt_cpufreq_early_init(struct device *dev, int cpu) * before updating priv->cpus. Otherwise, we will end up creating * duplicate OPPs for the CPUs. * - * OPPs might be populated at runtime, don't check for error here. + * OPPs might be populated at runtime, don't fail for error here unless + * it is -EPROBE_DEFER. */ - if (!dev_pm_opp_of_cpumask_add_table(priv->cpus)) + ret = dev_pm_opp_of_cpumask_add_table(priv->cpus); + if (!ret) { priv->have_static_opps = true; + } else if (ret == -EPROBE_DEFER) { + goto out; + } /* * The OPP table must be initialized, statically or dynamically, by this diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1d1b563cea4b..802abc925b2a 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -42,9 +42,6 @@ static LIST_HEAD(cpufreq_policy_list); #define for_each_inactive_policy(__policy) \ for_each_suitable_policy(__policy, false) -#define for_each_policy(__policy) \ - list_for_each_entry(__policy, &cpufreq_policy_list, policy_list) - /* Iterate over governors */ static LIST_HEAD(cpufreq_governor_list); #define for_each_governor(__governor) \ diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c index 2efe7189ccc4..c6bdc455517f 100644 --- a/drivers/cpufreq/ia64-acpi-cpufreq.c +++ b/drivers/cpufreq/ia64-acpi-cpufreq.c @@ -54,7 +54,7 @@ processor_set_pstate ( retval = ia64_pal_set_pstate((u64)value); if (retval) { - pr_debug("Failed to set freq to 0x%x, with error 0x%lx\n", + pr_debug("Failed to set freq to 0x%x, with error 0x%llx\n", value, retval); return -ENODEV; } @@ -77,7 +77,7 @@ processor_get_pstate ( if (retval) pr_debug("Failed to get current freq with " - "error 0x%lx, idx 0x%x\n", retval, *value); + "error 0x%llx, idx 0x%x\n", retval, *value); return (int)retval; } diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 5175ae3cac44..f0401064d7aa 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -819,19 +819,21 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; -static void intel_pstate_get_hwp_max(struct cpudata *cpu, int *phy_max, - int *current_max) +static void __intel_pstate_get_hwp_cap(struct cpudata *cpu) { u64 cap; rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap); WRITE_ONCE(cpu->hwp_cap_cached, cap); - if (global.no_turbo || global.turbo_disabled) - *current_max = HWP_GUARANTEED_PERF(cap); - else - *current_max = HWP_HIGHEST_PERF(cap); + cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(cap); + cpu->pstate.turbo_pstate = HWP_HIGHEST_PERF(cap); +} - *phy_max = HWP_HIGHEST_PERF(cap); +static void intel_pstate_get_hwp_cap(struct cpudata *cpu) +{ + __intel_pstate_get_hwp_cap(cpu); + cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; + cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; } static void intel_pstate_hwp_set(unsigned int cpu) @@ -1195,12 +1197,13 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b, static void update_qos_request(enum freq_qos_req_type type) { - int max_state, turbo_max, freq, i, perf_pct; struct freq_qos_request *req; struct cpufreq_policy *policy; + int i; for_each_possible_cpu(i) { struct cpudata *cpu = all_cpu_data[i]; + unsigned int freq, perf_pct; policy = cpufreq_cpu_get(i); if (!policy) @@ -1213,9 +1216,7 @@ static void update_qos_request(enum freq_qos_req_type type) continue; if (hwp_active) - intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); - else - turbo_max = cpu->pstate.turbo_pstate; + intel_pstate_get_hwp_cap(cpu); if (type == FREQ_QOS_MIN) { perf_pct = global.min_perf_pct; @@ -1224,8 +1225,7 @@ static void update_qos_request(enum freq_qos_req_type type) perf_pct = global.max_perf_pct; } - freq = DIV_ROUND_UP(turbo_max * perf_pct, 100); - freq *= cpu->pstate.scaling; + freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * perf_pct, 100); if (freq_qos_update_request(req, freq) < 0) pr_warn("Failed to update freq constraint: CPU%d\n", i); @@ -1715,21 +1715,17 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { cpu->pstate.min_pstate = pstate_funcs.get_min(); cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical(); - cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); cpu->pstate.scaling = pstate_funcs.get_scaling(); if (hwp_active && !hwp_mode_bdw) { - unsigned int phy_max, current_max; - - intel_pstate_get_hwp_max(cpu, &phy_max, ¤t_max); - cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling; - cpu->pstate.turbo_pstate = phy_max; - cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(READ_ONCE(cpu->hwp_cap_cached)); + __intel_pstate_get_hwp_cap(cpu); } else { - cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; cpu->pstate.max_pstate = pstate_funcs.get_max(); + cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); } + cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; + cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; if (pstate_funcs.get_aperf_mperf_shift) cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift(); @@ -2199,41 +2195,34 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, unsigned int policy_min, unsigned int policy_max) { + int scaling = cpu->pstate.scaling; int32_t max_policy_perf, min_policy_perf; - int max_state, turbo_max; - int max_freq; /* - * HWP needs some special consideration, because on BDX the - * HWP_REQUEST uses abstract value to represent performance - * rather than pure ratios. + * HWP needs some special consideration, because HWP_REQUEST uses + * abstract values to represent performance rather than pure ratios. */ - if (hwp_active) { - intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); - } else { - max_state = global.no_turbo || global.turbo_disabled ? - cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; - turbo_max = cpu->pstate.turbo_pstate; - } - max_freq = max_state * cpu->pstate.scaling; + if (hwp_active) + intel_pstate_get_hwp_cap(cpu); - max_policy_perf = max_state * policy_max / max_freq; + max_policy_perf = policy_max / scaling; if (policy_max == policy_min) { min_policy_perf = max_policy_perf; } else { - min_policy_perf = max_state * policy_min / max_freq; + min_policy_perf = policy_min / scaling; min_policy_perf = clamp_t(int32_t, min_policy_perf, 0, max_policy_perf); } - pr_debug("cpu:%d max_state %d min_policy_perf:%d max_policy_perf:%d\n", - cpu->cpu, max_state, min_policy_perf, max_policy_perf); + pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n", + cpu->cpu, min_policy_perf, max_policy_perf); /* Normalize user input to [min_perf, max_perf] */ if (per_cpu_limits) { cpu->min_perf_ratio = min_policy_perf; cpu->max_perf_ratio = max_policy_perf; } else { + int turbo_max = cpu->pstate.turbo_pstate; int32_t global_min, global_max; /* Global limits are in percent of the maximum turbo P-state. */ @@ -2322,10 +2311,9 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu, update_turbo_state(); if (hwp_active) { - int max_state, turbo_max; - - intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); - max_freq = max_state * cpu->pstate.scaling; + intel_pstate_get_hwp_cap(cpu); + max_freq = global.no_turbo || global.turbo_disabled ? + cpu->pstate.max_freq : cpu->pstate.turbo_freq; } else { max_freq = intel_pstate_get_max_freq(cpu); } @@ -2416,25 +2404,15 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu->max_perf_ratio = 0xFF; cpu->min_perf_ratio = 0; - policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; - policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; - /* cpuinfo and default policy values */ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; update_turbo_state(); global.turbo_disabled_mf = global.turbo_disabled; policy->cpuinfo.max_freq = global.turbo_disabled ? - cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; - policy->cpuinfo.max_freq *= cpu->pstate.scaling; - - if (hwp_active) { - unsigned int max_freq; - - max_freq = global.turbo_disabled ? cpu->pstate.max_freq : cpu->pstate.turbo_freq; - if (max_freq < policy->cpuinfo.max_freq) - policy->cpuinfo.max_freq = max_freq; - } + + policy->min = policy->cpuinfo.min_freq; + policy->max = policy->cpuinfo.max_freq; intel_pstate_init_acpi_perf_limits(policy); @@ -2683,10 +2661,10 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) { - int max_state, turbo_max, min_freq, max_freq, ret; struct freq_qos_request *req; struct cpudata *cpu; struct device *dev; + int ret, freq; dev = get_cpu_device(policy->cpu); if (!dev) @@ -2711,30 +2689,31 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) if (hwp_active) { u64 value; - intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP; + + intel_pstate_get_hwp_cap(cpu); + rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value); WRITE_ONCE(cpu->hwp_req_cached, value); + cpu->epp_cached = intel_pstate_get_epp(cpu, value); } else { - turbo_max = cpu->pstate.turbo_pstate; policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY; } - min_freq = DIV_ROUND_UP(turbo_max * global.min_perf_pct, 100); - min_freq *= cpu->pstate.scaling; - max_freq = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100); - max_freq *= cpu->pstate.scaling; + freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * global.min_perf_pct, 100); ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MIN, - min_freq); + freq); if (ret < 0) { dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret); goto free_req; } + freq = DIV_ROUND_UP(cpu->pstate.turbo_freq * global.max_perf_pct, 100); + ret = freq_qos_add_request(&policy->constraints, req + 1, FREQ_QOS_MAX, - max_freq); + freq); if (ret < 0) { dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret); goto remove_min_req; diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 69786e5bbf05..ad7d4f272ddc 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -91,7 +91,7 @@ static DEFINE_MUTEX(set_freq_lock); /* Use 800MHz when entering sleep mode */ #define SLEEP_FREQ (800 * 1000) -/* Tracks if cpu freqency can be updated anymore */ +/* Tracks if CPU frequency can be updated anymore */ static bool no_cpufreq_access; /* @@ -190,7 +190,7 @@ static u32 clkdiv_val[5][11] = { /* * This function set DRAM refresh counter - * accoriding to operating frequency of DRAM + * according to operating frequency of DRAM * ch: DMC port number 0 or 1 * freq: Operating frequency of DRAM(KHz) */ @@ -320,7 +320,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index) /* * 3. DMC1 refresh count for 133Mhz if (index == L4) is - * true refresh counter is already programed in upper + * true refresh counter is already programmed in upper * code. 0x287@83Mhz */ if (!bus_speed_changing) @@ -378,7 +378,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index) /* * 6. Turn on APLL * 6-1. Set PMS values - * 6-2. Wait untile the PLL is locked + * 6-2. Wait until the PLL is locked */ if (index == L0) writel_relaxed(APLL_VAL_1000, S5P_APLL_CON); @@ -390,7 +390,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index) } while (!(reg & (0x1 << 29))); /* - * 7. Change souce clock from SCLKMPLL(667Mhz) + * 7. Change source clock from SCLKMPLL(667Mhz) * to SCLKA2M(200Mhz) in MFC_MUX and G3D MUX * (667/4=166)->(200/4=50)Mhz */ @@ -439,8 +439,8 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index) } /* - * L4 level need to change memory bus speed, hence onedram clock divier - * and memory refresh parameter should be changed + * L4 level needs to change memory bus speed, hence ONEDRAM clock + * divider and memory refresh parameter should be changed */ if (bus_speed_changing) { reg = readl_relaxed(S5P_CLK_DIV6); diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm index 0844fadc4be8..334f83e56120 100644 --- a/drivers/cpuidle/Kconfig.arm +++ b/drivers/cpuidle/Kconfig.arm @@ -107,7 +107,7 @@ config ARM_TEGRA_CPUIDLE config ARM_QCOM_SPM_CPUIDLE bool "CPU Idle Driver for Qualcomm Subsystem Power Manager (SPM)" - depends on (ARCH_QCOM || COMPILE_TEST) && !ARM64 + depends on (ARCH_QCOM || COMPILE_TEST) && !ARM64 && MMU select ARM_CPU_SUSPEND select CPU_IDLE_MULTIPLE_DRIVERS select DT_IDLE_STATES diff --git a/drivers/cpuidle/cpuidle-tegra.c b/drivers/cpuidle/cpuidle-tegra.c index 191966dc8d02..508bd9f23792 100644 --- a/drivers/cpuidle/cpuidle-tegra.c +++ b/drivers/cpuidle/cpuidle-tegra.c @@ -48,11 +48,6 @@ enum tegra_state { static atomic_t tegra_idle_barrier; static atomic_t tegra_abort_flag; -static inline bool tegra_cpuidle_using_firmware(void) -{ - return firmware_ops->prepare_idle && firmware_ops->do_idle; -} - static void tegra_cpuidle_report_cpus_state(void) { unsigned long cpu, lcpu, csr; @@ -135,13 +130,9 @@ static int tegra_cpuidle_c7_enter(void) { int err; - if (tegra_cpuidle_using_firmware()) { - err = call_firmware_op(prepare_idle, TF_PM_MODE_LP2_NOFLUSH_L2); - if (err) - return err; - - return call_firmware_op(do_idle, 0); - } + err = call_firmware_op(prepare_idle, TF_PM_MODE_LP2_NOFLUSH_L2); + if (err && err != -ENOSYS) + return err; return cpu_suspend(0, tegra30_pm_secondary_cpu_suspend); } @@ -356,9 +347,7 @@ static int tegra_cpuidle_probe(struct platform_device *pdev) * is disabled. */ if (!IS_ENABLED(CONFIG_PM_SLEEP)) { - if (!tegra_cpuidle_using_firmware()) - tegra_cpuidle_disable_state(TEGRA_C7); - + tegra_cpuidle_disable_state(TEGRA_C7); tegra_cpuidle_disable_state(TEGRA_CC6); } diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 4070e573bf43..f70aa17e2a8e 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -181,9 +181,13 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv) */ if (s->target_residency > 0) s->target_residency_ns = s->target_residency * NSEC_PER_USEC; + else if (s->target_residency_ns < 0) + s->target_residency_ns = 0; if (s->exit_latency > 0) s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC; + else if (s->exit_latency_ns < 0) + s->exit_latency_ns = 0; } } diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index b0a7ad566081..c3aa8d6ccee3 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -271,7 +271,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, u64 predicted_ns; u64 interactivity_req; unsigned long nr_iowaiters; - ktime_t delta_next; + ktime_t delta, delta_tick; int i, idx; if (data->needs_update) { @@ -280,7 +280,12 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, } /* determine the expected residency time, round up */ - data->next_timer_ns = tick_nohz_get_sleep_length(&delta_next); + delta = tick_nohz_get_sleep_length(&delta_tick); + if (unlikely(delta < 0)) { + delta = 0; + delta_tick = 0; + } + data->next_timer_ns = delta; nr_iowaiters = nr_iowait_cpu(dev->cpu); data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters); @@ -318,7 +323,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * state selection. */ if (predicted_ns < TICK_NSEC) - predicted_ns = delta_next; + predicted_ns = data->next_timer_ns; } else { /* * Use the performance multiplier and the user-configurable @@ -377,7 +382,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * stuck in the shallow one for too long. */ if (drv->states[idx].target_residency_ns < TICK_NSEC && - s->target_residency_ns <= delta_next) + s->target_residency_ns <= delta_tick) idx = i; return idx; @@ -399,7 +404,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, predicted_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) { *stop_tick = false; - if (idx > 0 && drv->states[idx].target_residency_ns > delta_next) { + if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick) { /* * The tick is not going to be stopped and the target * residency of the state to be returned is not within @@ -411,7 +416,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, continue; idx = i; - if (drv->states[i].target_residency_ns <= delta_next) + if (drv->states[i].target_residency_ns <= delta_tick) break; } } diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c index 6deaaf5f05b5..ac4bb27d69b0 100644 --- a/drivers/cpuidle/governors/teo.c +++ b/drivers/cpuidle/governors/teo.c @@ -100,8 +100,8 @@ struct teo_idle_state { * @intervals: Saved idle duration values. */ struct teo_cpu { - u64 time_span_ns; - u64 sleep_length_ns; + s64 time_span_ns; + s64 sleep_length_ns; struct teo_idle_state states[CPUIDLE_STATE_MAX]; int interval_idx; u64 intervals[INTERVALS]; @@ -117,7 +117,8 @@ static DEFINE_PER_CPU(struct teo_cpu, teo_cpus); static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); - int i, idx_hit = -1, idx_timer = -1; + int i, idx_hit = 0, idx_timer = 0; + unsigned int hits, misses; u64 measured_ns; if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) { @@ -174,25 +175,22 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) * also increase the "early hits" metric for the state that actually * matches the measured idle duration. */ - if (idx_timer >= 0) { - unsigned int hits = cpu_data->states[idx_timer].hits; - unsigned int misses = cpu_data->states[idx_timer].misses; - - hits -= hits >> DECAY_SHIFT; - misses -= misses >> DECAY_SHIFT; - - if (idx_timer > idx_hit) { - misses += PULSE; - if (idx_hit >= 0) - cpu_data->states[idx_hit].early_hits += PULSE; - } else { - hits += PULSE; - } + hits = cpu_data->states[idx_timer].hits; + hits -= hits >> DECAY_SHIFT; + + misses = cpu_data->states[idx_timer].misses; + misses -= misses >> DECAY_SHIFT; - cpu_data->states[idx_timer].misses = misses; - cpu_data->states[idx_timer].hits = hits; + if (idx_timer == idx_hit) { + hits += PULSE; + } else { + misses += PULSE; + cpu_data->states[idx_hit].early_hits += PULSE; } + cpu_data->states[idx_timer].misses = misses; + cpu_data->states[idx_timer].hits = hits; + /* * Save idle duration values corresponding to non-timer wakeups for * pattern detection. @@ -216,7 +214,7 @@ static bool teo_time_ok(u64 interval_ns) */ static int teo_find_shallower_state(struct cpuidle_driver *drv, struct cpuidle_device *dev, int state_idx, - u64 duration_ns) + s64 duration_ns) { int i; @@ -242,10 +240,10 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, { struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu); s64 latency_req = cpuidle_governor_latency_req(dev->cpu); - u64 duration_ns; + int max_early_idx, prev_max_early_idx, constraint_idx, idx0, idx, i; unsigned int hits, misses, early_hits; - int max_early_idx, prev_max_early_idx, constraint_idx, idx, i; ktime_t delta_tick; + s64 duration_ns; if (dev->last_state_idx >= 0) { teo_update(drv, dev); @@ -264,6 +262,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, prev_max_early_idx = -1; constraint_idx = drv->state_count; idx = -1; + idx0 = idx; for (i = 0; i < drv->state_count; i++) { struct cpuidle_state *s = &drv->states[i]; @@ -324,6 +323,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, idx = i; /* first enabled state */ hits = cpu_data->states[i].hits; misses = cpu_data->states[i].misses; + idx0 = i; } if (s->target_residency_ns > duration_ns) @@ -376,11 +376,16 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, if (idx < 0) { idx = 0; /* No states enabled. Must use 0. */ - } else if (idx > 0) { + } else if (idx > idx0) { unsigned int count = 0; u64 sum = 0; /* + * The target residencies of at least two different enabled idle + * states are less than or equal to the current expected idle + * duration. Try to refine the selection using the most recent + * measured idle duration values. + * * Count and sum the most recent idle duration values less than * the current expected idle duration value. */ @@ -428,7 +433,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, * till the closest timer including the tick, try to correct * that. */ - if (idx > 0 && drv->states[idx].target_residency_ns > delta_tick) + if (idx > idx0 && + drv->states[idx].target_residency_ns > delta_tick) idx = teo_find_shallower_state(drv, dev, idx, delta_tick); } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 3273360f30f7..ec1b9d306ba6 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -744,8 +744,8 @@ static struct cpuidle_state icx_cstates[] __initdata = { .name = "C6", .desc = "MWAIT 0x20", .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, - .exit_latency = 128, - .target_residency = 384, + .exit_latency = 170, + .target_residency = 600, .enter = &intel_idle, .enter_s2idle = intel_idle_s2idle, }, { @@ -1156,6 +1156,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, &idle_cpu_skl), X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &idle_cpu_skx), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &idle_cpu_icx), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &idle_cpu_icx), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl), X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt), diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 16a17215f633..e4d4e399004b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1870,20 +1870,10 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags) int err; int i, bars = 0; - /* - * Power state could be unknown at this point, either due to a fresh - * boot or a device removal call. So get the current power state - * so that things like MSI message writing will behave as expected - * (e.g. if the device really is in D0 at enable time). - */ - if (dev->pm_cap) { - u16 pmcsr; - pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); - dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); - } - - if (atomic_inc_return(&dev->enable_cnt) > 1) + if (atomic_inc_return(&dev->enable_cnt) > 1) { + pci_update_current_state(dev, dev->current_state); return 0; /* already enabled */ + } bridge = pci_upstream_bridge(dev); if (bridge) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index fdda2a737186..73cf68af9770 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -1069,6 +1069,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd), X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd), + X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd), {} }; MODULE_DEVICE_TABLE(x86cpu, rapl_ids); diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index 78213d4b5b16..cc3b22881bfe 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -150,6 +150,7 @@ static int rapl_msr_probe(struct platform_device *pdev) case X86_VENDOR_INTEL: rapl_msr_priv = &rapl_msr_priv_intel; break; + case X86_VENDOR_HYGON: case X86_VENDOR_AMD: rapl_msr_priv = &rapl_msr_priv_amd; break; |