cpufreq: Add special-purpose fast-switching callback for drivers

First off, some cpufreq drivers (eg. intel_pstate) can pass hints beyond the current target frequency to the hardware and there are no provisions for doing that in the cpufreq framework. In particular, today the driver has to assume that it should not allow the frequency to fall below the one requested by the governor (or the required capacity may not be provided) which may not be the case and which may lead to excessive energy usage in some scenarios. Second, the hints passed by these drivers to the hardware need not be in terms of the frequency, so representing the utilization numbers coming from the scheduler as frequency before passing them to those drivers is not really useful. Address the two points above by adding a special-purpose replacement for the ->fast_switch callback, called ->adjust_perf, allowing the governor to pass abstract performance level (rather than frequency) values for the minimum (required) and target (desired) performance along with the CPU capacity to compare them to. Also update the schedutil governor to use the new callback instead of ->fast_switch if present and if the utilization mertics are frequency-invariant (that is requisite for the direct mapping between the utilization and the CPU performance levels to be a reasonable approximation). Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> 2020-12-14 21:08:00 +0100
committer: Rafael J. Wysocki <rafael.j.wysocki@intel.com> 2020-12-15 19:24:18 +0100
commit: ee2cc4276ba4909438f5894a218877660e1536d9 (patch)
tree: 691fe7a3348b9a8f343d50408cf2a39645abd943 /kernel/sched/cpufreq_schedutil.c
parent: ca6827de4b67367e73fdf43d2ea0a0064423edfb (diff)
1 files changed, 58 insertions, 10 deletions
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 319a270d13c1..803bcb30db27 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -432,13 +432,10 @@ static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_p
 		sg_policy->limits_changed = true;
 }
 
-static void sugov_update_single(struct update_util_data *hook, u64 time,
-				unsigned int flags)
+static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
+					      u64 time, unsigned int flags)
 {
-	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
-	unsigned int cached_freq = sg_policy->cached_raw_freq;
-	unsigned int next_f;
 
 	sugov_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
@@ -446,11 +443,25 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	ignore_dl_rate_limit(sg_cpu, sg_policy);
 
 	if (!sugov_should_update_freq(sg_policy, time))
-		return;
+		return false;
 
 	sugov_get_util(sg_cpu);
 	sugov_iowait_apply(sg_cpu, time);
 
+	return true;
+}
+
+static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
+				     unsigned int flags)
+{
+	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+	unsigned int cached_freq = sg_policy->cached_raw_freq;
+	unsigned int next_f;
+
+	if (!sugov_update_single_common(sg_cpu, time, flags))
+		return;
+
 	next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max);
 	/*
 	 * Do not reduce the frequency if the CPU has not been idle
@@ -477,6 +488,38 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	}
 }
 
+static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
+				     unsigned int flags)
+{
+	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+	unsigned long prev_util = sg_cpu->util;
+
+	/*
+	 * Fall back to the "frequency" path if frequency invariance is not
+	 * supported, because the direct mapping between the utilization and
+	 * the performance levels depends on the frequency invariance.
+	 */
+	if (!arch_scale_freq_invariant()) {
+		sugov_update_single_freq(hook, time, flags);
+		return;
+	}
+
+	if (!sugov_update_single_common(sg_cpu, time, flags))
+		return;
+
+	/*
+	 * Do not reduce the target performance level if the CPU has not been
+	 * idle recently, as the reduction is likely to be premature then.
+	 */
+	if (sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
+		sg_cpu->util = prev_util;
+
+	cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
+				   map_util_perf(sg_cpu->util), sg_cpu->max);
+
+	sg_cpu->sg_policy->last_freq_update_time = time;
+}
+
 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
@@ -815,6 +858,7 @@ static void sugov_exit(struct cpufreq_policy *policy)
 static int sugov_start(struct cpufreq_policy *policy)
 {
 	struct sugov_policy *sg_policy = policy->governor_data;
+	void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
 	unsigned int cpu;
 
 	sg_policy->freq_update_delay_ns	= sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
@@ -834,13 +878,17 @@ static int sugov_start(struct cpufreq_policy *policy)
 		sg_cpu->sg_policy		= sg_policy;
 	}
 
+	if (policy_is_shared(policy))
+		uu = sugov_update_shared;
+	else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf())
+		uu = sugov_update_single_perf;
+	else
+		uu = sugov_update_single_freq;
+
 	for_each_cpu(cpu, policy->cpus) {
 		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
 
-		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
-					     policy_is_shared(policy) ?
-							sugov_update_shared :
-							sugov_update_single);
+		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu);
 	}
 	return 0;
 }
author	Rafael J. Wysocki <rafael.j.wysocki@intel.com>	2020-12-14 21:08:00 +0100
committer	Rafael J. Wysocki <rafael.j.wysocki@intel.com>	2020-12-15 19:24:18 +0100
commit	ee2cc4276ba4909438f5894a218877660e1536d9 (patch)
tree	691fe7a3348b9a8f343d50408cf2a39645abd943 /kernel/sched/cpufreq_schedutil.c
parent	ca6827de4b67367e73fdf43d2ea0a0064423edfb (diff)