summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-09-04 13:27:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-09-04 13:27:24 -0700
commitf162626a038ec06da98ac38ce3d6bdbd715e9c5f (patch)
treecce5ef725db1920f734c3c04049427ed7eea3447
parentd824e0809ce3c9e935f3aa37381cda7fd4184f12 (diff)
parentf7ce2c3afc938b7d743ee8e5563560c04e17d7be (diff)
Merge tag 'pm-5.9-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management fixes from Rafael Wysocki: "These fix reference counting in the operating performance points (OPP) framework and address a few intel_pstate driver issues, mostly related to switching driver operation modes and similar with hardware-managed P-states (HWP) enabled. Specifics: - Fix reference counting of operating performance points (OPP) tables (Viresh Kumar). - Address intel_pstate driver interface issues, mostly related to switching operation modes and handling CPU offline and online and system-wide suspend/resume with hardware-managed P-states (HWP) enabled (Rafael Wysocki). - Fix the maximum frequency computation in the intel_pstate driver with turbo P-states disabled by the platform firmware and HWP enabled (Francisco Jerez)" * tag 'pm-5.9-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: cpufreq: intel_pstate: Fix intel_pstate_get_hwp_max() for turbo disabled cpufreq: intel_pstate: Free memory only when turning off cpufreq: intel_pstate: Add ->offline and ->online callbacks cpufreq: intel_pstate: Tweak the EPP sysfs interface cpufreq: intel_pstate: Update cached EPP in the active mode cpufreq: intel_pstate: Refuse to turn off with HWP enabled opp: Don't drop reference for an OPP table that was never parsed
-rw-r--r--Documentation/admin-guide/pm/intel_pstate.rst4
-rw-r--r--drivers/cpufreq/intel_pstate.c236
-rw-r--r--drivers/opp/core.c22
-rw-r--r--drivers/opp/opp.h2
4 files changed, 168 insertions, 96 deletions
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index cdd1a9a7f9a2..5072e7064d13 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -123,7 +123,9 @@ Energy-Performance Bias (EPB) knob (otherwise), which means that the processor's
internal P-state selection logic is expected to focus entirely on performance.
This will override the EPP/EPB setting coming from the ``sysfs`` interface
-(see `Energy vs Performance Hints`_ below).
+(see `Energy vs Performance Hints`_ below). Moreover, any attempts to change
+the EPP/EPB to a value different from 0 ("performance") via ``sysfs`` in this
+configuration will be rejected.
Also, in this configuration the range of P-states available to the processor's
internal P-state selection logic is always restricted to the upper boundary
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index e0220a6fbc69..a827b000ef51 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -219,14 +219,13 @@ struct global_params {
* @epp_policy: Last saved policy used to set EPP/EPB
* @epp_default: Power on default HWP energy performance
* preference/bias
- * @epp_saved: Saved EPP/EPB during system suspend or CPU offline
- * operation
* @epp_cached Cached HWP energy-performance preference value
* @hwp_req_cached: Cached value of the last HWP Request MSR
* @hwp_cap_cached: Cached value of the last HWP Capabilities MSR
* @last_io_update: Last time when IO wake flag was set
* @sched_flags: Store scheduler flags for possible cross CPU update
* @hwp_boost_min: Last HWP boosted min performance
+ * @suspended: Whether or not the driver has been suspended.
*
* This structure stores per CPU instance data for all CPUs.
*/
@@ -258,13 +257,13 @@ struct cpudata {
s16 epp_powersave;
s16 epp_policy;
s16 epp_default;
- s16 epp_saved;
s16 epp_cached;
u64 hwp_req_cached;
u64 hwp_cap_cached;
u64 last_io_update;
unsigned int sched_flags;
u32 hwp_boost_min;
+ bool suspended;
};
static struct cpudata **all_cpu_data;
@@ -644,6 +643,8 @@ static int intel_pstate_get_energy_pref_index(struct cpudata *cpu_data, int *raw
static int intel_pstate_set_epp(struct cpudata *cpu, u32 epp)
{
+ int ret;
+
/*
* Use the cached HWP Request MSR value, because in the active mode the
* register itself may be updated by intel_pstate_hwp_boost_up() or
@@ -659,7 +660,11 @@ static int intel_pstate_set_epp(struct cpudata *cpu, u32 epp)
* function, so it cannot run in parallel with the update below.
*/
WRITE_ONCE(cpu->hwp_req_cached, value);
- return wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+ ret = wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
+ if (!ret)
+ cpu->epp_cached = epp;
+
+ return ret;
}
static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
@@ -678,6 +683,14 @@ static int intel_pstate_set_energy_pref_index(struct cpudata *cpu_data,
else if (epp == -EINVAL)
epp = epp_values[pref_index - 1];
+ /*
+ * To avoid confusion, refuse to set EPP to any values different
+ * from 0 (performance) if the current policy is "performance",
+ * because those values would be overridden.
+ */
+ if (epp > 0 && cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE)
+ return -EBUSY;
+
ret = intel_pstate_set_epp(cpu_data, epp);
} else {
if (epp == -EINVAL)
@@ -762,10 +775,8 @@ static ssize_t store_energy_performance_preference(
cpufreq_stop_governor(policy);
ret = intel_pstate_set_epp(cpu, epp);
err = cpufreq_start_governor(policy);
- if (!ret) {
- cpu->epp_cached = epp;
+ if (!ret)
ret = err;
- }
}
}
@@ -825,7 +836,7 @@ static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
- if (global.no_turbo)
+ if (global.no_turbo || global.turbo_disabled)
*current_max = HWP_GUARANTEED_PERF(cap);
else
*current_max = HWP_HIGHEST_PERF(cap);
@@ -859,12 +870,6 @@ static void intel_pstate_hwp_set(unsigned int cpu)
cpu_data->epp_policy = cpu_data->policy;
- if (cpu_data->epp_saved >= 0) {
- epp = cpu_data->epp_saved;
- cpu_data->epp_saved = -EINVAL;
- goto update_epp;
- }
-
if (cpu_data->policy == CPUFREQ_POLICY_PERFORMANCE) {
epp = intel_pstate_get_epp(cpu_data, value);
cpu_data->epp_powersave = epp;
@@ -891,7 +896,6 @@ static void intel_pstate_hwp_set(unsigned int cpu)
epp = cpu_data->epp_powersave;
}
-update_epp:
if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
value &= ~GENMASK_ULL(31, 24);
value |= (u64)epp << 24;
@@ -903,14 +907,24 @@ skip_epp:
wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
-static void intel_pstate_hwp_force_min_perf(int cpu)
+static void intel_pstate_hwp_offline(struct cpudata *cpu)
{
- u64 value;
+ u64 value = READ_ONCE(cpu->hwp_req_cached);
int min_perf;
- value = all_cpu_data[cpu]->hwp_req_cached;
+ if (boot_cpu_has(X86_FEATURE_HWP_EPP)) {
+ /*
+ * In case the EPP has been set to "performance" by the
+ * active mode "performance" scaling algorithm, replace that
+ * temporary value with the cached EPP one.
+ */
+ value &= ~GENMASK_ULL(31, 24);
+ value |= HWP_ENERGY_PERF_PREFERENCE(cpu->epp_cached);
+ WRITE_ONCE(cpu->hwp_req_cached, value);
+ }
+
value &= ~GENMASK_ULL(31, 0);
- min_perf = HWP_LOWEST_PERF(all_cpu_data[cpu]->hwp_cap_cached);
+ min_perf = HWP_LOWEST_PERF(cpu->hwp_cap_cached);
/* Set hwp_max = hwp_min */
value |= HWP_MAX_PERF(min_perf);
@@ -920,19 +934,7 @@ static void intel_pstate_hwp_force_min_perf(int cpu)
if (boot_cpu_has(X86_FEATURE_HWP_EPP))
value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
- wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
-}
-
-static int intel_pstate_hwp_save_state(struct cpufreq_policy *policy)
-{
- struct cpudata *cpu_data = all_cpu_data[policy->cpu];
-
- if (!hwp_active)
- return 0;
-
- cpu_data->epp_saved = intel_pstate_get_epp(cpu_data, 0);
-
- return 0;
+ wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value);
}
#define POWER_CTL_EE_ENABLE 1
@@ -959,8 +961,28 @@ static void set_power_ctl_ee_state(bool input)
static void intel_pstate_hwp_enable(struct cpudata *cpudata);
+static void intel_pstate_hwp_reenable(struct cpudata *cpu)
+{
+ intel_pstate_hwp_enable(cpu);
+ wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, READ_ONCE(cpu->hwp_req_cached));
+}
+
+static int intel_pstate_suspend(struct cpufreq_policy *policy)
+{
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
+
+ pr_debug("CPU %d suspending\n", cpu->cpu);
+
+ cpu->suspended = true;
+
+ return 0;
+}
+
static int intel_pstate_resume(struct cpufreq_policy *policy)
{
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
+
+ pr_debug("CPU %d resuming\n", cpu->cpu);
/* Only restore if the system default is changed */
if (power_ctl_ee_state == POWER_CTL_EE_ENABLE)
@@ -968,18 +990,16 @@ static int intel_pstate_resume(struct cpufreq_policy *policy)
else if (power_ctl_ee_state == POWER_CTL_EE_DISABLE)
set_power_ctl_ee_state(false);
- if (!hwp_active)
- return 0;
+ if (cpu->suspended && hwp_active) {
+ mutex_lock(&intel_pstate_limits_lock);
- mutex_lock(&intel_pstate_limits_lock);
+ /* Re-enable HWP, because "online" has not done that. */
+ intel_pstate_hwp_reenable(cpu);
- if (policy->cpu == 0)
- intel_pstate_hwp_enable(all_cpu_data[policy->cpu]);
+ mutex_unlock(&intel_pstate_limits_lock);
+ }
- all_cpu_data[policy->cpu]->epp_policy = 0;
- intel_pstate_hwp_set(policy->cpu);
-
- mutex_unlock(&intel_pstate_limits_lock);
+ cpu->suspended = false;
return 0;
}
@@ -1428,7 +1448,6 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00);
wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
- cpudata->epp_policy = 0;
if (cpudata->epp_default == -EINVAL)
cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
}
@@ -2097,25 +2116,31 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
all_cpu_data[cpunum] = cpu;
- cpu->epp_default = -EINVAL;
- cpu->epp_powersave = -EINVAL;
- cpu->epp_saved = -EINVAL;
- }
-
- cpu = all_cpu_data[cpunum];
+ cpu->cpu = cpunum;
- cpu->cpu = cpunum;
+ cpu->epp_default = -EINVAL;
- if (hwp_active) {
- const struct x86_cpu_id *id;
+ if (hwp_active) {
+ const struct x86_cpu_id *id;
- intel_pstate_hwp_enable(cpu);
+ intel_pstate_hwp_enable(cpu);
- id = x86_match_cpu(intel_pstate_hwp_boost_ids);
- if (id && intel_pstate_acpi_pm_profile_server())
- hwp_boost = true;
+ id = x86_match_cpu(intel_pstate_hwp_boost_ids);
+ if (id && intel_pstate_acpi_pm_profile_server())
+ hwp_boost = true;
+ }
+ } else if (hwp_active) {
+ /*
+ * Re-enable HWP in case this happens after a resume from ACPI
+ * S3 if the CPU was offline during the whole system/resume
+ * cycle.
+ */
+ intel_pstate_hwp_reenable(cpu);
}
+ cpu->epp_powersave = -EINVAL;
+ cpu->epp_policy = 0;
+
intel_pstate_get_cpu_pstates(cpu);
pr_debug("controlling: cpu %d\n", cpunum);
@@ -2296,28 +2321,61 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
return 0;
}
-static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
+static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
{
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
+
+ pr_debug("CPU %d going offline\n", cpu->cpu);
+
+ if (cpu->suspended)
+ return 0;
+
+ /*
+ * If the CPU is an SMT thread and it goes offline with the performance
+ * settings different from the minimum, it will prevent its sibling
+ * from getting to lower performance levels, so force the minimum
+ * performance on CPU offline to prevent that from happening.
+ */
if (hwp_active)
- intel_pstate_hwp_force_min_perf(policy->cpu);
+ intel_pstate_hwp_offline(cpu);
else
- intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
+ intel_pstate_set_min_pstate(cpu);
+
+ intel_pstate_exit_perf_limits(policy);
+
+ return 0;
+}
+
+static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
+{
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
+
+ pr_debug("CPU %d going online\n", cpu->cpu);
+
+ intel_pstate_init_acpi_perf_limits(policy);
+
+ if (hwp_active) {
+ /*
+ * Re-enable HWP and clear the "suspended" flag to let "resume"
+ * know that it need not do that.
+ */
+ intel_pstate_hwp_reenable(cpu);
+ cpu->suspended = false;
+ }
+
+ return 0;
}
static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
{
- pr_debug("CPU %d exiting\n", policy->cpu);
+ pr_debug("CPU %d stopping\n", policy->cpu);
intel_pstate_clear_update_util_hook(policy->cpu);
- if (hwp_active)
- intel_pstate_hwp_save_state(policy);
-
- intel_cpufreq_stop_cpu(policy);
}
static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
{
- intel_pstate_exit_perf_limits(policy);
+ pr_debug("CPU %d exiting\n", policy->cpu);
policy->fast_switch_possible = false;
@@ -2378,6 +2436,12 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
*/
policy->policy = CPUFREQ_POLICY_POWERSAVE;
+ if (hwp_active) {
+ struct cpudata *cpu = all_cpu_data[policy->cpu];
+
+ cpu->epp_cached = intel_pstate_get_epp(cpu, 0);
+ }
+
return 0;
}
@@ -2385,11 +2449,13 @@ static struct cpufreq_driver intel_pstate = {
.flags = CPUFREQ_CONST_LOOPS,
.verify = intel_pstate_verify_policy,
.setpolicy = intel_pstate_set_policy,
- .suspend = intel_pstate_hwp_save_state,
+ .suspend = intel_pstate_suspend,
.resume = intel_pstate_resume,
.init = intel_pstate_cpu_init,
.exit = intel_pstate_cpu_exit,
.stop_cpu = intel_pstate_stop_cpu,
+ .offline = intel_pstate_cpu_offline,
+ .online = intel_pstate_cpu_online,
.update_limits = intel_pstate_update_limits,
.name = "intel_pstate",
};
@@ -2585,7 +2651,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP;
rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value);
WRITE_ONCE(cpu->hwp_req_cached, value);
- cpu->epp_cached = (value & GENMASK_ULL(31, 24)) >> 24;
+ cpu->epp_cached = intel_pstate_get_epp(cpu, value);
} else {
turbo_max = cpu->pstate.turbo_pstate;
policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY;
@@ -2644,7 +2710,10 @@ static struct cpufreq_driver intel_cpufreq = {
.fast_switch = intel_cpufreq_fast_switch,
.init = intel_cpufreq_cpu_init,
.exit = intel_cpufreq_cpu_exit,
- .stop_cpu = intel_cpufreq_stop_cpu,
+ .offline = intel_pstate_cpu_offline,
+ .online = intel_pstate_cpu_online,
+ .suspend = intel_pstate_suspend,
+ .resume = intel_pstate_resume,
.update_limits = intel_pstate_update_limits,
.name = "intel_cpufreq",
};
@@ -2667,9 +2736,6 @@ static void intel_pstate_driver_cleanup(void)
}
put_online_cpus();
- if (intel_pstate_driver == &intel_pstate)
- intel_pstate_sysfs_hide_hwp_dynamic_boost();
-
intel_pstate_driver = NULL;
}
@@ -2695,14 +2761,6 @@ static int intel_pstate_register_driver(struct cpufreq_driver *driver)
return 0;
}
-static int intel_pstate_unregister_driver(void)
-{
- cpufreq_unregister_driver(intel_pstate_driver);
- intel_pstate_driver_cleanup();
-
- return 0;
-}
-
static ssize_t intel_pstate_show_status(char *buf)
{
if (!intel_pstate_driver)
@@ -2714,20 +2772,23 @@ static ssize_t intel_pstate_show_status(char *buf)
static int intel_pstate_update_status(const char *buf, size_t size)
{
- int ret;
+ if (size == 3 && !strncmp(buf, "off", size)) {
+ if (!intel_pstate_driver)
+ return -EINVAL;
+
+ if (hwp_active)
+ return -EBUSY;
- if (size == 3 && !strncmp(buf, "off", size))
- return intel_pstate_driver ?
- intel_pstate_unregister_driver() : -EINVAL;
+ cpufreq_unregister_driver(intel_pstate_driver);
+ intel_pstate_driver_cleanup();
+ }
if (size == 6 && !strncmp(buf, "active", size)) {
if (intel_pstate_driver) {
if (intel_pstate_driver == &intel_pstate)
return 0;
- ret = intel_pstate_unregister_driver();
- if (ret)
- return ret;
+ cpufreq_unregister_driver(intel_pstate_driver);
}
return intel_pstate_register_driver(&intel_pstate);
@@ -2738,9 +2799,8 @@ static int intel_pstate_update_status(const char *buf, size_t size)
if (intel_pstate_driver == &intel_cpufreq)
return 0;
- ret = intel_pstate_unregister_driver();
- if (ret)
- return ret;
+ cpufreq_unregister_driver(intel_pstate_driver);
+ intel_pstate_sysfs_hide_hwp_dynamic_boost();
}
return intel_pstate_register_driver(&intel_cpufreq);
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 9668ea04cc80..3ca7543142bf 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -1296,13 +1296,19 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq)
}
EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
-void _opp_remove_all_static(struct opp_table *opp_table)
+bool _opp_remove_all_static(struct opp_table *opp_table)
{
struct dev_pm_opp *opp, *tmp;
+ bool ret = true;
mutex_lock(&opp_table->lock);
- if (!opp_table->parsed_static_opps || --opp_table->parsed_static_opps)
+ if (!opp_table->parsed_static_opps) {
+ ret = false;
+ goto unlock;
+ }
+
+ if (--opp_table->parsed_static_opps)
goto unlock;
list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) {
@@ -1312,6 +1318,8 @@ void _opp_remove_all_static(struct opp_table *opp_table)
unlock:
mutex_unlock(&opp_table->lock);
+
+ return ret;
}
/**
@@ -2414,13 +2422,15 @@ void _dev_pm_opp_find_and_remove_table(struct device *dev)
return;
}
- _opp_remove_all_static(opp_table);
+ /*
+ * Drop the extra reference only if the OPP table was successfully added
+ * with dev_pm_opp_of_add_table() earlier.
+ **/
+ if (_opp_remove_all_static(opp_table))
+ dev_pm_opp_put_opp_table(opp_table);
/* Drop reference taken by _find_opp_table() */
dev_pm_opp_put_opp_table(opp_table);
-
- /* Drop reference taken while the OPP table was added */
- dev_pm_opp_put_opp_table(opp_table);
}
/**
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index e51646ff279e..c3fcd571e446 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -212,7 +212,7 @@ struct opp_table {
/* Routines internal to opp core */
void dev_pm_opp_get(struct dev_pm_opp *opp);
-void _opp_remove_all_static(struct opp_table *opp_table);
+bool _opp_remove_all_static(struct opp_table *opp_table);
void _get_opp_table_kref(struct opp_table *opp_table);
int _get_opp_count(struct opp_table *opp_table);
struct opp_table *_find_opp_table(struct device *dev);