summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 19:36:30 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 19:36:30 -0700
commit40e8e98f512fc76891ae2328a63e2e4ffdbe3010 (patch)
tree2dd0b2d0a18918518d4d1149cc0ded1e06e48fa1
parentbb6950556d4b1dd1226c1f09e84b53cb37e5340f (diff)
parentc89a27f4f8fbf4dcbaf1738b42b8c68e160d7cda (diff)
Merge tag 'pm-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki: "These add Intel TPMI (Topology Aware Register and PM Capsule Interface) support to the power capping subsystem, extend the intel_idle driver to work in VM guests where MWAIT is not available, extend the system-wide power management diagnostics, fix bugs and clean up code. Specifics: - Introduce power capping core support for Intel TPMI (Topology Aware Register and PM Capsule Interface) and a TPMI interface driver for Intel RAPL (Zhang Rui, Dan Carpenter) - Fix CONFIG_IOSF_MBI dependency in the Intel RAPL power capping driver (Zhang Rui) - Fix invalid initialization for pl4_supported field in the Intel RAPL power capping driver (Sumeet Pawnikar) - Clean up the intel_idle driver, make it work with VM guests that cannot use the MWAIT instruction and address the case in which the host may enter a deep idle state when the guest is idle (Arjan van de Ven) - Prevent cpufreq drivers that provide the ->adjust_perf() callback without a ->fast_switch() one, which is used as a fallback from the former in some cases, from being registered (Wyes Karny) - Fix some issues related to the AMD P-state cpufreq driver (Mario Limonciello, Wyes Karny) - Fix the energy_performance_preference attribute handling in the intel_pstate driver in passive mode (Tero Kristo) - Fix the handling of pm_suspend_target_state when CONFIG_PM is unset (Kai-Heng Feng) - Correct spelling mistake in a comment in the hibernation code (Wang Honghui) - Add arch_resume_nosmt() prototype to avoid a "missing prototypes" build warning (Arnd Bergmann) - Restrict pm_pr_dbg() to system-wide power transitions and use it in a few additional places (Mario Limonciello) - Drop verification of in-params from genpd_add_device() and ensure that all of its callers will do it (Ulf Hansson) - Prevent possible integer overflows from occurring in genpd_parse_state() (Nikita Zhandarovich) - Reorder fields in 'struct devfreq_dev_status' to reduce its size somewhat (Christophe JAILLET) - Ensure that the 
Exynos PPMU driver is already loaded before the Exynos Bus driver starts probing so as to avoid a possible freeze while loading the kernel modules (Marek Szyprowski) - Fix variable dereferencing before NULL check in the mtk-cci devfreq driver (Sukrut Bellary)" * tag 'pm-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (42 commits) intel_idle: Add a "Long HLT" C1 state for the VM guest mode cpufreq: intel_pstate: Fix energy_performance_preference for passive cpufreq: amd-pstate: Add a kernel config option to set default mode cpufreq: amd-pstate: Set a fallback policy based on preferred_profile ACPI: CPPC: Add definition for undefined FADT preferred PM profile value cpufreq: amd-pstate: Set default governor to schedutil PM: domains: Move the verification of in-params from genpd_add_device() cpufreq: amd-pstate: Make amd-pstate EPP driver name hyphenated cpufreq: amd-pstate: Write CPPC enable bit per-socket intel_idle: Add support for using intel_idle in a VM guest using just hlt cpufreq: Fail driver register if it has adjust_perf without fast_switch intel_idle: clean up the (new) state_update_enter_method function intel_idle: refactor state->enter manipulation into its own function platform/x86/amd: pmc: Use pm_pr_dbg() for suspend related messages pinctrl: amd: Use pm_pr_dbg to show debugging messages ACPI: x86: Add pm_debug_messages for LPS0 _DSM state tracking include/linux/suspend.h: Only show pm_pr_dbg messages at suspend/resume powercap: RAPL: Fix a NULL vs IS_ERR() bug powercap: RAPL: Fix CONFIG_IOSF_MBI dependency powercap: RAPL: fix invalid initialization for pl4_supported field ...
-rw-r--r--drivers/acpi/x86/s2idle.c52
-rw-r--r--drivers/base/power/domain.c15
-rw-r--r--drivers/base/power/wakeup.c5
-rw-r--r--drivers/cpufreq/Kconfig2
-rw-r--r--drivers/cpufreq/Kconfig.x8617
-rw-r--r--drivers/cpufreq/amd-pstate.c131
-rw-r--r--drivers/cpufreq/cpufreq.c3
-rw-r--r--drivers/cpufreq/intel_pstate.c2
-rw-r--r--drivers/devfreq/exynos-bus.c1
-rw-r--r--drivers/devfreq/mtk-cci-devfreq.c3
-rw-r--r--drivers/idle/intel_idle.c231
-rw-r--r--drivers/pinctrl/pinctrl-amd.c6
-rw-r--r--drivers/platform/x86/amd/pmc.c4
-rw-r--r--drivers/powercap/Kconfig18
-rw-r--r--drivers/powercap/Makefile1
-rw-r--r--drivers/powercap/intel_rapl_common.c883
-rw-r--r--drivers/powercap/intel_rapl_msr.c31
-rw-r--r--drivers/powercap/intel_rapl_tpmi.c325
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c11
-rw-r--r--include/acpi/actbl.h3
-rw-r--r--include/linux/amd-pstate.h4
-rw-r--r--include/linux/cpufreq.h5
-rw-r--r--include/linux/devfreq.h3
-rw-r--r--include/linux/intel_rapl.h40
-rw-r--r--include/linux/suspend.h14
-rw-r--r--kernel/power/main.c6
-rw-r--r--kernel/power/snapshot.c2
27 files changed, 1324 insertions, 494 deletions
diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c
index 7214197c15a0..ce62e61a9605 100644
--- a/drivers/acpi/x86/s2idle.c
+++ b/drivers/acpi/x86/s2idle.c
@@ -59,6 +59,7 @@ static int lps0_dsm_func_mask;
static guid_t lps0_dsm_guid_microsoft;
static int lps0_dsm_func_mask_microsoft;
+static int lps0_dsm_state;
/* Device constraint entry structure */
struct lpi_device_info {
@@ -320,6 +321,44 @@ static void lpi_check_constraints(void)
}
}
+static bool acpi_s2idle_vendor_amd(void)
+{
+ return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
+}
+
+static const char *acpi_sleep_dsm_state_to_str(unsigned int state)
+{
+ if (lps0_dsm_func_mask_microsoft || !acpi_s2idle_vendor_amd()) {
+ switch (state) {
+ case ACPI_LPS0_SCREEN_OFF:
+ return "screen off";
+ case ACPI_LPS0_SCREEN_ON:
+ return "screen on";
+ case ACPI_LPS0_ENTRY:
+ return "lps0 entry";
+ case ACPI_LPS0_EXIT:
+ return "lps0 exit";
+ case ACPI_LPS0_MS_ENTRY:
+ return "lps0 ms entry";
+ case ACPI_LPS0_MS_EXIT:
+ return "lps0 ms exit";
+ }
+ } else {
+ switch (state) {
+ case ACPI_LPS0_SCREEN_ON_AMD:
+ return "screen on";
+ case ACPI_LPS0_SCREEN_OFF_AMD:
+ return "screen off";
+ case ACPI_LPS0_ENTRY_AMD:
+ return "lps0 entry";
+ case ACPI_LPS0_EXIT_AMD:
+ return "lps0 exit";
+ }
+ }
+
+ return "unknown";
+}
+
static void acpi_sleep_run_lps0_dsm(unsigned int func, unsigned int func_mask, guid_t dsm_guid)
{
union acpi_object *out_obj;
@@ -331,14 +370,15 @@ static void acpi_sleep_run_lps0_dsm(unsigned int func, unsigned int func_mask, g
rev_id, func, NULL);
ACPI_FREE(out_obj);
- acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n",
- func, out_obj ? "successful" : "failed");
+ lps0_dsm_state = func;
+ if (pm_debug_messages_on) {
+ acpi_handle_info(lps0_device_handle,
+ "%s transitioned to state %s\n",
+ out_obj ? "Successfully" : "Failed to",
+ acpi_sleep_dsm_state_to_str(lps0_dsm_state));
+ }
}
-static bool acpi_s2idle_vendor_amd(void)
-{
- return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
-}
static int validate_dsm(acpi_handle handle, const char *uuid, int rev, guid_t *dsm_guid)
{
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 32084e38b73d..5cb2023581d4 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1632,9 +1632,6 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
dev_dbg(dev, "%s()\n", __func__);
- if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev))
- return -EINVAL;
-
gpd_data = genpd_alloc_dev_data(dev, gd);
if (IS_ERR(gpd_data))
return PTR_ERR(gpd_data);
@@ -1676,6 +1673,9 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev)
{
int ret;
+ if (!genpd || !dev)
+ return -EINVAL;
+
mutex_lock(&gpd_list_lock);
ret = genpd_add_device(genpd, dev, dev);
mutex_unlock(&gpd_list_lock);
@@ -2523,6 +2523,9 @@ int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev)
struct generic_pm_domain *genpd;
int ret;
+ if (!dev)
+ return -EINVAL;
+
mutex_lock(&gpd_list_lock);
genpd = genpd_get_from_provider(genpdspec);
@@ -2939,10 +2942,10 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state,
err = of_property_read_u32(state_node, "min-residency-us", &residency);
if (!err)
- genpd_state->residency_ns = 1000 * residency;
+ genpd_state->residency_ns = 1000LL * residency;
- genpd_state->power_on_latency_ns = 1000 * exit_latency;
- genpd_state->power_off_latency_ns = 1000 * entry_latency;
+ genpd_state->power_on_latency_ns = 1000LL * exit_latency;
+ genpd_state->power_off_latency_ns = 1000LL * entry_latency;
genpd_state->fwnode = &state_node->fwnode;
return 0;
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 7cc0c0cf8eaa..a917219feea6 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -19,11 +19,6 @@
#include "power.h"
-#ifndef CONFIG_SUSPEND
-suspend_state_t pm_suspend_target_state;
-#define pm_suspend_target_state (PM_SUSPEND_ON)
-#endif
-
#define list_for_each_entry_rcu_locked(pos, head, member) \
list_for_each_entry_rcu(pos, head, member, \
srcu_read_lock_held(&wakeup_srcu))
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 2c839bd2b051..a1c51abddbc5 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -38,7 +38,7 @@ choice
prompt "Default CPUFreq governor"
default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1110_CPUFREQ
default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if ARM64 || ARM
- default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if X86_INTEL_PSTATE && SMP
+ default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if (X86_INTEL_PSTATE || X86_AMD_PSTATE) && SMP
default CPU_FREQ_DEFAULT_GOV_PERFORMANCE
help
This option sets which CPUFreq governor shall be loaded at
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 00476e94db90..438c9e75a04d 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -51,6 +51,23 @@ config X86_AMD_PSTATE
If in doubt, say N.
+config X86_AMD_PSTATE_DEFAULT_MODE
+ int "AMD Processor P-State default mode"
+ depends on X86_AMD_PSTATE
+ default 3 if X86_AMD_PSTATE
+ range 1 4
+ help
+ Select the default mode the amd-pstate driver will use on
+ supported hardware.
+ The value set has the following meanings:
+ 1 -> Disabled
+ 2 -> Passive
+ 3 -> Active (EPP)
+ 4 -> Guided
+
+ For details, take a look at:
+ <file:Documentation/admin-guide/pm/amd-pstate.rst>.
+
config X86_AMD_PSTATE_UT
tristate "selftest for AMD Processor P-State driver"
depends on X86 && ACPI_PROCESSOR
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index ddd346a239e0..81fba0dcbee9 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -62,7 +62,8 @@
static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
-static int cppc_state = AMD_PSTATE_DISABLE;
+static int cppc_state = AMD_PSTATE_UNDEFINED;
+static bool cppc_enabled;
/*
* AMD Energy Preference Performance (EPP)
@@ -228,7 +229,28 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
static inline int pstate_enable(bool enable)
{
- return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
+ int ret, cpu;
+ unsigned long logical_proc_id_mask = 0;
+
+ if (enable == cppc_enabled)
+ return 0;
+
+ for_each_present_cpu(cpu) {
+ unsigned long logical_id = topology_logical_die_id(cpu);
+
+ if (test_bit(logical_id, &logical_proc_id_mask))
+ continue;
+
+ set_bit(logical_id, &logical_proc_id_mask);
+
+ ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
+ enable);
+ if (ret)
+ return ret;
+ }
+
+ cppc_enabled = enable;
+ return 0;
}
static int cppc_enable(bool enable)
@@ -236,6 +258,9 @@ static int cppc_enable(bool enable)
int cpu, ret = 0;
struct cppc_perf_ctrls perf_ctrls;
+ if (enable == cppc_enabled)
+ return 0;
+
for_each_present_cpu(cpu) {
ret = cppc_set_enable(cpu, enable);
if (ret)
@@ -251,6 +276,7 @@ static int cppc_enable(bool enable)
}
}
+ cppc_enabled = enable;
return ret;
}
@@ -1045,6 +1071,26 @@ static const struct attribute_group amd_pstate_global_attr_group = {
.attrs = pstate_global_attributes,
};
+static bool amd_pstate_acpi_pm_profile_server(void)
+{
+ switch (acpi_gbl_FADT.preferred_profile) {
+ case PM_ENTERPRISE_SERVER:
+ case PM_SOHO_SERVER:
+ case PM_PERFORMANCE_SERVER:
+ return true;
+ }
+ return false;
+}
+
+static bool amd_pstate_acpi_pm_profile_undefined(void)
+{
+ if (acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED)
+ return true;
+ if (acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES)
+ return true;
+ return false;
+}
+
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
@@ -1102,10 +1148,14 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
policy->max = policy->cpuinfo.max_freq;
/*
- * Set the policy to powersave to provide a valid fallback value in case
+ * Set the policy to provide a valid fallback value in case
* the default cpufreq governor is neither powersave nor performance.
*/
- policy->policy = CPUFREQ_POLICY_POWERSAVE;
+ if (amd_pstate_acpi_pm_profile_server() ||
+ amd_pstate_acpi_pm_profile_undefined())
+ policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+ else
+ policy->policy = CPUFREQ_POLICY_POWERSAVE;
if (boot_cpu_has(X86_FEATURE_CPPC)) {
ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
@@ -1356,10 +1406,29 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
.online = amd_pstate_epp_cpu_online,
.suspend = amd_pstate_epp_suspend,
.resume = amd_pstate_epp_resume,
- .name = "amd_pstate_epp",
+ .name = "amd-pstate-epp",
.attr = amd_pstate_epp_attr,
};
+static int __init amd_pstate_set_driver(int mode_idx)
+{
+ if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
+ cppc_state = mode_idx;
+ if (cppc_state == AMD_PSTATE_DISABLE)
+ pr_info("driver is explicitly disabled\n");
+
+ if (cppc_state == AMD_PSTATE_ACTIVE)
+ current_pstate_driver = &amd_pstate_epp_driver;
+
+ if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
+ current_pstate_driver = &amd_pstate_driver;
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
static int __init amd_pstate_init(void)
{
struct device *dev_root;
@@ -1367,15 +1436,6 @@ static int __init amd_pstate_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return -ENODEV;
- /*
- * by default the pstate driver is disabled to load
- * enable the amd_pstate passive mode driver explicitly
- * with amd_pstate=passive or other modes in kernel command line
- */
- if (cppc_state == AMD_PSTATE_DISABLE) {
- pr_info("driver load is disabled, boot with specific mode to enable this\n");
- return -ENODEV;
- }
if (!acpi_cpc_valid()) {
pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
@@ -1386,6 +1446,33 @@ static int __init amd_pstate_init(void)
if (cpufreq_get_current_driver())
return -EEXIST;
+ switch (cppc_state) {
+ case AMD_PSTATE_UNDEFINED:
+ /* Disable on the following configs by default:
+ * 1. Undefined platforms
+ * 2. Server platforms
+ * 3. Shared memory designs
+ */
+ if (amd_pstate_acpi_pm_profile_undefined() ||
+ amd_pstate_acpi_pm_profile_server() ||
+ !boot_cpu_has(X86_FEATURE_CPPC)) {
+ pr_info("driver load is disabled, boot with specific mode to enable this\n");
+ return -ENODEV;
+ }
+ ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE);
+ if (ret)
+ return ret;
+ break;
+ case AMD_PSTATE_DISABLE:
+ return -ENODEV;
+ case AMD_PSTATE_PASSIVE:
+ case AMD_PSTATE_ACTIVE:
+ case AMD_PSTATE_GUIDED:
+ break;
+ default:
+ return -EINVAL;
+ }
+
/* capability check */
if (boot_cpu_has(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n");
@@ -1438,21 +1525,7 @@ static int __init amd_pstate_param(char *str)
size = strlen(str);
mode_idx = get_mode_idx_from_str(str, size);
- if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
- cppc_state = mode_idx;
- if (cppc_state == AMD_PSTATE_DISABLE)
- pr_info("driver is explicitly disabled\n");
-
- if (cppc_state == AMD_PSTATE_ACTIVE)
- current_pstate_driver = &amd_pstate_epp_driver;
-
- if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
- current_pstate_driver = &amd_pstate_driver;
-
- return 0;
- }
-
- return -EINVAL;
+ return amd_pstate_set_driver(mode_idx);
}
early_param("amd_pstate", amd_pstate_param);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 6b52ebe5a890..50bbc969ffe5 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2828,7 +2828,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
(driver_data->setpolicy && (driver_data->target_index ||
driver_data->target)) ||
(!driver_data->get_intermediate != !driver_data->target_intermediate) ||
- (!driver_data->online != !driver_data->offline))
+ (!driver_data->online != !driver_data->offline) ||
+ (driver_data->adjust_perf && !driver_data->fast_switch))
return -EINVAL;
pr_debug("trying to register driver %s\n", driver_data->name);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2548ec92faa2..f29182512b98 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -824,6 +824,8 @@ static ssize_t store_energy_performance_preference(
err = cpufreq_start_governor(policy);
if (!ret)
ret = err;
+ } else {
+ ret = 0;
}
}
diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
index 88414445adf3..245898f1a88e 100644
--- a/drivers/devfreq/exynos-bus.c
+++ b/drivers/devfreq/exynos-bus.c
@@ -518,6 +518,7 @@ static struct platform_driver exynos_bus_platdrv = {
};
module_platform_driver(exynos_bus_platdrv);
+MODULE_SOFTDEP("pre: exynos_ppmu");
MODULE_DESCRIPTION("Generic Exynos Bus frequency driver");
MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/devfreq/mtk-cci-devfreq.c b/drivers/devfreq/mtk-cci-devfreq.c
index e5458ada5197..6354622eda65 100644
--- a/drivers/devfreq/mtk-cci-devfreq.c
+++ b/drivers/devfreq/mtk-cci-devfreq.c
@@ -127,7 +127,7 @@ static int mtk_ccifreq_target(struct device *dev, unsigned long *freq,
u32 flags)
{
struct mtk_ccifreq_drv *drv = dev_get_drvdata(dev);
- struct clk *cci_pll = clk_get_parent(drv->cci_clk);
+ struct clk *cci_pll;
struct dev_pm_opp *opp;
unsigned long opp_rate;
int voltage, pre_voltage, inter_voltage, target_voltage, ret;
@@ -139,6 +139,7 @@ static int mtk_ccifreq_target(struct device *dev, unsigned long *freq,
return 0;
inter_voltage = drv->inter_voltage;
+ cci_pll = clk_get_parent(drv->cci_clk);
opp_rate = *freq;
opp = devfreq_recommended_opp(dev, &opp_rate, 1);
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index aa2d19db2b1d..34201d7ef33e 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -199,6 +199,43 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
return __intel_idle(dev, drv, index);
}
+static __always_inline int __intel_idle_hlt(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ raw_safe_halt();
+ raw_local_irq_disable();
+ return index;
+}
+
+/**
+ * intel_idle_hlt - Ask the processor to enter the given idle state using hlt.
+ * @dev: cpuidle device of the target CPU.
+ * @drv: cpuidle driver (assumed to point to intel_idle_driver).
+ * @index: Target idle state index.
+ *
+ * Use the HLT instruction to notify the processor that the CPU represented by
+ * @dev is idle and it can try to enter the idle state corresponding to @index.
+ *
+ * Must be called under local_irq_disable().
+ */
+static __cpuidle int intel_idle_hlt(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ return __intel_idle_hlt(dev, drv, index);
+}
+
+static __cpuidle int intel_idle_hlt_irq_on(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ int ret;
+
+ raw_local_irq_enable();
+ ret = __intel_idle_hlt(dev, drv, index);
+ raw_local_irq_disable();
+
+ return ret;
+}
+
/**
* intel_idle_s2idle - Ask the processor to enter the given idle state.
* @dev: cpuidle device of the target CPU.
@@ -1242,6 +1279,25 @@ static struct cpuidle_state snr_cstates[] __initdata = {
.enter = NULL }
};
+static struct cpuidle_state vmguest_cstates[] __initdata = {
+ {
+ .name = "C1",
+ .desc = "HLT",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
+ .exit_latency = 5,
+ .target_residency = 10,
+ .enter = &intel_idle_hlt, },
+ {
+ .name = "C1L",
+ .desc = "Long HLT",
+ .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .exit_latency = 5,
+ .target_residency = 200,
+ .enter = &intel_idle_hlt, },
+ {
+ .enter = NULL }
+};
+
static const struct idle_cpu idle_cpu_nehalem __initconst = {
.state_table = nehalem_cstates,
.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
@@ -1839,6 +1895,66 @@ static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
return true;
}
+static void state_update_enter_method(struct cpuidle_state *state, int cstate)
+{
+ if (state->enter == intel_idle_hlt) {
+ if (force_irq_on) {
+ pr_info("forced intel_idle_irq for state %d\n", cstate);
+ state->enter = intel_idle_hlt_irq_on;
+ }
+ return;
+ }
+ if (state->enter == intel_idle_hlt_irq_on)
+ return; /* no update scenarios */
+
+ if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
+ /*
+ * Combining XSTATE with IBRS or IRQ_ENABLE flags
+ * is not currently supported by this driver.
+ */
+ WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
+ WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ state->enter = intel_idle_xstate;
+ return;
+ }
+
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
+ state->flags & CPUIDLE_FLAG_IBRS) {
+ /*
+ * IBRS mitigation requires that C-states are entered
+ * with interrupts disabled.
+ */
+ WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ state->enter = intel_idle_ibrs;
+ return;
+ }
+
+ if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
+ state->enter = intel_idle_irq;
+ return;
+ }
+
+ if (force_irq_on) {
+ pr_info("forced intel_idle_irq for state %d\n", cstate);
+ state->enter = intel_idle_irq;
+ }
+}
+
+/*
+ * For mwait based states, we want to verify the cpuid data to see if the state
+ * is actually supported by this specific CPU.
+ * For non-mwait based states, this check should be skipped.
+ */
+static bool should_verify_mwait(struct cpuidle_state *state)
+{
+ if (state->enter == intel_idle_hlt)
+ return false;
+ if (state->enter == intel_idle_hlt_irq_on)
+ return false;
+
+ return true;
+}
+
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
int cstate;
@@ -1887,35 +2003,15 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
}
mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
- if (!intel_idle_verify_cstate(mwait_hint))
+ if (should_verify_mwait(&cpuidle_state_table[cstate]) && !intel_idle_verify_cstate(mwait_hint))
continue;
/* Structure copy. */
drv->states[drv->state_count] = cpuidle_state_table[cstate];
state = &drv->states[drv->state_count];
- if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
- /*
- * Combining with XSTATE with IBRS or IRQ_ENABLE flags
- * is not currently supported but this driver.
- */
- WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
- WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
- state->enter = intel_idle_xstate;
- } else if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
- state->flags & CPUIDLE_FLAG_IBRS) {
- /*
- * IBRS mitigation requires that C-states are entered
- * with interrupts disabled.
- */
- WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
- state->enter = intel_idle_ibrs;
- } else if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
- state->enter = intel_idle_irq;
- } else if (force_irq_on) {
- pr_info("forced intel_idle_irq for state %d\n", cstate);
- state->enter = intel_idle_irq;
- }
+ state_update_enter_method(state, cstate);
+
if ((disabled_states_mask & BIT(drv->state_count)) ||
((icpu->use_acpi || force_use_acpi) &&
@@ -2041,6 +2137,93 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}
+/*
+ * Match up the latency and break even point of the bare metal (cpu based)
+ * states with the deepest VM available state.
+ *
+ * We only want to do this for the deepest states, the ones that have
+ * the TLB_FLUSHED flag set.
+ *
+ * All our short idle states are dominated by vmexit/vmenter latencies,
+ * not the underlying hardware latencies so we keep our values for these.
+ */
+static void matchup_vm_state_with_baremetal(void)
+{
+ int cstate;
+
+ for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
+ int matching_cstate;
+
+ if (intel_idle_max_cstate_reached(cstate))
+ break;
+
+ if (!cpuidle_state_table[cstate].enter)
+ break;
+
+ if (!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_TLB_FLUSHED))
+ continue;
+
+ for (matching_cstate = 0; matching_cstate < CPUIDLE_STATE_MAX; ++matching_cstate) {
+ if (!icpu->state_table[matching_cstate].enter)
+ break;
+ if (icpu->state_table[matching_cstate].exit_latency > cpuidle_state_table[cstate].exit_latency) {
+ cpuidle_state_table[cstate].exit_latency = icpu->state_table[matching_cstate].exit_latency;
+ cpuidle_state_table[cstate].target_residency = icpu->state_table[matching_cstate].target_residency;
+ }
+ }
+
+ }
+}
+
+
+static int __init intel_idle_vminit(const struct x86_cpu_id *id)
+{
+ int retval;
+
+ cpuidle_state_table = vmguest_cstates;
+
+ icpu = (const struct idle_cpu *)id->driver_data;
+
+ pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
+ boot_cpu_data.x86_model);
+
+ intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
+ if (!intel_idle_cpuidle_devices)
+ return -ENOMEM;
+
+ /*
+ * We don't know exactly what the host will do when we go idle, but as a worst-case estimate
+ * we can assume that the exit latency of the deepest host state will be hit for our
+ * deep (long duration) guest idle state.
+ * The same logic applies to the break-even point for the long duration guest idle state.
+ * So let's copy these two properties from the table we found for the host CPU type.
+ */
+ matchup_vm_state_with_baremetal();
+
+ intel_idle_cpuidle_driver_init(&intel_idle_driver);
+
+ retval = cpuidle_register_driver(&intel_idle_driver);
+ if (retval) {
+ struct cpuidle_driver *drv = cpuidle_get_driver();
+ printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
+ drv ? drv->name : "none");
+ goto init_driver_fail;
+ }
+
+ retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
+ intel_idle_cpu_online, NULL);
+ if (retval < 0)
+ goto hp_setup_fail;
+
+ return 0;
+hp_setup_fail:
+ intel_idle_cpuidle_devices_uninit();
+ cpuidle_unregister_driver(&intel_idle_driver);
+init_driver_fail:
+ free_percpu(intel_idle_cpuidle_devices);
+ return retval;
+}
+
static int __init intel_idle_init(void)
{
const struct x86_cpu_id *id;
@@ -2059,6 +2242,8 @@ static int __init intel_idle_init(void)
id = x86_match_cpu(intel_idle_ids);
if (id) {
if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+ return intel_idle_vminit(id);
pr_debug("Please enable MWAIT in BIOS SETUP\n");
return -ENODEV;
}
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index f279b360c20d..43d3530bab48 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -30,6 +30,7 @@
#include <linux/pinctrl/pinconf.h>
#include <linux/pinctrl/pinconf-generic.h>
#include <linux/pinctrl/pinmux.h>
+#include <linux/suspend.h>
#include "core.h"
#include "pinctrl-utils.h"
@@ -636,9 +637,8 @@ static bool do_amd_gpio_irq_handler(int irq, void *dev_id)
regval = readl(regs + i);
if (regval & PIN_IRQ_PENDING)
- dev_dbg(&gpio_dev->pdev->dev,
- "GPIO %d is active: 0x%x",
- irqnr + i, regval);
+ pm_pr_dbg("GPIO %d is active: 0x%x",
+ irqnr + i, regval);
/* caused wake on resume context for shared IRQ */
if (irq < 0 && (regval & BIT(WAKE_STS_OFF)))
diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c
index 427905714f79..1304cd6f13f6 100644
--- a/drivers/platform/x86/amd/pmc.c
+++ b/drivers/platform/x86/amd/pmc.c
@@ -543,7 +543,7 @@ static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev,
}
if (dev)
- dev_dbg(pdev->dev, "SMU idlemask s0i3: 0x%x\n", val);
+ pm_pr_dbg("SMU idlemask s0i3: 0x%x\n", val);
if (s)
seq_printf(s, "SMU idlemask : 0x%x\n", val);
@@ -769,7 +769,7 @@ static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
*arg |= (duration << 16);
rc = rtc_alarm_irq_enable(rtc_device, 0);
- dev_dbg(pdev->dev, "wakeup timer programmed for %lld seconds\n", duration);
+ pm_pr_dbg("wakeup timer programmed for %lld seconds\n", duration);
return rc;
}
diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index 90d33cd1b670..69ef8d081c98 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -18,10 +18,12 @@ if POWERCAP
# Client driver configurations go here.
config INTEL_RAPL_CORE
tristate
+ depends on PCI
+ select IOSF_MBI
config INTEL_RAPL
tristate "Intel RAPL Support via MSR Interface"
- depends on X86 && IOSF_MBI
+ depends on X86 && PCI
select INTEL_RAPL_CORE
help
This enables support for the Intel Running Average Power Limit (RAPL)
@@ -33,6 +35,20 @@ config INTEL_RAPL
controller, CPU core (Power Plane 0), graphics uncore (Power Plane
1), etc.
+config INTEL_RAPL_TPMI
+ tristate "Intel RAPL Support via TPMI Interface"
+ depends on X86
+ depends on INTEL_TPMI
+ select INTEL_RAPL_CORE
+ help
+ This enables support for the Intel Running Average Power Limit (RAPL)
+ technology via TPMI interface, which allows power limits to be enforced
+ and monitored.
+
+ In RAPL, the platform level settings are divided into domains for
+ fine grained control. These domains include processor package, DRAM
+ controller, platform, etc.
+
config IDLE_INJECT
bool "Idle injection framework"
depends on CPU_IDLE
diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile
index 4474201b4aa7..5ab0dce565b9 100644
--- a/drivers/powercap/Makefile
+++ b/drivers/powercap/Makefile
@@ -5,5 +5,6 @@ obj-$(CONFIG_DTPM_DEVFREQ) += dtpm_devfreq.o
obj-$(CONFIG_POWERCAP) += powercap_sys.o
obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o
obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o
+obj-$(CONFIG_INTEL_RAPL_TPMI) += intel_rapl_tpmi.o
obj-$(CONFIG_IDLE_INJECT) += idle_inject.o
obj-$(CONFIG_ARM_SCMI_POWERCAP) += arm_scmi_powercap.o
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 8970c7b80884..4e646e5e48f6 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -75,6 +75,15 @@
#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19)
#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51)
+/* bitmasks for RAPL TPMI, used by primitive access functions */
+#define TPMI_POWER_LIMIT_MASK 0x3FFFF
+#define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62)
+#define TPMI_TIME_WINDOW_MASK (0x7FULL<<18)
+#define TPMI_INFO_SPEC_MASK 0x3FFFF
+#define TPMI_INFO_MIN_MASK (0x3FFFFULL << 18)
+#define TPMI_INFO_MAX_MASK (0x3FFFFULL << 36)
+#define TPMI_INFO_MAX_TIME_WIN_MASK (0x7FULL << 54)
+
/* Non HW constants */
#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
#define RAPL_PRIMITIVE_DUMMY BIT(2)
@@ -94,26 +103,120 @@ enum unit_type {
#define DOMAIN_STATE_INACTIVE BIT(0)
#define DOMAIN_STATE_POWER_LIMIT_SET BIT(1)
-#define DOMAIN_STATE_BIOS_LOCKED BIT(2)
-static const char pl1_name[] = "long_term";
-static const char pl2_name[] = "short_term";
-static const char pl4_name[] = "peak_power";
+/*
+ * Names of the supported power limits, indexed by power limit id
+ * (POWER_LIMIT1, POWER_LIMIT2, POWER_LIMIT4). The table is read-only, so
+ * both the strings and the pointer array itself are const.
+ */
+static const char * const pl_names[NR_POWER_LIMITS] = {
+	[POWER_LIMIT1] = "long_term",
+	[POWER_LIMIT2] = "short_term",
+	[POWER_LIMIT4] = "peak_power",
+};
+
+/*
+ * Attributes that can be queried or set for a single power limit.
+ * get_pl_prim() maps a (power limit, attribute) pair to the matching
+ * enum rapl_primitives value, or -EINVAL when the pair is not supported.
+ */
+enum pl_prims {
+ PL_ENABLE,
+ PL_CLAMP,
+ PL_LIMIT,
+ PL_TIME_WINDOW,
+ PL_MAX_POWER,
+ PL_LOCK,
+};
+
+/*
+ * A power limit is usable on @rd when its id lies within
+ * [POWER_LIMIT1, POWER_LIMIT4] and the domain has a name set for it.
+ */
+static bool is_pl_valid(struct rapl_domain *rd, int pl)
+{
+	bool in_range = pl >= POWER_LIMIT1 && pl <= POWER_LIMIT4;
+
+	/* Range check first so rpl[] is never indexed out of bounds */
+	return in_range && rd->rpl[pl].name != NULL;
+}
+
+/*
+ * Return the primitive that holds the Lock bit of power limit @pl on @rd,
+ * or -EINVAL when the interface has no Lock bit for it.
+ */
+static int get_pl_lock_prim(struct rapl_domain *rd, int pl)
+{
+	/* TPMI has a dedicated Lock primitive per power limit */
+	if (rd->rp->priv->type == RAPL_IF_TPMI) {
+		switch (pl) {
+		case POWER_LIMIT1:
+			return PL1_LOCK;
+		case POWER_LIMIT2:
+			return PL2_LOCK;
+		case POWER_LIMIT4:
+			return PL4_LOCK;
+		default:
+			break;
+		}
+	}
+
+	/* MSR/MMIO Interface doesn't have Lock bit for PL4 */
+	if (pl == POWER_LIMIT4)
+		return -EINVAL;
+
+	/*
+	 * Power Limit register that supports two power limits has a different
+	 * bit position for the Lock bit.
+	 */
+	return (rd->rp->priv->limits[rd->id] & BIT(POWER_LIMIT2)) ?
+		FW_HIGH_LOCK : FW_LOCK;
+}
+
+/*
+ * Translate attribute @prim of power limit @pl into the corresponding
+ * enum rapl_primitives value for @rd.
+ *
+ * Returns -EINVAL for an unknown power limit or for an attribute the power
+ * limit does not provide (e.g. PL_CLAMP on the TPMI interface, or a time
+ * window for PL4).
+ */
+static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim)
+{
+	switch (pl) {
+	case POWER_LIMIT1:
+		switch (prim) {
+		case PL_ENABLE:
+			return PL1_ENABLE;
+		case PL_CLAMP:
+			if (rd->rp->priv->type != RAPL_IF_TPMI)
+				return PL1_CLAMP;
+			break;
+		case PL_LIMIT:
+			return POWER_LIMIT1;
+		case PL_TIME_WINDOW:
+			return TIME_WINDOW1;
+		case PL_MAX_POWER:
+			return THERMAL_SPEC_POWER;
+		case PL_LOCK:
+			return get_pl_lock_prim(rd, pl);
+		default:
+			break;
+		}
+		break;
+	case POWER_LIMIT2:
+		switch (prim) {
+		case PL_ENABLE:
+			return PL2_ENABLE;
+		case PL_CLAMP:
+			if (rd->rp->priv->type != RAPL_IF_TPMI)
+				return PL2_CLAMP;
+			break;
+		case PL_LIMIT:
+			return POWER_LIMIT2;
+		case PL_TIME_WINDOW:
+			return TIME_WINDOW2;
+		case PL_MAX_POWER:
+			return MAX_POWER;
+		case PL_LOCK:
+			return get_pl_lock_prim(rd, pl);
+		default:
+			break;
+		}
+		break;
+	case POWER_LIMIT4:
+		switch (prim) {
+		case PL_LIMIT:
+			return POWER_LIMIT4;
+		case PL_ENABLE:
+			return PL4_ENABLE;
+		case PL_MAX_POWER:
+			/* PL4 would be around two times PL2, use same prim as PL2. */
+			return MAX_POWER;
+		case PL_LOCK:
+			return get_pl_lock_prim(rd, pl);
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	/* Unknown power limit, or attribute not available for it */
+	return -EINVAL;
+}
#define power_zone_to_rapl_domain(_zone) \
container_of(_zone, struct rapl_domain, power_zone)
struct rapl_defaults {
u8 floor_freq_reg_addr;
- int (*check_unit)(struct rapl_package *rp, int cpu);
+ int (*check_unit)(struct rapl_domain *rd);
void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
- u64 (*compute_time_window)(struct rapl_package *rp, u64 val,
+ u64 (*compute_time_window)(struct rapl_domain *rd, u64 val,
bool to_raw);
unsigned int dram_domain_energy_unit;
unsigned int psys_domain_energy_unit;
bool spr_psys_bits;
};
-static struct rapl_defaults *rapl_defaults;
+static struct rapl_defaults *defaults_msr;
+static const struct rapl_defaults defaults_tpmi;
+
+/*
+ * Return the rapl_defaults stored in the private data of @rp
+ * (rp->priv->defaults); rapl_config() sets it according to the interface
+ * type.
+ */
+static struct rapl_defaults *get_defaults(struct rapl_package *rp)
+{
+ return rp->priv->defaults;
+}
/* Sideband MBI registers */
#define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2)
@@ -150,6 +253,12 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
static int rapl_write_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
unsigned long long value);
+static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
+ enum pl_prims pl_prim,
+ bool xlate, u64 *data);
+static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
+ enum pl_prims pl_prim,
+ unsigned long long value);
static u64 rapl_unit_xlate(struct rapl_domain *rd,
enum unit_type type, u64 value, int to_raw);
static void package_power_limit_irq_save(struct rapl_package *rp);
@@ -217,7 +326,7 @@ static int find_nr_power_limit(struct rapl_domain *rd)
int i, nr_pl = 0;
for (i = 0; i < NR_POWER_LIMITS; i++) {
- if (rd->rpl[i].name)
+ if (is_pl_valid(rd, i))
nr_pl++;
}
@@ -227,37 +336,35 @@ static int find_nr_power_limit(struct rapl_domain *rd)
static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
-
- if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
- return -EACCES;
+ struct rapl_defaults *defaults = get_defaults(rd->rp);
+ int ret;
cpus_read_lock();
- rapl_write_data_raw(rd, PL1_ENABLE, mode);
- if (rapl_defaults->set_floor_freq)
- rapl_defaults->set_floor_freq(rd, mode);
+ ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode);
+ if (!ret && defaults->set_floor_freq)
+ defaults->set_floor_freq(rd, mode);
cpus_read_unlock();
- return 0;
+ return ret;
}
static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
u64 val;
+ int ret;
- if (rd->state & DOMAIN_STATE_BIOS_LOCKED) {
+ if (rd->rpl[POWER_LIMIT1].locked) {
*mode = false;
return 0;
}
cpus_read_lock();
- if (rapl_read_data_raw(rd, PL1_ENABLE, true, &val)) {
- cpus_read_unlock();
- return -EIO;
- }
- *mode = val;
+ ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, true, &val);
+ if (!ret)
+ *mode = val;
cpus_read_unlock();
- return 0;
+ return ret;
}
/* per RAPL domain ops, in the order of rapl_domain_type */
@@ -313,8 +420,8 @@ static int contraint_to_pl(struct rapl_domain *rd, int cid)
{
int i, j;
- for (i = 0, j = 0; i < NR_POWER_LIMITS; i++) {
- if ((rd->rpl[i].name) && j++ == cid) {
+ for (i = POWER_LIMIT1, j = 0; i < NR_POWER_LIMITS; i++) {
+ if (is_pl_valid(rd, i) && j++ == cid) {
pr_debug("%s: index %d\n", __func__, i);
return i;
}
@@ -335,36 +442,11 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid,
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
- if (id < 0) {
- ret = id;
- goto set_exit;
- }
-
rp = rd->rp;
- if (rd->state & DOMAIN_STATE_BIOS_LOCKED) {
- dev_warn(&power_zone->dev,
- "%s locked by BIOS, monitoring only\n", rd->name);
- ret = -EACCES;
- goto set_exit;
- }
-
- switch (rd->rpl[id].prim_id) {
- case PL1_ENABLE:
- rapl_write_data_raw(rd, POWER_LIMIT1, power_limit);
- break;
- case PL2_ENABLE:
- rapl_write_data_raw(rd, POWER_LIMIT2, power_limit);
- break;
- case PL4_ENABLE:
- rapl_write_data_raw(rd, POWER_LIMIT4, power_limit);
- break;
- default:
- ret = -EINVAL;
- }
+ ret = rapl_write_pl_data(rd, id, PL_LIMIT, power_limit);
if (!ret)
package_power_limit_irq_save(rp);
-set_exit:
cpus_read_unlock();
return ret;
}
@@ -374,38 +456,17 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
{
struct rapl_domain *rd;
u64 val;
- int prim;
int ret = 0;
int id;
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
- if (id < 0) {
- ret = id;
- goto get_exit;
- }
- switch (rd->rpl[id].prim_id) {
- case PL1_ENABLE:
- prim = POWER_LIMIT1;
- break;
- case PL2_ENABLE:
- prim = POWER_LIMIT2;
- break;
- case PL4_ENABLE:
- prim = POWER_LIMIT4;
- break;
- default:
- cpus_read_unlock();
- return -EINVAL;
- }
- if (rapl_read_data_raw(rd, prim, true, &val))
- ret = -EIO;
- else
+ ret = rapl_read_pl_data(rd, id, PL_LIMIT, true, &val);
+ if (!ret)
*data = val;
-get_exit:
cpus_read_unlock();
return ret;
@@ -421,23 +482,9 @@ static int set_time_window(struct powercap_zone *power_zone, int cid,
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
- if (id < 0) {
- ret = id;
- goto set_time_exit;
- }
- switch (rd->rpl[id].prim_id) {
- case PL1_ENABLE:
- rapl_write_data_raw(rd, TIME_WINDOW1, window);
- break;
- case PL2_ENABLE:
- rapl_write_data_raw(rd, TIME_WINDOW2, window);
- break;
- default:
- ret = -EINVAL;
- }
+ ret = rapl_write_pl_data(rd, id, PL_TIME_WINDOW, window);
-set_time_exit:
cpus_read_unlock();
return ret;
}
@@ -453,33 +500,11 @@ static int get_time_window(struct powercap_zone *power_zone, int cid,
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
- if (id < 0) {
- ret = id;
- goto get_time_exit;
- }
- switch (rd->rpl[id].prim_id) {
- case PL1_ENABLE:
- ret = rapl_read_data_raw(rd, TIME_WINDOW1, true, &val);
- break;
- case PL2_ENABLE:
- ret = rapl_read_data_raw(rd, TIME_WINDOW2, true, &val);
- break;
- case PL4_ENABLE:
- /*
- * Time window parameter is not applicable for PL4 entry
- * so assigining '0' as default value.
- */
- val = 0;
- break;
- default:
- cpus_read_unlock();
- return -EINVAL;
- }
+ ret = rapl_read_pl_data(rd, id, PL_TIME_WINDOW, true, &val);
if (!ret)
*data = val;
-get_time_exit:
cpus_read_unlock();
return ret;
@@ -499,36 +524,23 @@ static const char *get_constraint_name(struct powercap_zone *power_zone,
return NULL;
}
-static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
+static int get_max_power(struct powercap_zone *power_zone, int cid, u64 *data)
{
struct rapl_domain *rd;
u64 val;
- int prim;
int ret = 0;
+ int id;
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
- switch (rd->rpl[id].prim_id) {
- case PL1_ENABLE:
- prim = THERMAL_SPEC_POWER;
- break;
- case PL2_ENABLE:
- prim = MAX_POWER;
- break;
- case PL4_ENABLE:
- prim = MAX_POWER;
- break;
- default:
- cpus_read_unlock();
- return -EINVAL;
- }
- if (rapl_read_data_raw(rd, prim, true, &val))
- ret = -EIO;
- else
+ id = contraint_to_pl(rd, cid);
+
+ ret = rapl_read_pl_data(rd, id, PL_MAX_POWER, true, &val);
+ if (!ret)
*data = val;
/* As a generalization rule, PL4 would be around two times PL2. */
- if (rd->rpl[id].prim_id == PL4_ENABLE)
+ if (id == POWER_LIMIT4)
*data = *data * 2;
cpus_read_unlock();
@@ -545,6 +557,12 @@ static const struct powercap_zone_constraint_ops constraint_ops = {
.get_name = get_constraint_name,
};
+/*
+ * Return the id handed to the read_raw/write_raw callbacks: the lead CPU
+ * when the package has one (lead_cpu >= 0), otherwise the package id.
+ */
+static int get_rid(struct rapl_package *rp)
+{
+	if (rp->lead_cpu >= 0)
+		return rp->lead_cpu;
+
+	return rp->id;
+}
+
/* called after domain detection and package level data are set */
static void rapl_init_domains(struct rapl_package *rp)
{
@@ -554,6 +572,7 @@ static void rapl_init_domains(struct rapl_package *rp)
for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
unsigned int mask = rp->domain_map & (1 << i);
+ int t;
if (!mask)
continue;
@@ -562,51 +581,26 @@ static void rapl_init_domains(struct rapl_package *rp)
if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) {
snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d",
- topology_physical_package_id(rp->lead_cpu));
- } else
+ rp->lead_cpu >= 0 ? topology_physical_package_id(rp->lead_cpu) :
+ rp->id);
+ } else {
snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s",
rapl_domain_names[i]);
+ }
rd->id = i;
- rd->rpl[0].prim_id = PL1_ENABLE;
- rd->rpl[0].name = pl1_name;
- /*
- * The PL2 power domain is applicable for limits two
- * and limits three
- */
- if (rp->priv->limits[i] >= 2) {
- rd->rpl[1].prim_id = PL2_ENABLE;
- rd->rpl[1].name = pl2_name;
- }
+ /* PL1 is supported by default */
+ rp->priv->limits[i] |= BIT(POWER_LIMIT1);
- /* Enable PL4 domain if the total power limits are three */
- if (rp->priv->limits[i] == 3) {
- rd->rpl[2].prim_id = PL4_ENABLE;
- rd->rpl[2].name = pl4_name;
+ for (t = POWER_LIMIT1; t < NR_POWER_LIMITS; t++) {
+ if (rp->priv->limits[i] & BIT(t))
+ rd->rpl[t].name = pl_names[t];
}
for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++)
rd->regs[j] = rp->priv->regs[i][j];
- switch (i) {
- case RAPL_DOMAIN_DRAM:
- rd->domain_energy_unit =
- rapl_defaults->dram_domain_energy_unit;
- if (rd->domain_energy_unit)
- pr_info("DRAM domain energy unit %dpj\n",
- rd->domain_energy_unit);
- break;
- case RAPL_DOMAIN_PLATFORM:
- rd->domain_energy_unit =
- rapl_defaults->psys_domain_energy_unit;
- if (rd->domain_energy_unit)
- pr_info("Platform domain energy unit %dpj\n",
- rd->domain_energy_unit);
- break;
- default:
- break;
- }
rd++;
}
}
@@ -615,23 +609,19 @@ static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
u64 value, int to_raw)
{
u64 units = 1;
- struct rapl_package *rp = rd->rp;
+ struct rapl_defaults *defaults = get_defaults(rd->rp);
u64 scale = 1;
switch (type) {
case POWER_UNIT:
- units = rp->power_unit;
+ units = rd->power_unit;
break;
case ENERGY_UNIT:
scale = ENERGY_UNIT_SCALE;
- /* per domain unit takes precedence */
- if (rd->domain_energy_unit)
- units = rd->domain_energy_unit;
- else
- units = rp->energy_unit;
+ units = rd->energy_unit;
break;
case TIME_UNIT:
- return rapl_defaults->compute_time_window(rp, value, to_raw);
+ return defaults->compute_time_window(rd, value, to_raw);
case ARBITRARY_UNIT:
default:
return value;
@@ -645,67 +635,141 @@ static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
return div64_u64(value, scale);
}
-/* in the order of enum rapl_primitives */
-static struct rapl_primitive_info rpi[] = {
+/* RAPL primitives for MSR and MMIO I/F */
+static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
/* name, mask, shift, msr index, unit divisor */
- PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
- RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
- PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
+ [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
- PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
+ [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
- PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
+ [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
- PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
+ [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
+ RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
+ [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
+ [FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
+ [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
+ [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
+ [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0,
+ [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0,
RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
- PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
+ [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
- PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
+ [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
- PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK,
+ [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK,
0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
- PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
+ [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
- PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
+ [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
- PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48,
+ [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48,
RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
- PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
+ [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
- PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
+ [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
- PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
+ [PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
- PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
+ [PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
- PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
+ [PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
+ [PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
- PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
+ [PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
- PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
+ [PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
/* non-hardware */
- PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
+ [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
RAPL_PRIMITIVE_DERIVED),
- {NULL, 0, 0, 0},
};
+/*
+ * RAPL primitives for TPMI I/F
+ *
+ * Selected by rapl_config() for RAPL_IF_TPMI packages. Unlike the MSR/MMIO
+ * table, the PL2 primitives here live in RAPL_DOMAIN_REG_PL2 and each power
+ * limit has its own Lock primitive (PL1_LOCK/PL2_LOCK/PL4_LOCK).
+ * Primitives without an entry are left zero-initialized.
+ * NOTE(review): the per-entry argument meaning follows PRIMITIVE_INFO_INIT,
+ * which is defined outside this view - confirm against its definition.
+ */
+static struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = {
+ /* name, mask, shift, msr index, unit divisor */
+ [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, TPMI_POWER_LIMIT_MASK, 0,
+ RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
+ [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, TPMI_POWER_LIMIT_MASK, 0,
+ RAPL_DOMAIN_REG_PL2, POWER_UNIT, 0),
+ [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, TPMI_POWER_LIMIT_MASK, 0,
+ RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
+ [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
+ RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
+ [PL1_LOCK] = PRIMITIVE_INFO_INIT(PL1_LOCK, POWER_HIGH_LOCK, 63,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL2_LOCK] = PRIMITIVE_INFO_INIT(PL2_LOCK, POWER_HIGH_LOCK, 63,
+ RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
+ [PL4_LOCK] = PRIMITIVE_INFO_INIT(PL4_LOCK, POWER_HIGH_LOCK, 63,
+ RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
+ [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
+ RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
+ [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
+ RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
+ [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
+ RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
+ [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TPMI_TIME_WINDOW_MASK, 18,
+ RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
+ [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TPMI_TIME_WINDOW_MASK, 18,
+ RAPL_DOMAIN_REG_PL2, TIME_UNIT, 0),
+ [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, TPMI_INFO_SPEC_MASK, 0,
+ RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
+ [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, TPMI_INFO_MAX_MASK, 36,
+ RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
+ [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, TPMI_INFO_MIN_MASK, 18,
+ RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
+ [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, TPMI_INFO_MAX_TIME_WIN_MASK, 54,
+ RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
+ [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
+ RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
+ /* non-hardware */
+ [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0,
+ POWER_UNIT, RAPL_PRIMITIVE_DERIVED),
+};
+
+/*
+ * Look up the descriptor of primitive @prim in the primitive table
+ * configured for the interface of @rp (rp->priv->rpi).
+ *
+ * Returns NULL when no table has been configured or when @prim is not a
+ * valid index. The tables are declared with NR_RAPL_PRIMITIVES entries,
+ * so NR_RAPL_PRIMITIVES itself is already out of bounds and must be
+ * rejected.
+ */
+static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim)
+{
+	struct rapl_primitive_info *rpi = rp->priv->rpi;
+
+	if (prim < 0 || prim >= NR_RAPL_PRIMITIVES || !rpi)
+		return NULL;
+
+	return &rpi[prim];
+}
+
+/*
+ * Attach the defaults and the primitive table matching the interface type
+ * of @rp to its private data.
+ *
+ * Returns 0 on success, -EINVAL for an unknown interface type (in which
+ * case the private data is left untouched).
+ */
+static int rapl_config(struct rapl_package *rp)
+{
+	void *defaults;
+	void *rpi;
+
+	switch (rp->priv->type) {
+	/* MMIO I/F shares the same register layout as MSR registers */
+	case RAPL_IF_MMIO:
+	case RAPL_IF_MSR:
+		defaults = (void *)defaults_msr;
+		rpi = (void *)rpi_msr;
+		break;
+	case RAPL_IF_TPMI:
+		defaults = (void *)&defaults_tpmi;
+		rpi = (void *)rpi_tpmi;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	rp->priv->defaults = defaults;
+	rp->priv->rpi = rpi;
+
+	return 0;
+}
+
static enum rapl_primitives
prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
{
- if (!rapl_defaults->spr_psys_bits)
+ struct rapl_defaults *defaults = get_defaults(rd->rp);
+
+ if (!defaults->spr_psys_bits)
return prim;
if (rd->id != RAPL_DOMAIN_PLATFORM)
@@ -747,41 +811,33 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
{
u64 value;
enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
- struct rapl_primitive_info *rp = &rpi[prim_fixed];
+ struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed);
struct reg_action ra;
- int cpu;
- if (!rp->name || rp->flag & RAPL_PRIMITIVE_DUMMY)
+ if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY)
return -EINVAL;
- ra.reg = rd->regs[rp->id];
+ ra.reg = rd->regs[rpi->id];
if (!ra.reg)
return -EINVAL;
- cpu = rd->rp->lead_cpu;
-
- /* domain with 2 limits has different bit */
- if (prim == FW_LOCK && rd->rp->priv->limits[rd->id] == 2) {
- rp->mask = POWER_HIGH_LOCK;
- rp->shift = 63;
- }
/* non-hardware data are collected by the polling thread */
- if (rp->flag & RAPL_PRIMITIVE_DERIVED) {
+ if (rpi->flag & RAPL_PRIMITIVE_DERIVED) {
*data = rd->rdd.primitives[prim];
return 0;
}
- ra.mask = rp->mask;
+ ra.mask = rpi->mask;
- if (rd->rp->priv->read_raw(cpu, &ra)) {
- pr_debug("failed to read reg 0x%llx on cpu %d\n", ra.reg, cpu);
+ if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
+ pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg, rd->rp->name, rd->name);
return -EIO;
}
- value = ra.value >> rp->shift;
+ value = ra.value >> rpi->shift;
if (xlate)
- *data = rapl_unit_xlate(rd, rp->unit, value, 0);
+ *data = rapl_unit_xlate(rd, rpi->unit, value, 0);
else
*data = value;
@@ -794,28 +850,56 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
unsigned long long value)
{
enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
- struct rapl_primitive_info *rp = &rpi[prim_fixed];
- int cpu;
+ struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed);
u64 bits;
struct reg_action ra;
int ret;
- cpu = rd->rp->lead_cpu;
- bits = rapl_unit_xlate(rd, rp->unit, value, 1);
- bits <<= rp->shift;
- bits &= rp->mask;
+ if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY)
+ return -EINVAL;
+
+ bits = rapl_unit_xlate(rd, rpi->unit, value, 1);
+ bits <<= rpi->shift;
+ bits &= rpi->mask;
memset(&ra, 0, sizeof(ra));
- ra.reg = rd->regs[rp->id];
- ra.mask = rp->mask;
+ ra.reg = rd->regs[rpi->id];
+ ra.mask = rpi->mask;
ra.value = bits;
- ret = rd->rp->priv->write_raw(cpu, &ra);
+ ret = rd->rp->priv->write_raw(get_rid(rd->rp), &ra);
return ret;
}
+/*
+ * Read attribute @pl_prim of power limit @pl on @rd into @data.
+ *
+ * Returns -EINVAL when @pl is not a valid power limit of the domain,
+ * otherwise the result of rapl_read_data_raw() for the translated
+ * primitive. @xlate is passed through to rapl_read_data_raw().
+ */
+static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
+			      enum pl_prims pl_prim, bool xlate, u64 *data)
+{
+	if (!is_pl_valid(rd, pl))
+		return -EINVAL;
+
+	return rapl_read_data_raw(rd, get_pl_prim(rd, pl, pl_prim),
+				  xlate, data);
+}
+
+/*
+ * Write @value to attribute @pl_prim of power limit @pl on @rd.
+ *
+ * Returns -EINVAL when @pl is not a valid power limit of the domain,
+ * -EACCES (after a warning) when the power limit is marked locked,
+ * otherwise the result of rapl_write_data_raw() for the translated
+ * primitive.
+ */
+static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
+			       enum pl_prims pl_prim,
+			       unsigned long long value)
+{
+	if (!is_pl_valid(rd, pl))
+		return -EINVAL;
+
+	/* Refuse to touch a limit that was detected as locked */
+	if (rd->rpl[pl].locked) {
+		pr_warn("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]);
+		return -EACCES;
+	}
+
+	return rapl_write_data_raw(rd, get_pl_prim(rd, pl, pl_prim), value);
+}
/*
* Raw RAPL data stored in MSRs are in certain scales. We need to
* convert them into standard units based on the units reported in
@@ -827,58 +911,58 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
* power unit : microWatts : Represented in milliWatts by default
* time unit : microseconds: Represented in seconds by default
*/
-static int rapl_check_unit_core(struct rapl_package *rp, int cpu)
+static int rapl_check_unit_core(struct rapl_domain *rd)
{
struct reg_action ra;
u32 value;
- ra.reg = rp->priv->reg_unit;
+ ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
ra.mask = ~0;
- if (rp->priv->read_raw(cpu, &ra)) {
- pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n",
- rp->priv->reg_unit, cpu);
+ if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
+ pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
+ ra.reg, rd->rp->name, rd->name);
return -ENODEV;
}
value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
- rp->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
+ rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
- rp->power_unit = 1000000 / (1 << value);
+ rd->power_unit = 1000000 / (1 << value);
value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
- rp->time_unit = 1000000 / (1 << value);
+ rd->time_unit = 1000000 / (1 << value);
- pr_debug("Core CPU %s energy=%dpJ, time=%dus, power=%duW\n",
- rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
+ pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
+ rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
return 0;
}
-static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
+static int rapl_check_unit_atom(struct rapl_domain *rd)
{
struct reg_action ra;
u32 value;
- ra.reg = rp->priv->reg_unit;
+ ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
ra.mask = ~0;
- if (rp->priv->read_raw(cpu, &ra)) {
- pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n",
- rp->priv->reg_unit, cpu);
+ if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
+ pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
+ ra.reg, rd->rp->name, rd->name);
return -ENODEV;
}
value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
- rp->energy_unit = ENERGY_UNIT_SCALE * 1 << value;
+ rd->energy_unit = ENERGY_UNIT_SCALE * 1 << value;
value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
- rp->power_unit = (1 << value) * 1000;
+ rd->power_unit = (1 << value) * 1000;
value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
- rp->time_unit = 1000000 / (1 << value);
+ rd->time_unit = 1000000 / (1 << value);
- pr_debug("Atom %s energy=%dpJ, time=%dus, power=%duW\n",
- rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
+ pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n",
+ rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
return 0;
}
@@ -910,6 +994,9 @@ static void power_limit_irq_save_cpu(void *info)
static void package_power_limit_irq_save(struct rapl_package *rp)
{
+ if (rp->lead_cpu < 0)
+ return;
+
if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
return;
@@ -924,6 +1011,9 @@ static void package_power_limit_irq_restore(struct rapl_package *rp)
{
u32 l, h;
+ if (rp->lead_cpu < 0)
+ return;
+
if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
return;
@@ -943,33 +1033,33 @@ static void package_power_limit_irq_restore(struct rapl_package *rp)
static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
{
- int nr_powerlimit = find_nr_power_limit(rd);
+ int i;
/* always enable clamp such that p-state can go below OS requested
* range. power capping priority over guranteed frequency.
*/
- rapl_write_data_raw(rd, PL1_CLAMP, mode);
+ rapl_write_pl_data(rd, POWER_LIMIT1, PL_CLAMP, mode);
- /* some domains have pl2 */
- if (nr_powerlimit > 1) {
- rapl_write_data_raw(rd, PL2_ENABLE, mode);
- rapl_write_data_raw(rd, PL2_CLAMP, mode);
+ for (i = POWER_LIMIT2; i < NR_POWER_LIMITS; i++) {
+ rapl_write_pl_data(rd, i, PL_ENABLE, mode);
+ rapl_write_pl_data(rd, i, PL_CLAMP, mode);
}
}
static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
{
static u32 power_ctrl_orig_val;
+ struct rapl_defaults *defaults = get_defaults(rd->rp);
u32 mdata;
- if (!rapl_defaults->floor_freq_reg_addr) {
+ if (!defaults->floor_freq_reg_addr) {
pr_err("Invalid floor frequency config register\n");
return;
}
if (!power_ctrl_orig_val)
iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ,
- rapl_defaults->floor_freq_reg_addr,
+ defaults->floor_freq_reg_addr,
&power_ctrl_orig_val);
mdata = power_ctrl_orig_val;
if (enable) {
@@ -977,10 +1067,10 @@ static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
mdata |= 1 << 8;
}
iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE,
- rapl_defaults->floor_freq_reg_addr, mdata);
+ defaults->floor_freq_reg_addr, mdata);
}
-static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
+static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value,
bool to_raw)
{
u64 f, y; /* fraction and exp. used for time unit */
@@ -992,12 +1082,12 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
if (!to_raw) {
f = (value & 0x60) >> 5;
y = value & 0x1f;
- value = (1 << y) * (4 + f) * rp->time_unit / 4;
+ value = (1 << y) * (4 + f) * rd->time_unit / 4;
} else {
- if (value < rp->time_unit)
+ if (value < rd->time_unit)
return 0;
- do_div(value, rp->time_unit);
+ do_div(value, rd->time_unit);
y = ilog2(value);
/*
@@ -1013,7 +1103,7 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
return value;
}
-static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
+static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value,
bool to_raw)
{
/*
@@ -1021,13 +1111,56 @@ static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
* where time_unit is default to 1 sec. Never 0.
*/
if (!to_raw)
- return (value) ? value * rp->time_unit : rp->time_unit;
+ return (value) ? value * rd->time_unit : rd->time_unit;
- value = div64_u64(value, rp->time_unit);
+ value = div64_u64(value, rd->time_unit);
return value;
}
+/* TPMI Unit register has different layout */
+#define TPMI_POWER_UNIT_OFFSET POWER_UNIT_OFFSET
+#define TPMI_POWER_UNIT_MASK POWER_UNIT_MASK
+#define TPMI_ENERGY_UNIT_OFFSET 0x06
+#define TPMI_ENERGY_UNIT_MASK 0x7C0
+#define TPMI_TIME_UNIT_OFFSET 0x0C
+#define TPMI_TIME_UNIT_MASK 0xF000
+
+/*
+ * Read the unit register of @rd and derive the per domain energy (pJ),
+ * power (uW) and time (us) units from its TPMI specific bit fields.
+ *
+ * Returns 0 on success, -ENODEV when the register cannot be read.
+ */
+static int rapl_check_unit_tpmi(struct rapl_domain *rd)
+{
+	struct reg_action ra;
+	u32 energy_exp, power_exp, time_exp;
+
+	ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
+	ra.mask = ~0;
+	if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
+		pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
+		       ra.reg, rd->rp->name, rd->name);
+		return -ENODEV;
+	}
+
+	/* Each field is an exponent: unit = base / 2^field */
+	energy_exp = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET;
+	power_exp = (ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET;
+	time_exp = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET;
+
+	rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << energy_exp);
+	rd->power_unit = 1000000 / (1 << power_exp);
+	rd->time_unit = 1000000 / (1 << time_exp);
+
+	pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
+		 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
+
+	return 0;
+}
+
+/*
+ * Defaults used for packages on the TPMI interface (see rapl_config()).
+ * Only the callbacks are set; the remaining rapl_defaults fields stay
+ * zero-initialized.
+ */
+static const struct rapl_defaults defaults_tpmi = {
+ .check_unit = rapl_check_unit_tpmi,
+ /* Reuse existing logic, ignore the PL_CLAMP failures and enable all Power Limits */
+ .set_floor_freq = set_floor_freq_default,
+ .compute_time_window = rapl_compute_time_window_core,
+};
+
static const struct rapl_defaults rapl_defaults_core = {
.floor_freq_reg_addr = 0,
.check_unit = rapl_check_unit_core,
@@ -1159,8 +1292,10 @@ static void rapl_update_domain_data(struct rapl_package *rp)
rp->domains[dmn].name);
/* exclude non-raw primitives */
for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) {
+ struct rapl_primitive_info *rpi = get_rpi(rp, prim);
+
if (!rapl_read_data_raw(&rp->domains[dmn], prim,
- rpi[prim].unit, &val))
+ rpi->unit, &val))
rp->domains[dmn].rdd.primitives[prim] = val;
}
}
@@ -1239,7 +1374,7 @@ err_cleanup:
return ret;
}
-static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp)
+static int rapl_check_domain(int domain, struct rapl_package *rp)
{
struct reg_action ra;
@@ -1260,9 +1395,43 @@ static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp)
*/
ra.mask = ENERGY_STATUS_MASK;
- if (rp->priv->read_raw(cpu, &ra) || !ra.value)
+ if (rp->priv->read_raw(get_rid(rp), &ra) || !ra.value)
+ return -ENODEV;
+
+ return 0;
+}
+
+/*
+ * Get per domain energy/power/time unit.
+ * RAPL Interfaces without per domain unit register will use the package
+ * scope unit register to set per domain units.
+ */
+static int rapl_get_domain_unit(struct rapl_domain *rd)
+{
+ struct rapl_defaults *defaults = get_defaults(rd->rp);
+ int ret;
+
+ /* No per domain Unit register: fall back to the interface wide reg_unit */
+ if (!rd->regs[RAPL_DOMAIN_REG_UNIT]) {
+ if (!rd->rp->priv->reg_unit) {
+ pr_err("No valid Unit register found\n");
+ return -ENODEV;
+ }
+ rd->regs[RAPL_DOMAIN_REG_UNIT] = rd->rp->priv->reg_unit;
+ }
+
+ if (!defaults->check_unit) {
+ pr_err("missing .check_unit() callback\n");
return -ENODEV;
+ }
+
+ /* Let the interface specific callback fill in the domain units */
+ ret = defaults->check_unit(rd);
+ if (ret)
+ return ret;
+ /* A non-zero DRAM/PSYS energy unit in the defaults overrides the parsed one */
+ if (rd->id == RAPL_DOMAIN_DRAM && defaults->dram_domain_energy_unit)
+ rd->energy_unit = defaults->dram_domain_energy_unit;
+ if (rd->id == RAPL_DOMAIN_PLATFORM && defaults->psys_domain_energy_unit)
+ rd->energy_unit = defaults->psys_domain_energy_unit;
return 0;
}
@@ -1280,19 +1449,16 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
u64 val64;
int i;
- /* check if the domain is locked by BIOS, ignore if MSR doesn't exist */
- if (!rapl_read_data_raw(rd, FW_LOCK, false, &val64)) {
- if (val64) {
- pr_info("RAPL %s domain %s locked by BIOS\n",
- rd->rp->name, rd->name);
- rd->state |= DOMAIN_STATE_BIOS_LOCKED;
+ for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
+ if (!rapl_read_pl_data(rd, i, PL_LOCK, false, &val64)) {
+ if (val64) {
+ rd->rpl[i].locked = true;
+ pr_info("%s:%s:%s locked by BIOS\n",
+ rd->rp->name, rd->name, pl_names[i]);
+ }
}
- }
- /* check if power limit MSR exists, otherwise domain is monitoring only */
- for (i = 0; i < NR_POWER_LIMITS; i++) {
- int prim = rd->rpl[i].prim_id;
- if (rapl_read_data_raw(rd, prim, false, &val64))
+ if (rapl_read_pl_data(rd, i, PL_ENABLE, false, &val64))
rd->rpl[i].name = NULL;
}
}
@@ -1300,14 +1466,14 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
/* Detect active and valid domains for the given CPU, caller must
* ensure the CPU belongs to the targeted package and CPU hotplug is disabled.
*/
-static int rapl_detect_domains(struct rapl_package *rp, int cpu)
+static int rapl_detect_domains(struct rapl_package *rp)
{
struct rapl_domain *rd;
int i;
for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
/* use physical package id to read counters */
- if (!rapl_check_domain(cpu, i, rp)) {
+ if (!rapl_check_domain(i, rp)) {
rp->domain_map |= 1 << i;
pr_info("Found RAPL domain %s\n", rapl_domain_names[i]);
}
@@ -1326,8 +1492,10 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu)
rapl_init_domains(rp);
- for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++)
+ for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
+ rapl_get_domain_unit(rd);
rapl_detect_powerlimit(rd);
+ }
return 0;
}
@@ -1340,13 +1508,13 @@ void rapl_remove_package(struct rapl_package *rp)
package_power_limit_irq_restore(rp);
for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
- rapl_write_data_raw(rd, PL1_ENABLE, 0);
- rapl_write_data_raw(rd, PL1_CLAMP, 0);
- if (find_nr_power_limit(rd) > 1) {
- rapl_write_data_raw(rd, PL2_ENABLE, 0);
- rapl_write_data_raw(rd, PL2_CLAMP, 0);
- rapl_write_data_raw(rd, PL4_ENABLE, 0);
+ int i;
+
+ for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
+ rapl_write_pl_data(rd, i, PL_ENABLE, 0);
+ rapl_write_pl_data(rd, i, PL_CLAMP, 0);
}
+
if (rd->id == RAPL_DOMAIN_PACKAGE) {
rd_package = rd;
continue;
@@ -1365,13 +1533,18 @@ void rapl_remove_package(struct rapl_package *rp)
EXPORT_SYMBOL_GPL(rapl_remove_package);
/* caller to ensure CPU hotplug lock is held */
-struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv)
+struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu)
{
- int id = topology_logical_die_id(cpu);
struct rapl_package *rp;
+ int uid;
+
+ if (id_is_cpu)
+ uid = topology_logical_die_id(id);
+ else
+ uid = id;
list_for_each_entry(rp, &rapl_packages, plist) {
- if (rp->id == id
+ if (rp->id == uid
&& rp->priv->control_type == priv->control_type)
return rp;
}
@@ -1381,34 +1554,37 @@ struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv
EXPORT_SYMBOL_GPL(rapl_find_package_domain);
/* called from CPU hotplug notifier, hotplug lock held */
-struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv)
+struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu)
{
- int id = topology_logical_die_id(cpu);
struct rapl_package *rp;
int ret;
- if (!rapl_defaults)
- return ERR_PTR(-ENODEV);
-
rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL);
if (!rp)
return ERR_PTR(-ENOMEM);
- /* add the new package to the list */
- rp->id = id;
- rp->lead_cpu = cpu;
- rp->priv = priv;
+ if (id_is_cpu) {
+ rp->id = topology_logical_die_id(id);
+ rp->lead_cpu = id;
+ if (topology_max_die_per_package() > 1)
+ snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d",
+ topology_physical_package_id(id), topology_die_id(id));
+ else
+ snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
+ topology_physical_package_id(id));
+ } else {
+ rp->id = id;
+ rp->lead_cpu = -1;
+ snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", id);
+ }
- if (topology_max_die_per_package() > 1)
- snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH,
- "package-%d-die-%d",
- topology_physical_package_id(cpu), topology_die_id(cpu));
- else
- snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
- topology_physical_package_id(cpu));
+ rp->priv = priv;
+ ret = rapl_config(rp);
+ if (ret)
+ goto err_free_package;
/* check if the package contains valid domains */
- if (rapl_detect_domains(rp, cpu) || rapl_defaults->check_unit(rp, cpu)) {
+ if (rapl_detect_domains(rp)) {
ret = -ENODEV;
goto err_free_package;
}
@@ -1430,38 +1606,18 @@ static void power_limit_state_save(void)
{
struct rapl_package *rp;
struct rapl_domain *rd;
- int nr_pl, ret, i;
+ int ret, i;
cpus_read_lock();
list_for_each_entry(rp, &rapl_packages, plist) {
if (!rp->power_zone)
continue;
rd = power_zone_to_rapl_domain(rp->power_zone);
- nr_pl = find_nr_power_limit(rd);
- for (i = 0; i < nr_pl; i++) {
- switch (rd->rpl[i].prim_id) {
- case PL1_ENABLE:
- ret = rapl_read_data_raw(rd,
- POWER_LIMIT1, true,
- &rd->rpl[i].last_power_limit);
- if (ret)
- rd->rpl[i].last_power_limit = 0;
- break;
- case PL2_ENABLE:
- ret = rapl_read_data_raw(rd,
- POWER_LIMIT2, true,
+ for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
+ ret = rapl_read_pl_data(rd, i, PL_LIMIT, true,
&rd->rpl[i].last_power_limit);
- if (ret)
- rd->rpl[i].last_power_limit = 0;
- break;
- case PL4_ENABLE:
- ret = rapl_read_data_raw(rd,
- POWER_LIMIT4, true,
- &rd->rpl[i].last_power_limit);
- if (ret)
- rd->rpl[i].last_power_limit = 0;
- break;
- }
+ if (ret)
+ rd->rpl[i].last_power_limit = 0;
}
}
cpus_read_unlock();
@@ -1471,33 +1627,17 @@ static void power_limit_state_restore(void)
{
struct rapl_package *rp;
struct rapl_domain *rd;
- int nr_pl, i;
+ int i;
cpus_read_lock();
list_for_each_entry(rp, &rapl_packages, plist) {
if (!rp->power_zone)
continue;
rd = power_zone_to_rapl_domain(rp->power_zone);
- nr_pl = find_nr_power_limit(rd);
- for (i = 0; i < nr_pl; i++) {
- switch (rd->rpl[i].prim_id) {
- case PL1_ENABLE:
- if (rd->rpl[i].last_power_limit)
- rapl_write_data_raw(rd, POWER_LIMIT1,
- rd->rpl[i].last_power_limit);
- break;
- case PL2_ENABLE:
- if (rd->rpl[i].last_power_limit)
- rapl_write_data_raw(rd, POWER_LIMIT2,
- rd->rpl[i].last_power_limit);
- break;
- case PL4_ENABLE:
- if (rd->rpl[i].last_power_limit)
- rapl_write_data_raw(rd, POWER_LIMIT4,
- rd->rpl[i].last_power_limit);
- break;
- }
- }
+ for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++)
+ if (rd->rpl[i].last_power_limit)
+ rapl_write_pl_data(rd, i, PL_LIMIT,
+ rd->rpl[i].last_power_limit);
}
cpus_read_unlock();
}
@@ -1528,32 +1668,25 @@ static int __init rapl_init(void)
int ret;
id = x86_match_cpu(rapl_ids);
- if (!id) {
- pr_err("driver does not support CPU family %d model %d\n",
- boot_cpu_data.x86, boot_cpu_data.x86_model);
-
- return -ENODEV;
- }
+ if (id) {
+ defaults_msr = (struct rapl_defaults *)id->driver_data;
- rapl_defaults = (struct rapl_defaults *)id->driver_data;
-
- ret = register_pm_notifier(&rapl_pm_notifier);
- if (ret)
- return ret;
+ rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0);
+ if (!rapl_msr_platdev)
+ return -ENOMEM;
- rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0);
- if (!rapl_msr_platdev) {
- ret = -ENOMEM;
- goto end;
+ ret = platform_device_add(rapl_msr_platdev);
+ if (ret) {
+ platform_device_put(rapl_msr_platdev);
+ return ret;
+ }
}
- ret = platform_device_add(rapl_msr_platdev);
- if (ret)
+ ret = register_pm_notifier(&rapl_pm_notifier);
+ if (ret && rapl_msr_platdev) {
+ platform_device_del(rapl_msr_platdev);
platform_device_put(rapl_msr_platdev);
-
-end:
- if (ret)
- unregister_pm_notifier(&rapl_pm_notifier);
+ }
return ret;
}
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index a27673706c3d..569e25eab1e1 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -22,7 +22,6 @@
#include <linux/processor.h>
#include <linux/platform_device.h>
-#include <asm/iosf_mbi.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
@@ -34,6 +33,7 @@
static struct rapl_if_priv *rapl_msr_priv;
static struct rapl_if_priv rapl_msr_priv_intel = {
+ .type = RAPL_IF_MSR,
.reg_unit = MSR_RAPL_POWER_UNIT,
.regs[RAPL_DOMAIN_PACKAGE] = {
MSR_PKG_POWER_LIMIT, MSR_PKG_ENERGY_STATUS, MSR_PKG_PERF_STATUS, 0, MSR_PKG_POWER_INFO },
@@ -45,11 +45,12 @@ static struct rapl_if_priv rapl_msr_priv_intel = {
MSR_DRAM_POWER_LIMIT, MSR_DRAM_ENERGY_STATUS, MSR_DRAM_PERF_STATUS, 0, MSR_DRAM_POWER_INFO },
.regs[RAPL_DOMAIN_PLATFORM] = {
MSR_PLATFORM_POWER_LIMIT, MSR_PLATFORM_ENERGY_STATUS, 0, 0, 0},
- .limits[RAPL_DOMAIN_PACKAGE] = 2,
- .limits[RAPL_DOMAIN_PLATFORM] = 2,
+ .limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2),
+ .limits[RAPL_DOMAIN_PLATFORM] = BIT(POWER_LIMIT2),
};
static struct rapl_if_priv rapl_msr_priv_amd = {
+ .type = RAPL_IF_MSR,
.reg_unit = MSR_AMD_RAPL_POWER_UNIT,
.regs[RAPL_DOMAIN_PACKAGE] = {
0, MSR_AMD_PKG_ENERGY_STATUS, 0, 0, 0 },
@@ -68,9 +69,9 @@ static int rapl_cpu_online(unsigned int cpu)
{
struct rapl_package *rp;
- rp = rapl_find_package_domain(cpu, rapl_msr_priv);
+ rp = rapl_find_package_domain(cpu, rapl_msr_priv, true);
if (!rp) {
- rp = rapl_add_package(cpu, rapl_msr_priv);
+ rp = rapl_add_package(cpu, rapl_msr_priv, true);
if (IS_ERR(rp))
return PTR_ERR(rp);
}
@@ -83,7 +84,7 @@ static int rapl_cpu_down_prep(unsigned int cpu)
struct rapl_package *rp;
int lead_cpu;
- rp = rapl_find_package_domain(cpu, rapl_msr_priv);
+ rp = rapl_find_package_domain(cpu, rapl_msr_priv, true);
if (!rp)
return 0;
@@ -137,14 +138,14 @@ static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
/* List of verified CPUs. */
static const struct x86_cpu_id pl4_support_ids[] = {
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_N, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE_P, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_METEORLAKE, X86_FEATURE_ANY },
- { X86_VENDOR_INTEL, 6, INTEL_FAM6_METEORLAKE_L, X86_FEATURE_ANY },
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, NULL),
{}
};
@@ -169,7 +170,7 @@ static int rapl_msr_probe(struct platform_device *pdev)
rapl_msr_priv->write_raw = rapl_msr_write_raw;
if (id) {
- rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] = 3;
+ rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] |= BIT(POWER_LIMIT4);
rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4] =
MSR_VR_CURRENT_CONFIG;
pr_info("PL4 support detected.\n");
diff --git a/drivers/powercap/intel_rapl_tpmi.c b/drivers/powercap/intel_rapl_tpmi.c
new file mode 100644
index 000000000000..4f4f13ded225
--- /dev/null
+++ b/drivers/powercap/intel_rapl_tpmi.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_rapl_tpmi: Intel RAPL driver via TPMI interface
+ *
+ * Copyright (c) 2023, Intel Corporation.
+ * All Rights Reserved.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/auxiliary_bus.h>
+#include <linux/io.h>
+#include <linux/intel_tpmi.h>
+#include <linux/intel_rapl.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#define TPMI_RAPL_VERSION 1
+
+/* 1 header + 10 registers + 5 reserved. 8 bytes for each. */
+#define TPMI_RAPL_DOMAIN_SIZE 128
+
+/*
+ * Domain type values as read from bits 15:8 of a TPMI RAPL domain header.
+ * parse_one_domain() only accepts SYSTEM, PACKAGE and MEMORY.
+ */
+enum tpmi_rapl_domain_type {
+ TPMI_RAPL_DOMAIN_INVALID,
+ TPMI_RAPL_DOMAIN_SYSTEM,
+ TPMI_RAPL_DOMAIN_PACKAGE,
+ TPMI_RAPL_DOMAIN_RESERVED,
+ TPMI_RAPL_DOMAIN_MEMORY,
+ TPMI_RAPL_DOMAIN_MAX,
+};
+
+/*
+ * Index of each 8-byte register inside a TPMI RAPL domain block. The same
+ * index is used as the bit position in the domain header flags that tells
+ * whether the register is present.
+ */
+enum tpmi_rapl_register {
+ TPMI_RAPL_REG_HEADER,
+ TPMI_RAPL_REG_UNIT,
+ TPMI_RAPL_REG_PL1,
+ TPMI_RAPL_REG_PL2,
+ TPMI_RAPL_REG_PL3,
+ TPMI_RAPL_REG_PL4,
+ TPMI_RAPL_REG_RESERVED,
+ TPMI_RAPL_REG_ENERGY_STATUS,
+ TPMI_RAPL_REG_PERF_STATUS,
+ TPMI_RAPL_REG_POWER_INFO,
+ TPMI_RAPL_REG_INTERRUPT,
+ TPMI_RAPL_REG_MAX = 15, /* upper bound of the register scan in parse_one_domain() */
+};
+
+/*
+ * State kept for one TPMI RAPL auxiliary device; allocated by trp_alloc()
+ * and freed by trp_release().
+ */
+struct tpmi_rapl_package {
+ struct rapl_if_priv priv; /* interface description passed to rapl_add_package() */
+ struct intel_tpmi_plat_info *tpmi_info; /* platform data from tpmi_get_platform_data() */
+ struct rapl_package *rp; /* package returned by rapl_add_package() */
+ void __iomem *base; /* mapping of the device's resource 0 */
+ struct list_head node; /* link in tpmi_rapl_packages */
+};
+
+static LIST_HEAD(tpmi_rapl_packages);
+static DEFINE_MUTEX(tpmi_rapl_lock);
+
+static struct powercap_control_type *tpmi_control_type;
+
+/*
+ * read_raw() callback for the TPMI interface: ra->reg holds the mapped
+ * address of a 64-bit register. The register content, limited to ra->mask,
+ * is returned in ra->value. @id is not used.
+ *
+ * Returns 0 on success, -EINVAL if no register address is set.
+ */
+static int tpmi_rapl_read_raw(int id, struct reg_action *ra)
+{
+	void __iomem *addr;
+
+	if (ra->reg == 0)
+		return -EINVAL;
+
+	addr = (void __iomem *)ra->reg;
+	ra->value = readq(addr) & ra->mask;
+
+	return 0;
+}
+
+/*
+ * write_raw() callback for the TPMI interface: read-modify-write of the
+ * 64-bit register at ra->reg. Bits covered by ra->mask are cleared and
+ * ra->value is OR-ed in. @id is not used.
+ *
+ * Returns 0 on success, -EINVAL if no register address is set.
+ */
+static int tpmi_rapl_write_raw(int id, struct reg_action *ra)
+{
+	void __iomem *addr;
+	u64 cur;
+
+	if (ra->reg == 0)
+		return -EINVAL;
+
+	addr = (void __iomem *)ra->reg;
+	cur = readq(addr);
+	writeq((cur & ~ra->mask) | ra->value, addr);
+
+	return 0;
+}
+
+/*
+ * Allocate a zeroed tpmi_rapl_package and link it into tpmi_rapl_packages,
+ * all under tpmi_rapl_lock.
+ *
+ * The "intel-rapl" powercap control type is registered when the list is
+ * empty on entry, and unregistered again if the allocation then fails while
+ * the list is still empty. @pkg_id is not used here.
+ *
+ * Returns the new entry, or an ERR_PTR() on failure.
+ */
+static struct tpmi_rapl_package *trp_alloc(int pkg_id)
+{
+	struct tpmi_rapl_package *new_trp;
+
+	mutex_lock(&tpmi_rapl_lock);
+
+	if (list_empty(&tpmi_rapl_packages)) {
+		tpmi_control_type = powercap_register_control_type(NULL, "intel-rapl", NULL);
+		if (IS_ERR(tpmi_control_type)) {
+			new_trp = ERR_PTR(PTR_ERR(tpmi_control_type));
+			goto out_unlock;
+		}
+	}
+
+	new_trp = kzalloc(sizeof(*new_trp), GFP_KERNEL);
+	if (new_trp) {
+		list_add(&new_trp->node, &tpmi_rapl_packages);
+		goto out_unlock;
+	}
+
+	/* Allocation failed: drop the control type if no package uses it */
+	if (list_empty(&tpmi_rapl_packages))
+		powercap_unregister_control_type(tpmi_control_type);
+	new_trp = ERR_PTR(-ENOMEM);
+
+out_unlock:
+	mutex_unlock(&tpmi_rapl_lock);
+	return new_trp;
+}
+
+/*
+ * Unlink @trp from tpmi_rapl_packages and free it. When it was the last
+ * entry, the powercap control type is unregistered as well. Everything is
+ * done under tpmi_rapl_lock.
+ */
+static void trp_release(struct tpmi_rapl_package *trp)
+{
+	bool was_last;
+
+	mutex_lock(&tpmi_rapl_lock);
+
+	list_del(&trp->node);
+	was_last = list_empty(&tpmi_rapl_packages);
+	if (was_last)
+		powercap_unregister_control_type(tpmi_control_type);
+	kfree(trp);
+
+	mutex_unlock(&tpmi_rapl_lock);
+}
+
+/*
+ * Parse the TPMI RAPL domain block found at @offset bytes from trp->base
+ * and record the addresses of its registers in trp->priv.regs[][].
+ *
+ * The first 64-bit word of the block is the header:
+ *   bits  7:0   version (must be TPMI_RAPL_VERSION)
+ *   bits 15:8   domain type (enum tpmi_rapl_domain_type)
+ *   bits 23:16  domain size, in units of 128 bytes (must be 1)
+ *   bits 47:32  flags, one bit per register index that is present
+ *
+ * Returns 0 on success, -ENODEV for an unsupported version, and -EINVAL for
+ * a bad size, missing mandatory registers, an unsupported domain type or a
+ * domain type that has already been parsed.
+ */
+static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
+{
+	u8 tpmi_domain_version;
+	enum rapl_domain_type domain_type;
+	enum tpmi_rapl_domain_type tpmi_domain_type;
+	enum tpmi_rapl_register reg_index;
+	enum rapl_domain_reg_id reg_id;
+	int tpmi_domain_size, tpmi_domain_flags;
+	/* Keep the __iomem annotation instead of laundering it through u64 * */
+	void __iomem *tpmi_rapl_regs = trp->base + offset;
+	u64 tpmi_domain_header = readq(tpmi_rapl_regs);
+
+	/* Domain Parent bits are ignored for now */
+	tpmi_domain_version = tpmi_domain_header & 0xff;
+	tpmi_domain_type = tpmi_domain_header >> 8 & 0xff;
+	tpmi_domain_size = tpmi_domain_header >> 16 & 0xff;
+	tpmi_domain_flags = tpmi_domain_header >> 32 & 0xffff;
+
+	if (tpmi_domain_version != TPMI_RAPL_VERSION) {
+		pr_warn(FW_BUG "Unsupported version:%d\n", tpmi_domain_version);
+		return -ENODEV;
+	}
+
+	/* Domain size: in unit of 128 Bytes */
+	if (tpmi_domain_size != 1) {
+		pr_warn(FW_BUG "Invalid Domain size %d\n", tpmi_domain_size);
+		return -EINVAL;
+	}
+
+	/* Unit register and Energy Status register are mandatory for each domain */
+	if (!(tpmi_domain_flags & BIT(TPMI_RAPL_REG_UNIT)) ||
+	    !(tpmi_domain_flags & BIT(TPMI_RAPL_REG_ENERGY_STATUS))) {
+		pr_warn(FW_BUG "Invalid Domain flag 0x%x\n", tpmi_domain_flags);
+		return -EINVAL;
+	}
+
+	switch (tpmi_domain_type) {
+	case TPMI_RAPL_DOMAIN_PACKAGE:
+		domain_type = RAPL_DOMAIN_PACKAGE;
+		break;
+	case TPMI_RAPL_DOMAIN_SYSTEM:
+		domain_type = RAPL_DOMAIN_PLATFORM;
+		break;
+	case TPMI_RAPL_DOMAIN_MEMORY:
+		domain_type = RAPL_DOMAIN_DRAM;
+		break;
+	default:
+		pr_warn(FW_BUG "Unsupported Domain type %d\n", tpmi_domain_type);
+		return -EINVAL;
+	}
+
+	/* A Unit register address already recorded means this type was seen before */
+	if (trp->priv.regs[domain_type][RAPL_DOMAIN_REG_UNIT]) {
+		pr_warn(FW_BUG "Duplicate Domain type %d\n", tpmi_domain_type);
+		return -EINVAL;
+	}
+
+	/* Walk every register index after the header; skip the absent ones */
+	reg_index = TPMI_RAPL_REG_HEADER;
+	while (++reg_index != TPMI_RAPL_REG_MAX) {
+		if (!(tpmi_domain_flags & BIT(reg_index)))
+			continue;
+
+		switch (reg_index) {
+		case TPMI_RAPL_REG_UNIT:
+			reg_id = RAPL_DOMAIN_REG_UNIT;
+			break;
+		case TPMI_RAPL_REG_PL1:
+			reg_id = RAPL_DOMAIN_REG_LIMIT;
+			trp->priv.limits[domain_type] |= BIT(POWER_LIMIT1);
+			break;
+		case TPMI_RAPL_REG_PL2:
+			reg_id = RAPL_DOMAIN_REG_PL2;
+			trp->priv.limits[domain_type] |= BIT(POWER_LIMIT2);
+			break;
+		case TPMI_RAPL_REG_PL4:
+			reg_id = RAPL_DOMAIN_REG_PL4;
+			trp->priv.limits[domain_type] |= BIT(POWER_LIMIT4);
+			break;
+		case TPMI_RAPL_REG_ENERGY_STATUS:
+			reg_id = RAPL_DOMAIN_REG_STATUS;
+			break;
+		case TPMI_RAPL_REG_PERF_STATUS:
+			reg_id = RAPL_DOMAIN_REG_PERF;
+			break;
+		case TPMI_RAPL_REG_POWER_INFO:
+			reg_id = RAPL_DOMAIN_REG_INFO;
+			break;
+		default:
+			/* Register present but not consumed by the RAPL core */
+			continue;
+		}
+		/* Registers are 8 bytes each; store the mapped address as u64 */
+		trp->priv.regs[domain_type][reg_id] =
+			(u64)(tpmi_rapl_regs + reg_index * sizeof(u64));
+	}
+
+	return 0;
+}
+
+/*
+ * Probe one TPMI RAPL auxiliary device: map its single resource, parse every
+ * TPMI_RAPL_DOMAIN_SIZE-byte domain block in it, then register a RAPL
+ * package keyed by the package id from the TPMI platform data.
+ *
+ * Returns 0 on success or a negative errno. On any failure after
+ * trp_alloc() the tpmi_rapl_package is released again.
+ */
+static int intel_rapl_tpmi_probe(struct auxiliary_device *auxdev,
+ const struct auxiliary_device_id *id)
+{
+ struct tpmi_rapl_package *trp;
+ struct intel_tpmi_plat_info *info;
+ struct resource *res;
+ u32 offset;
+ int ret;
+
+ info = tpmi_get_platform_data(auxdev);
+ if (!info)
+ return -ENODEV;
+
+ trp = trp_alloc(info->package_id);
+ if (IS_ERR(trp))
+ return PTR_ERR(trp);
+
+ if (tpmi_get_resource_count(auxdev) > 1) {
+ dev_err(&auxdev->dev, "does not support multiple resources\n");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ res = tpmi_get_resource_at_index(auxdev, 0);
+ if (!res) {
+ dev_err(&auxdev->dev, "can't fetch device resource info\n");
+ ret = -EIO;
+ goto err;
+ }
+
+ trp->base = devm_ioremap_resource(&auxdev->dev, res);
+ if (IS_ERR(trp->base)) {
+ ret = PTR_ERR(trp->base);
+ goto err;
+ }
+
+ /* The resource is a sequence of fixed size domain blocks; any bad block fails the probe */
+ for (offset = 0; offset < resource_size(res); offset += TPMI_RAPL_DOMAIN_SIZE) {
+ ret = parse_one_domain(trp, offset);
+ if (ret)
+ goto err;
+ }
+
+ trp->tpmi_info = info;
+ trp->priv.type = RAPL_IF_TPMI;
+ trp->priv.read_raw = tpmi_rapl_read_raw;
+ trp->priv.write_raw = tpmi_rapl_write_raw;
+ trp->priv.control_type = tpmi_control_type;
+
+ /* RAPL TPMI I/F is per physical package */
+ trp->rp = rapl_find_package_domain(info->package_id, &trp->priv, false);
+ if (trp->rp) {
+ dev_err(&auxdev->dev, "Domain for Package%d already exists\n", info->package_id);
+ ret = -EEXIST;
+ goto err;
+ }
+
+ /* id_is_cpu is false: the id passed in is the package id, not a CPU number */
+ trp->rp = rapl_add_package(info->package_id, &trp->priv, false);
+ if (IS_ERR(trp->rp)) {
+ dev_err(&auxdev->dev, "Failed to add RAPL Domain for Package%d, %ld\n",
+ info->package_id, PTR_ERR(trp->rp));
+ ret = PTR_ERR(trp->rp);
+ goto err;
+ }
+
+ /* Stash trp so that intel_rapl_tpmi_remove() can find it */
+ auxiliary_set_drvdata(auxdev, trp);
+
+ return 0;
+err:
+ trp_release(trp);
+ return ret;
+}
+
+/*
+ * Remove callback: tear down the RAPL package that probe registered for
+ * this auxiliary device, then release the per-device state.
+ */
+static void intel_rapl_tpmi_remove(struct auxiliary_device *auxdev)
+{
+	struct tpmi_rapl_package *pkg;
+
+	pkg = auxiliary_get_drvdata(auxdev);
+
+	rapl_remove_package(pkg->rp);
+	trp_release(pkg);
+}
+
+/* Auxiliary device names this driver binds to; the empty entry ends the table */
+static const struct auxiliary_device_id intel_rapl_tpmi_ids[] = {
+ {.name = "intel_vsec.tpmi-rapl" },
+ { }
+};
+
+MODULE_DEVICE_TABLE(auxiliary, intel_rapl_tpmi_ids);
+
+/* Auxiliary bus driver; registered by module_auxiliary_driver() below */
+static struct auxiliary_driver intel_rapl_tpmi_driver = {
+ .probe = intel_rapl_tpmi_probe,
+ .remove = intel_rapl_tpmi_remove,
+ .id_table = intel_rapl_tpmi_ids,
+};
+
+module_auxiliary_driver(intel_rapl_tpmi_driver)
+
+MODULE_IMPORT_NS(INTEL_TPMI);
+
+MODULE_DESCRIPTION("Intel RAPL TPMI Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
index a205221ec8df..013f1633f082 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
@@ -15,8 +15,8 @@ static const struct rapl_mmio_regs rapl_mmio_default = {
.reg_unit = 0x5938,
.regs[RAPL_DOMAIN_PACKAGE] = { 0x59a0, 0x593c, 0x58f0, 0, 0x5930},
.regs[RAPL_DOMAIN_DRAM] = { 0x58e0, 0x58e8, 0x58ec, 0, 0},
- .limits[RAPL_DOMAIN_PACKAGE] = 2,
- .limits[RAPL_DOMAIN_DRAM] = 2,
+ .limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2),
+ .limits[RAPL_DOMAIN_DRAM] = BIT(POWER_LIMIT2),
};
static int rapl_mmio_cpu_online(unsigned int cpu)
@@ -27,9 +27,9 @@ static int rapl_mmio_cpu_online(unsigned int cpu)
if (topology_physical_package_id(cpu))
return 0;
- rp = rapl_find_package_domain(cpu, &rapl_mmio_priv);
+ rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true);
if (!rp) {
- rp = rapl_add_package(cpu, &rapl_mmio_priv);
+ rp = rapl_add_package(cpu, &rapl_mmio_priv, true);
if (IS_ERR(rp))
return PTR_ERR(rp);
}
@@ -42,7 +42,7 @@ static int rapl_mmio_cpu_down_prep(unsigned int cpu)
struct rapl_package *rp;
int lead_cpu;
- rp = rapl_find_package_domain(cpu, &rapl_mmio_priv);
+ rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true);
if (!rp)
return 0;
@@ -97,6 +97,7 @@ int proc_thermal_rapl_add(struct pci_dev *pdev, struct proc_thermal_device *proc
rapl_regs->regs[domain][reg];
rapl_mmio_priv.limits[domain] = rapl_regs->limits[domain];
}
+ rapl_mmio_priv.type = RAPL_IF_MMIO;
rapl_mmio_priv.reg_unit = (u64)proc_priv->mmio_base + rapl_regs->reg_unit;
rapl_mmio_priv.read_raw = rapl_mmio_read_raw;
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index e5dfb6f4de52..451f6276da49 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -307,7 +307,8 @@ enum acpi_preferred_pm_profiles {
PM_SOHO_SERVER = 5,
PM_APPLIANCE_PC = 6,
PM_PERFORMANCE_SERVER = 7,
- PM_TABLET = 8
+ PM_TABLET = 8,
+ NR_PM_PROFILES = 9
};
/* Values for sleep_status and sleep_control registers (V5+ FADT) */
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index c10ebf8c42e6..446394f84606 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -94,7 +94,8 @@ struct amd_cpudata {
* enum amd_pstate_mode - driver working mode of amd pstate
*/
enum amd_pstate_mode {
- AMD_PSTATE_DISABLE = 0,
+ AMD_PSTATE_UNDEFINED = 0,
+ AMD_PSTATE_DISABLE,
AMD_PSTATE_PASSIVE,
AMD_PSTATE_ACTIVE,
AMD_PSTATE_GUIDED,
@@ -102,6 +103,7 @@ enum amd_pstate_mode {
};
static const char * const amd_pstate_mode_string[] = {
+ [AMD_PSTATE_UNDEFINED] = "undefined",
[AMD_PSTATE_DISABLE] = "disable",
[AMD_PSTATE_PASSIVE] = "passive",
[AMD_PSTATE_ACTIVE] = "active",
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 26e2eb399484..172ff51c1b2a 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -340,7 +340,10 @@ struct cpufreq_driver {
/*
* ->fast_switch() replacement for drivers that use an internal
* representation of performance levels and can pass hints other than
- * the target performance level to the hardware.
+ * the target performance level to the hardware. This can only be set
+ * if ->fast_switch is set too, because in those cases (under specific
+ * conditions) scale invariance can be disabled, which causes the
+ * schedutil governor to fall back to the latter.
*/
void (*adjust_perf)(unsigned int cpu,
unsigned long min_perf,
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 7fd704bb8f3d..d312ffbac4dd 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -108,7 +108,6 @@ struct devfreq_dev_profile {
unsigned long initial_freq;
unsigned int polling_ms;
enum devfreq_timer timer;
- bool is_cooling_device;
int (*target)(struct device *dev, unsigned long *freq, u32 flags);
int (*get_dev_status)(struct device *dev,
@@ -118,6 +117,8 @@ struct devfreq_dev_profile {
unsigned long *freq_table;
unsigned int max_state;
+
+ bool is_cooling_device;
};
/**
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index 9f4b6f5b822f..e6936cb25047 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -14,6 +14,12 @@
#include <linux/powercap.h>
#include <linux/cpuhotplug.h>
+/* Kind of register interface behind a struct rapl_if_priv (its 'type' field) */
+enum rapl_if_type {
+ RAPL_IF_MSR, /* RAPL I/F using MSR registers */
+ RAPL_IF_MMIO, /* RAPL I/F using MMIO registers */
+ RAPL_IF_TPMI, /* RAPL I/F using TPMI registers */
+};
+
enum rapl_domain_type {
RAPL_DOMAIN_PACKAGE, /* entire package/socket */
RAPL_DOMAIN_PP0, /* core power plane */
@@ -30,17 +36,23 @@ enum rapl_domain_reg_id {
RAPL_DOMAIN_REG_POLICY,
RAPL_DOMAIN_REG_INFO,
RAPL_DOMAIN_REG_PL4,
+ RAPL_DOMAIN_REG_UNIT,
+ RAPL_DOMAIN_REG_PL2,
RAPL_DOMAIN_REG_MAX,
};
struct rapl_domain;
enum rapl_primitives {
- ENERGY_COUNTER,
POWER_LIMIT1,
POWER_LIMIT2,
POWER_LIMIT4,
+ ENERGY_COUNTER,
FW_LOCK,
+ FW_HIGH_LOCK,
+ PL1_LOCK,
+ PL2_LOCK,
+ PL4_LOCK,
PL1_ENABLE, /* power limit 1, aka long term */
PL1_CLAMP, /* allow frequency to go below OS request */
@@ -74,12 +86,13 @@ struct rapl_domain_data {
unsigned long timestamp;
};
-#define NR_POWER_LIMITS (3)
+#define NR_POWER_LIMITS (POWER_LIMIT4 + 1)
+
struct rapl_power_limit {
struct powercap_zone_constraint *constraint;
- int prim_id; /* primitive ID used to enable */
struct rapl_domain *domain;
const char *name;
+ bool locked;
u64 last_power_limit;
};
@@ -96,7 +109,9 @@ struct rapl_domain {
struct rapl_power_limit rpl[NR_POWER_LIMITS];
u64 attr_map; /* track capabilities */
unsigned int state;
- unsigned int domain_energy_unit;
+ unsigned int power_unit;
+ unsigned int energy_unit;
+ unsigned int time_unit;
struct rapl_package *rp;
};
@@ -121,16 +136,20 @@ struct reg_action {
* registers.
* @write_raw: Callback for writing RAPL interface specific
* registers.
+ * @defaults: internal pointer to interface default settings
+ * @rpi: internal pointer to interface primitive info
*/
struct rapl_if_priv {
+ enum rapl_if_type type;
struct powercap_control_type *control_type;
- struct rapl_domain *platform_rapl_domain;
enum cpuhp_state pcap_rapl_online;
u64 reg_unit;
u64 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
int limits[RAPL_DOMAIN_MAX];
- int (*read_raw)(int cpu, struct reg_action *ra);
- int (*write_raw)(int cpu, struct reg_action *ra);
+ int (*read_raw)(int id, struct reg_action *ra);
+ int (*write_raw)(int id, struct reg_action *ra);
+ void *defaults;
+ void *rpi;
};
/* maximum rapl package domain name: package-%d-die-%d */
@@ -140,9 +159,6 @@ struct rapl_package {
unsigned int id; /* logical die id, equals physical 1-die systems */
unsigned int nr_domains;
unsigned long domain_map; /* bit map of active domains */
- unsigned int power_unit;
- unsigned int energy_unit;
- unsigned int time_unit;
struct rapl_domain *domains; /* array of domains, sized at runtime */
struct powercap_zone *power_zone; /* keep track of parent zone */
unsigned long power_limit_irq; /* keep track of package power limit
@@ -156,8 +172,8 @@ struct rapl_package {
struct rapl_if_priv *priv;
};
-struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv);
-struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv);
+struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu);
+struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu);
void rapl_remove_package(struct rapl_package *rp);
#endif /* __INTEL_RAPL_H__ */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index f16653f7be32..4d0095e8989e 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -202,6 +202,7 @@ struct platform_s2idle_ops {
};
#ifdef CONFIG_SUSPEND
+extern suspend_state_t pm_suspend_target_state;
extern suspend_state_t mem_sleep_current;
extern suspend_state_t mem_sleep_default;
@@ -337,6 +338,8 @@ extern bool sync_on_suspend_enabled;
#else /* !CONFIG_SUSPEND */
#define suspend_valid_only_mem NULL
+#define pm_suspend_target_state (PM_SUSPEND_ON)
+
static inline void pm_suspend_clear_flags(void) {}
static inline void pm_set_suspend_via_firmware(void) {}
static inline void pm_set_resume_via_firmware(void) {}
@@ -472,6 +475,8 @@ static inline int hibernate_quiet_exec(int (*func)(void *data), void *data) {
}
#endif /* CONFIG_HIBERNATION */
+int arch_resume_nosmt(void);
+
#ifdef CONFIG_HIBERNATION_SNAPSHOT_DEV
int is_hibernate_resume_dev(dev_t dev);
#else
@@ -507,7 +512,6 @@ extern void pm_report_max_hw_sleep(u64 t);
/* drivers/base/power/wakeup.c */
extern bool events_check_enabled;
-extern suspend_state_t pm_suspend_target_state;
extern bool pm_wakeup_pending(void);
extern void pm_system_wakeup(void);
@@ -555,6 +559,7 @@ static inline void unlock_system_sleep(unsigned int flags) {}
#ifdef CONFIG_PM_SLEEP_DEBUG
extern bool pm_print_times_enabled;
extern bool pm_debug_messages_on;
+extern bool pm_debug_messages_should_print(void);
static inline int pm_dyn_debug_messages_on(void)
{
#ifdef CONFIG_DYNAMIC_DEBUG
@@ -568,14 +573,14 @@ static inline int pm_dyn_debug_messages_on(void)
#endif
#define __pm_pr_dbg(fmt, ...) \
do { \
- if (pm_debug_messages_on) \
+ if (pm_debug_messages_should_print()) \
printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
else if (pm_dyn_debug_messages_on()) \
pr_debug(fmt, ##__VA_ARGS__); \
} while (0)
#define __pm_deferred_pr_dbg(fmt, ...) \
do { \
- if (pm_debug_messages_on) \
+ if (pm_debug_messages_should_print()) \
printk_deferred(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
} while (0)
#else
@@ -593,7 +598,8 @@ static inline int pm_dyn_debug_messages_on(void)
/**
* pm_pr_dbg - print pm sleep debug messages
*
- * If pm_debug_messages_on is enabled, print message.
+ * If pm_debug_messages_on is enabled and the system is entering/leaving
+ * suspend, print message.
* If pm_debug_messages_on is disabled and CONFIG_DYNAMIC_DEBUG is enabled,
* print message only from instances explicitly enabled on dynamic debug's
* control.
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 3113ec2f1db4..daa535012e51 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -556,6 +556,12 @@ power_attr_ro(pm_wakeup_irq);
bool pm_debug_messages_on __read_mostly;
+/*
+ * Tell whether PM debug messages should be printed right now: only when
+ * pm_debug_messages_on is set and pm_suspend_target_state is something
+ * other than PM_SUSPEND_ON.
+ */
+bool pm_debug_messages_should_print(void)
+{
+	if (!pm_debug_messages_on)
+		return false;
+
+	return pm_suspend_target_state != PM_SUSPEND_ON;
+}
+EXPORT_SYMBOL_GPL(pm_debug_messages_should_print);
+
static ssize_t pm_debug_messages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index cd8b7b35f1e8..b27affb7503f 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -398,7 +398,7 @@ struct mem_zone_bm_rtree {
unsigned int blocks; /* Number of Bitmap Blocks */
};
-/* strcut bm_position is used for browsing memory bitmaps */
+/* struct bm_position is used for browsing memory bitmaps */
struct bm_position {
struct mem_zone_bm_rtree *zone;