From 836aedb1414d4724b2ec68dd19810960c593720c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 24 Jan 2013 12:49:59 +0100 Subject: ACPI / PM: Expose power states of ACPI devices to user space Make it possible to retrieve the current power state of a device with ACPI power management from user space via sysfs by adding two new attributes, power_state and real_power_state, to the sysfs directory associated with the struct acpi_device object representing the device's ACPI node. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-power_state | 20 +++++++++++++++++++ .../ABI/testing/sysfs-devices-real_power_state | 23 ++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-devices-power_state create mode 100644 Documentation/ABI/testing/sysfs-devices-real_power_state (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-devices-power_state b/Documentation/ABI/testing/sysfs-devices-power_state new file mode 100644 index 000000000000..7ad9546748f0 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-power_state @@ -0,0 +1,20 @@ +What: /sys/devices/.../power_state +Date: January 2013 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../power_state attribute is only present for + device objects representing ACPI device nodes that provide power + management methods. + + If present, it contains a string representing the current ACPI + power state of the given device node. Its possible values, + "D0", "D1", "D2", "D3hot", and "D3cold", reflect the power state + names defined by the ACPI specification (ACPI 4 and above). + + If the device node uses shared ACPI power resources, this state + determines a list of power resources required not to be turned + off. However, some power resources needed by the device node in + higher-power (lower-number) states may also be ON because of + some other devices using them at the moment. 
+ + This attribute is read-only. diff --git a/Documentation/ABI/testing/sysfs-devices-real_power_state b/Documentation/ABI/testing/sysfs-devices-real_power_state new file mode 100644 index 000000000000..8b3527c82a7d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-real_power_state @@ -0,0 +1,23 @@ +What: /sys/devices/.../real_power_state +Date: January 2013 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../real_power_state attribute is only present + for device objects representing ACPI device nodes that provide + power management methods and use ACPI power resources for power + management. + + If present, it contains a string representing the real ACPI + power state of the given device node as returned by the _PSC + control method or inferred from the configuration of power + resources. Its possible values, "D0", "D1", "D2", "D3hot", and + "D3cold", reflect the power state names defined by the ACPI + specification (ACPI 4 and above). + + In some situations the value of this attribute may be different + from the value of the /sys/devices/.../power_state attribute for + the same device object. If that happens, some shared power + resources used by the device node are only ON because of some + other devices using them at the moment. + + This attribute is read-only. -- cgit v1.2.3-58-ga151 From b1c0f99bfb89cd9b42e3119ab822a8102fa87909 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 24 Jan 2013 12:50:09 +0100 Subject: ACPI / PM: Expose current status of ACPI power resources Since ACPI power resources are going to be used more extensively on new hardware platforms, it becomes necessary for user space (powertop in particular) to observe some properties of those resources for diagnostics purposes. For this reason, expose the current status of each ACPI power resource to user space via sysfs by adding a new resource_in_use attribute to the sysfs directory representing the given power resource. Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-resource_in_use | 12 ++++++++++ drivers/acpi/power.c | 26 +++++++++++++++++++++- drivers/acpi/scan.c | 3 +++ include/acpi/acpi_bus.h | 1 + 4 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/testing/sysfs-devices-resource_in_use (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-devices-resource_in_use b/Documentation/ABI/testing/sysfs-devices-resource_in_use new file mode 100644 index 000000000000..b4a3bc5922a3 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-resource_in_use @@ -0,0 +1,12 @@ +What: /sys/devices/.../resource_in_use +Date: January 2013 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../resource_in_use attribute is only present + for device objects representing ACPI power resources. + + If present, it contains a number (0 or 1) representing the + current status of the given power resource (0 means that the + resource is not in use and therefore it has been turned off). + + This attribute is read-only. 
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 3f16dd4db23e..946720a4db57 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -87,6 +87,12 @@ static DEFINE_MUTEX(power_resource_list_lock); Power Resource Management -------------------------------------------------------------------------- */ +static inline +struct acpi_power_resource *to_power_resource(struct acpi_device *device) +{ + return container_of(device, struct acpi_power_resource, device); +} + static struct acpi_power_resource *acpi_power_get_context(acpi_handle handle) { struct acpi_device *device; @@ -94,7 +100,7 @@ static struct acpi_power_resource *acpi_power_get_context(acpi_handle handle) if (acpi_bus_get_device(handle, &device)) return NULL; - return container_of(device, struct acpi_power_resource, device); + return to_power_resource(device); } static int acpi_power_resources_list_add(acpi_handle handle, @@ -678,6 +684,21 @@ static void acpi_release_power_resource(struct device *dev) kfree(resource); } +static ssize_t acpi_power_in_use_show(struct device *dev, + struct device_attribute *attr, + char *buf) { + struct acpi_power_resource *resource; + + resource = to_power_resource(to_acpi_device(dev)); + return sprintf(buf, "%u\n", !!resource->ref_count); +} +static DEVICE_ATTR(resource_in_use, 0444, acpi_power_in_use_show, NULL); + +static void acpi_power_sysfs_remove(struct acpi_device *device) +{ + device_remove_file(&device->dev, &dev_attr_resource_in_use); +} + int acpi_add_power_resource(acpi_handle handle) { struct acpi_power_resource *resource; @@ -725,6 +746,9 @@ int acpi_add_power_resource(acpi_handle handle) if (result) goto err; + if (!device_create_file(&device->dev, &dev_attr_resource_in_use)) + device->remove = acpi_power_sysfs_remove; + mutex_lock(&power_resource_list_lock); list_add(&resource->list_node, &acpi_power_resource_list); mutex_unlock(&power_resource_list_lock); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 
9761d589f3f5..9801837876b7 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -791,6 +791,9 @@ static void acpi_device_unregister(struct acpi_device *device) acpi_power_add_remove_device(device, false); acpi_device_remove_files(device); + if (device->remove) + device->remove(device); + device_del(&device->dev); /* * Drop the reference counts of all power resources the device depends diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index fca1b9cb27d9..aef56a9f4e70 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -280,6 +280,7 @@ struct acpi_device { struct mutex physical_node_lock; DECLARE_BITMAP(physical_node_id_bitmap, ACPI_MAX_PHYSICAL_NODE); struct list_head power_dependent; + void (*remove)(struct acpi_device *); }; static inline void *acpi_driver_data(struct acpi_device *d) -- cgit v1.2.3-58-ga151 From 18a387099b3e3fd901cc706f708b163aa45347b6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 25 Jan 2013 21:51:32 +0100 Subject: ACPI / PM: Expose lists of device power resources to user space Since ACPI power resources are going to be used more extensively on new hardware platforms, it is necessary to allow user space (powertop in particular) to look at the lists of power resources corresponding to different power states of devices for diagnostics and control purposes. For this reason, for each power state of an ACPI device node using power resources create a special attribute group under the device node's directory in sysfs containing links to sysfs directories representing the power resources in that list. The names of the new attribute groups are "power_resources_<state>", where <state> is the state name i.e. "D0", "D1", "D2", or "D3hot". Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-devices-power_resources_D0 | 13 +++ .../ABI/testing/sysfs-devices-power_resources_D1 | 14 +++ .../ABI/testing/sysfs-devices-power_resources_D2 | 14 +++ .../testing/sysfs-devices-power_resources_D3hot | 14 +++ drivers/acpi/power.c | 104 ++++++++++++++++++--- 5 files changed, 146 insertions(+), 13 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-devices-power_resources_D0 create mode 100644 Documentation/ABI/testing/sysfs-devices-power_resources_D1 create mode 100644 Documentation/ABI/testing/sysfs-devices-power_resources_D2 create mode 100644 Documentation/ABI/testing/sysfs-devices-power_resources_D3hot (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-devices-power_resources_D0 b/Documentation/ABI/testing/sysfs-devices-power_resources_D0 new file mode 100644 index 000000000000..73b77a6be196 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-power_resources_D0 @@ -0,0 +1,13 @@ +What: /sys/devices/.../power_resources_D0/ +Date: January 2013 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../power_resources_D0/ directory is only + present for device objects representing ACPI device nodes that + use ACPI power resources for power management. + + If present, it contains symbolic links to device directories + representing ACPI power resources that need to be turned on for + the given device node to be in ACPI power state D0. The names + of the links are the same as the names of the directories they + point to. diff --git a/Documentation/ABI/testing/sysfs-devices-power_resources_D1 b/Documentation/ABI/testing/sysfs-devices-power_resources_D1 new file mode 100644 index 000000000000..30c20703fb8c --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-power_resources_D1 @@ -0,0 +1,14 @@ +What: /sys/devices/.../power_resources_D1/ +Date: January 2013 +Contact: Rafael J. 
Wysocki +Description: + The /sys/devices/.../power_resources_D1/ directory is only + present for device objects representing ACPI device nodes that + use ACPI power resources for power management and support ACPI + power state D1. + + If present, it contains symbolic links to device directories + representing ACPI power resources that need to be turned on for + the given device node to be in ACPI power state D1. The names + of the links are the same as the names of the directories they + point to. diff --git a/Documentation/ABI/testing/sysfs-devices-power_resources_D2 b/Documentation/ABI/testing/sysfs-devices-power_resources_D2 new file mode 100644 index 000000000000..fd9d84b421e1 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-power_resources_D2 @@ -0,0 +1,14 @@ +What: /sys/devices/.../power_resources_D2/ +Date: January 2013 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../power_resources_D2/ directory is only + present for device objects representing ACPI device nodes that + use ACPI power resources for power management and support ACPI + power state D2. + + If present, it contains symbolic links to device directories + representing ACPI power resources that need to be turned on for + the given device node to be in ACPI power state D2. The names + of the links are the same as the names of the directories they + point to. diff --git a/Documentation/ABI/testing/sysfs-devices-power_resources_D3hot b/Documentation/ABI/testing/sysfs-devices-power_resources_D3hot new file mode 100644 index 000000000000..3df32c20addf --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-power_resources_D3hot @@ -0,0 +1,14 @@ +What: /sys/devices/.../power_resources_D3hot/ +Date: January 2013 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../power_resources_D3hot/ directory is only + present for device objects representing ACPI device nodes that + use ACPI power resources for power management and support ACPI + power state D3hot. 
+ + If present, it contains symbolic links to device directories + representing ACPI power resources that need to be turned on for + the given device node to be in ACPI power state D3hot. The + names of the links are the same as the names of the directories + they point to. diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 946720a4db57..9466f56b938f 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include "sleep.h" @@ -417,24 +418,101 @@ static void acpi_power_remove_dependent(struct acpi_power_resource *resource, } } -void acpi_power_add_remove_device(struct acpi_device *adev, bool add) +static struct attribute *attrs[] = { + NULL, +}; + +static struct attribute_group attr_groups[] = { + [ACPI_STATE_D0] = { + .name = "power_resources_D0", + .attrs = attrs, + }, + [ACPI_STATE_D1] = { + .name = "power_resources_D1", + .attrs = attrs, + }, + [ACPI_STATE_D2] = { + .name = "power_resources_D2", + .attrs = attrs, + }, + [ACPI_STATE_D3_HOT] = { + .name = "power_resources_D3hot", + .attrs = attrs, + }, +}; + +static void acpi_power_hide_list(struct acpi_device *adev, int state) +{ + struct acpi_device_power_state *ps = &adev->power.states[state]; + struct acpi_power_resource_entry *entry; + + if (list_empty(&ps->resources)) + return; + + list_for_each_entry_reverse(entry, &ps->resources, node) { + struct acpi_device *res_dev = &entry->resource->device; + + sysfs_remove_link_from_group(&adev->dev.kobj, + attr_groups[state].name, + dev_name(&res_dev->dev)); + } + sysfs_remove_group(&adev->dev.kobj, &attr_groups[state]); +} + +static void acpi_power_expose_list(struct acpi_device *adev, int state) { - if (adev->power.flags.power_resources) { - struct acpi_device_power_state *ps; - struct acpi_power_resource_entry *entry; - - ps = &adev->power.states[ACPI_STATE_D0]; - list_for_each_entry(entry, &ps->resources, node) { - struct acpi_power_resource *resource = entry->resource; - - 
if (add) - acpi_power_add_dependent(resource, adev); - else - acpi_power_remove_dependent(resource, adev); + struct acpi_device_power_state *ps = &adev->power.states[state]; + struct acpi_power_resource_entry *entry; + int ret; + + if (list_empty(&ps->resources)) + return; + + ret = sysfs_create_group(&adev->dev.kobj, &attr_groups[state]); + if (ret) + return; + + list_for_each_entry(entry, &ps->resources, node) { + struct acpi_device *res_dev = &entry->resource->device; + + ret = sysfs_add_link_to_group(&adev->dev.kobj, + attr_groups[state].name, + &res_dev->dev.kobj, + dev_name(&res_dev->dev)); + if (ret) { + acpi_power_hide_list(adev, state); + break; } } } +void acpi_power_add_remove_device(struct acpi_device *adev, bool add) +{ + struct acpi_device_power_state *ps; + struct acpi_power_resource_entry *entry; + int state; + + if (!adev->power.flags.power_resources) + return; + + ps = &adev->power.states[ACPI_STATE_D0]; + list_for_each_entry(entry, &ps->resources, node) { + struct acpi_power_resource *resource = entry->resource; + + if (add) + acpi_power_add_dependent(resource, adev); + else + acpi_power_remove_dependent(resource, adev); + } + + for (state = ACPI_STATE_D0; state <= ACPI_STATE_D3_HOT; state++) { + if (add) + acpi_power_expose_list(adev, state); + else + acpi_power_hide_list(adev, state); + } +} + int acpi_power_min_system_level(struct list_head *list) { struct acpi_power_resource_entry *entry; -- cgit v1.2.3-58-ga151 From 43720bd6014327ac454434496cb953edcdb9f8d6 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 11 Jan 2013 13:43:45 +0100 Subject: PM / tracing: remove deprecated power trace API The text in Documentation said it would be removed in 2.6.41; the text in the Kconfig said removal in the 3.1 release. Either way you look at it, we are well past both, so push it off a cliff. Note that the POWER_CSTATE and the POWER_PSTATE are part of the legacy tracing API. Remove all tracepoints which use these flags. 
As can be seen from context, most already have a trace entry via trace_cpu_idle anyways. Also, the cpufreq/cpufreq.c PSTATE one is actually unpaired, as compared to the CSTATE ones which all have a clear start/stop. As part of this, the trace_power_frequency also becomes orphaned, so it too is deleted. Signed-off-by: Paul Gortmaker Acked-by: Steven Rostedt Signed-off-by: Rafael J. Wysocki --- Documentation/trace/events-power.txt | 27 +---------- arch/arm/mach-omap2/pm34xx.c | 2 - arch/x86/kernel/process.c | 6 --- drivers/cpufreq/cpufreq.c | 1 - drivers/cpuidle/cpuidle.c | 2 - include/trace/events/power.h | 92 ------------------------------------ kernel/trace/Kconfig | 15 ------ kernel/trace/power-traces.c | 3 -- 8 files changed, 1 insertion(+), 147 deletions(-) (limited to 'Documentation') diff --git a/Documentation/trace/events-power.txt b/Documentation/trace/events-power.txt index cf794af22855..e1498ff8cf94 100644 --- a/Documentation/trace/events-power.txt +++ b/Documentation/trace/events-power.txt @@ -17,7 +17,7 @@ Cf. include/trace/events/power.h for the events definitions. 1. Power state switch events ============================ -1.1 New trace API +1.1 Trace API ----------------- A 'cpu' event class gathers the CPU-related events: cpuidle and @@ -41,31 +41,6 @@ The event which has 'state=4294967295' in the trace is very important to the use space tools which are using it to detect the end of the current state, and so to correctly draw the states diagrams and to calculate accurate statistics etc. -1.2 DEPRECATED trace API ------------------------- - -A new Kconfig option CONFIG_EVENT_POWER_TRACING_DEPRECATED with the default value of -'y' has been created. This allows the legacy trace power API to be used conjointly -with the new trace API. -The Kconfig option, the old trace API (in include/trace/events/power.h) and the -old trace points will disappear in a future release (namely 2.6.41). 
- -power_start "type=%lu state=%lu cpu_id=%lu" -power_frequency "type=%lu state=%lu cpu_id=%lu" -power_end "cpu_id=%lu" - -The 'type' parameter takes one of those macros: - . POWER_NONE = 0, - . POWER_CSTATE = 1, /* C-State */ - . POWER_PSTATE = 2, /* Frequency change or DVFS */ - -The 'state' parameter is set depending on the type: - . Target C-state for type=POWER_CSTATE, - . Target frequency for type=POWER_PSTATE, - -power_end is used to indicate the exit of a state, corresponding to the latest -power_start event. - 2. Clocks events ================ The clock events are used for clock enable/disable and for diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 7be3622cfc85..2d93d8b23835 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -351,12 +351,10 @@ static void omap3_pm_idle(void) if (omap_irq_pending()) goto out; - trace_power_start(POWER_CSTATE, 1, smp_processor_id()); trace_cpu_idle(1, smp_processor_id()); omap_sram_idle(); - trace_power_end(smp_processor_id()); trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); out: diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2ed787f15bf0..dcfc1f410dc4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -375,7 +375,6 @@ void cpu_idle(void) */ void default_idle(void) { - trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); trace_cpu_idle_rcuidle(1, smp_processor_id()); current_thread_info()->status &= ~TS_POLLING; /* @@ -389,7 +388,6 @@ void default_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; - trace_power_end_rcuidle(smp_processor_id()); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #ifdef CONFIG_APM_MODULE @@ -423,7 +421,6 @@ void stop_this_cpu(void *dummy) static void mwait_idle(void) { if (!need_resched()) { - trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); trace_cpu_idle_rcuidle(1, smp_processor_id()); if 
(this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)&current_thread_info()->flags); @@ -434,7 +431,6 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); - trace_power_end_rcuidle(smp_processor_id()); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else local_irq_enable(); @@ -447,12 +443,10 @@ static void mwait_idle(void) */ static void poll_idle(void) { - trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id()); trace_cpu_idle_rcuidle(0, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); - trace_power_end_rcuidle(smp_processor_id()); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 1f93dbd72355..99faadf454ec 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -294,7 +294,6 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new, (unsigned long)freqs->cpu); - trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu); trace_cpu_frequency(freqs->new, freqs->cpu); srcu_notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_POSTCHANGE, freqs); diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index e1f6860e069c..eba69290e074 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -144,7 +144,6 @@ int cpuidle_idle_call(void) return 0; } - trace_power_start_rcuidle(POWER_CSTATE, next_state, dev->cpu); trace_cpu_idle_rcuidle(next_state, dev->cpu); if (cpuidle_state_is_coupled(dev, drv, next_state)) @@ -153,7 +152,6 @@ int cpuidle_idle_call(void) else entered_state = cpuidle_enter_state(dev, drv, next_state); - trace_power_end_rcuidle(dev->cpu); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); /* give the governor an opportunity to reflect on the outcome */ diff --git a/include/trace/events/power.h 
b/include/trace/events/power.h index 0c9783841a30..427acab5d69a 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -99,98 +99,6 @@ DEFINE_EVENT(wakeup_source, wakeup_source_deactivate, TP_ARGS(name, state) ); -#ifdef CONFIG_EVENT_POWER_TRACING_DEPRECATED - -/* - * The power events are used for cpuidle & suspend (power_start, power_end) - * and for cpufreq (power_frequency) - */ -DECLARE_EVENT_CLASS(power, - - TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), - - TP_ARGS(type, state, cpu_id), - - TP_STRUCT__entry( - __field( u64, type ) - __field( u64, state ) - __field( u64, cpu_id ) - ), - - TP_fast_assign( - __entry->type = type; - __entry->state = state; - __entry->cpu_id = cpu_id; - ), - - TP_printk("type=%lu state=%lu cpu_id=%lu", (unsigned long)__entry->type, - (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) -); - -DEFINE_EVENT(power, power_start, - - TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), - - TP_ARGS(type, state, cpu_id) -); - -DEFINE_EVENT(power, power_frequency, - - TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), - - TP_ARGS(type, state, cpu_id) -); - -TRACE_EVENT(power_end, - - TP_PROTO(unsigned int cpu_id), - - TP_ARGS(cpu_id), - - TP_STRUCT__entry( - __field( u64, cpu_id ) - ), - - TP_fast_assign( - __entry->cpu_id = cpu_id; - ), - - TP_printk("cpu_id=%lu", (unsigned long)__entry->cpu_id) - -); - -/* Deprecated dummy functions must be protected against multi-declartion */ -#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED -#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED - -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; -#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */ - -#else /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */ - -#ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED -#define _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -/* 
These dummy declaration have to be ripped out when the deprecated - events get removed */ -static inline void trace_power_start(u64 type, u64 state, u64 cpuid) {}; -static inline void trace_power_end(u64 cpuid) {}; -static inline void trace_power_start_rcuidle(u64 type, u64 state, u64 cpuid) {}; -static inline void trace_power_end_rcuidle(u64 cpuid) {}; -static inline void trace_power_frequency(u64 type, u64 state, u64 cpuid) {}; -#endif /* _PWR_EVENT_AVOID_DOUBLE_DEFINING_DEPRECATED */ - -#endif /* CONFIG_EVENT_POWER_TRACING_DEPRECATED */ - /* * The clock events are used for clock enable/disable and for * clock rate change diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5d89335a485f..ad0a067ad4b3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -78,21 +78,6 @@ config EVENT_TRACING select CONTEXT_SWITCH_TRACER bool -config EVENT_POWER_TRACING_DEPRECATED - depends on EVENT_TRACING - bool "Deprecated power event trace API, to be removed" - default y - help - Provides old power event types: - C-state/idle accounting events: - power:power_start - power:power_end - and old cpufreq accounting event: - power:power_frequency - This is for userspace compatibility - and will vanish after 5 kernel iterations, - namely 3.1. - config CONTEXT_SWITCH_TRACER bool diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c index f55fcf61b223..1c71382b283d 100644 --- a/kernel/trace/power-traces.c +++ b/kernel/trace/power-traces.c @@ -13,8 +13,5 @@ #define CREATE_TRACE_POINTS #include -#ifdef EVENT_POWER_TRACING_DEPRECATED -EXPORT_TRACEPOINT_SYMBOL_GPL(power_start); -#endif EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle); -- cgit v1.2.3-58-ga151 From fbadc58dd3a52c330c8f3926aa93011bf9d91fa0 Mon Sep 17 00:00:00 2001 From: ShuoX Liu Date: Wed, 23 Jan 2013 21:49:37 +0100 Subject: PM / Runtime: Add new helper function: pm_runtime_active() This boolean function simply returns whether or not the runtime status of the device is 'active'. 
The typical scenario is that a driver first calls pm_runtime_get() and then checks pm_runtime_active() in atomic context. Also add an entry to Documentation/power/runtime_pm.txt. Signed-off-by: Yanmin Zhang Signed-off-by: ShuoX Liu Signed-off-by: Rafael J. Wysocki --- Documentation/power/runtime_pm.txt | 4 ++++ include/linux/pm_runtime.h | 7 +++++++ 2 files changed, 11 insertions(+) (limited to 'Documentation') diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 03591a750f99..6c9f5d9aa115 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -426,6 +426,10 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: 'power.runtime_error' is set or 'power.disable_depth' is greater than zero) + bool pm_runtime_active(struct device *dev); + - return true if the device's runtime PM status is 'active' or its + 'power.disable_depth' field is not equal to zero, or false otherwise + bool pm_runtime_suspended(struct device *dev); - return true if the device's runtime PM status is 'suspended' and its 'power.disable_depth' field is equal to zero, or false otherwise diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index f271860c78d5..c785c215abfc 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -80,6 +80,12 @@ static inline bool pm_runtime_suspended(struct device *dev) && !dev->power.disable_depth; } +static inline bool pm_runtime_active(struct device *dev) +{ + return dev->power.runtime_status == RPM_ACTIVE + || dev->power.disable_depth; +} + static inline bool pm_runtime_status_suspended(struct device *dev) { return dev->power.runtime_status == RPM_SUSPENDED; @@ -132,6 +138,7 @@ static inline void pm_runtime_put_noidle(struct device *dev) {} static inline bool device_run_wake(struct device *dev) { return false; } static inline void device_set_run_wake(struct device *dev, bool enable) {} static inline bool pm_runtime_suspended(struct device *dev) { return false; } 
+static inline bool pm_runtime_active(struct device *dev) { return true; } static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } static inline bool pm_runtime_enabled(struct device *dev) { return false; } -- cgit v1.2.3-58-ga151 From ca589f9469641916f4f9bd6a820012a27102ef63 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 30 Jan 2013 14:27:29 +0100 Subject: ACPI / scan: Introduce struct acpi_scan_handler Introduce struct acpi_scan_handler for representing objects that will do configuration tasks depending on ACPI device nodes' hardware IDs (HIDs). Currently, those tasks are done either directly by the ACPI namespace scanning code or by ACPI device drivers designed specifically for this purpose. None of the above is desirable, however, because doing that directly in the namespace scanning code makes that code overly complicated and difficult to follow and doing that in "special" device drivers leads to a great deal of confusion about their role and to confusing interactions with the driver core (for example, sysfs directories are created for those drivers, but they are completely unnecessary and only increase the kernel's memory footprint in vain). Signed-off-by: Rafael J. Wysocki Acked-by: Yinghai Lu Acked-by: Yasuaki Ishimatsu Acked-by: Toshi Kani --- Documentation/acpi/scan_handlers.txt | 77 ++++++++++++++++++++++++++++++++++++ drivers/acpi/scan.c | 60 ++++++++++++++++++++++++---- include/acpi/acpi_bus.h | 14 +++++++ 3 files changed, 144 insertions(+), 7 deletions(-) create mode 100644 Documentation/acpi/scan_handlers.txt (limited to 'Documentation') diff --git a/Documentation/acpi/scan_handlers.txt b/Documentation/acpi/scan_handlers.txt new file mode 100644 index 000000000000..3246ccf15992 --- /dev/null +++ b/Documentation/acpi/scan_handlers.txt @@ -0,0 +1,77 @@ +ACPI Scan Handlers + +Copyright (C) 2012, Intel Corporation +Author: Rafael J. 
Wysocki + +During system initialization and ACPI-based device hot-add, the ACPI namespace +is scanned in search of device objects that generally represent various pieces +of hardware. This causes a struct acpi_device object to be created and +registered with the driver core for every device object in the ACPI namespace +and the hierarchy of those struct acpi_device objects reflects the namespace +layout (i.e. parent device objects in the namespace are represented by parent +struct acpi_device objects and analogously for their children). Those struct +acpi_device objects are referred to as "device nodes" in what follows, but they +should not be confused with struct device_node objects used by the Device Trees +parsing code (although their role is analogous to the role of those objects). + +During ACPI-based device hot-remove device nodes representing pieces of hardware +being removed are unregistered and deleted. + +The core ACPI namespace scanning code in drivers/acpi/scan.c carries out basic +initialization of device nodes, such as retrieving common configuration +information from the device objects represented by them and populating them with +appropriate data, but some of them require additional handling after they have +been registered. For example, if the given device node represents a PCI host +bridge, its registration should cause the PCI bus under that bridge to be +enumerated and PCI devices on that bus to be registered with the driver core. +Similarly, if the device node represents a PCI interrupt link, it is necessary +to configure that link so that the kernel can use it. + +Those additional configuration tasks usually depend on the type of the hardware +component represented by the given device node which can be determined on the +basis of the device node's hardware ID (HID). 
They are performed by objects +called ACPI scan handlers represented by the following structure: + +struct acpi_scan_handler { + const struct acpi_device_id *ids; + struct list_head list_node; + int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id); + void (*detach)(struct acpi_device *dev); +}; + +where ids is the list of IDs of device nodes the given handler is supposed to +take care of, list_node is the hook to the global list of ACPI scan handlers +maintained by the ACPI core and the .attach() and .detach() callbacks are +executed, respectively, after registration of new device nodes and before +unregistration of device nodes the handler attached to previously. + +The namespace scanning function, acpi_bus_scan(), first registers all of the +device nodes in the given namespace scope with the driver core. Then, it tries +to match a scan handler against each of them using the ids arrays of the +available scan handlers. If a matching scan handler is found, its .attach() +callback is executed for the given device node. If that callback returns 1, +that means that the handler has claimed the device node and is now responsible +for carrying out any additional configuration tasks related to it. It also will +be responsible for preparing the device node for unregistration in that case. +The device node's handler field is then populated with the address of the scan +handler that has claimed it. + +If the .attach() callback returns 0, it means that the device node is not +interesting to the given scan handler and may be matched against the next scan +handler in the list. If it returns a (negative) error code, that means that +the namespace scan should be terminated due to a serious error. The error code +returned should then reflect the type of the error. + +The namespace trimming function, acpi_bus_trim(), first executes .detach() +callbacks from the scan handlers of all device nodes in the given namespace +scope (if they have scan handlers). 
Next, it unregisters all of the device +nodes in that scope. + +ACPI scan handlers can be added to the list maintained by the ACPI core with the +help of the acpi_scan_add_handler() function taking a pointer to the new scan +handler as an argument. The order in which scan handlers are added to the list +is the order in which they are matched against device nodes during namespace +scans. + +All scan handlers must be added to the list before acpi_bus_scan() is run for +the first time and they cannot be removed from it. diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index fe9f2c926663..1453cd0672fb 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -53,6 +53,7 @@ static const struct acpi_device_id acpi_platform_device_ids[] = { static LIST_HEAD(acpi_device_list); static LIST_HEAD(acpi_bus_id_list); static DEFINE_MUTEX(acpi_scan_lock); +static LIST_HEAD(acpi_scan_handlers_list); DEFINE_MUTEX(acpi_device_lock); LIST_HEAD(acpi_wakeup_device_list); @@ -62,6 +63,15 @@ struct acpi_device_bus_id{ struct list_head node; }; +int acpi_scan_add_handler(struct acpi_scan_handler *handler) +{ + if (!handler || !handler->attach) + return -EINVAL; + + list_add_tail(&handler->list_node, &acpi_scan_handlers_list); + return 0; +} + /* * Creates hid/cid(s) string needed for modalias and uevent * e.g.
on a device with hid:IBM0001 and cid:ACPI0001 you get: @@ -1570,20 +1580,42 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, u32 lvl_not_used, return AE_OK; } +static int acpi_scan_attach_handler(struct acpi_device *device) +{ + struct acpi_scan_handler *handler; + int ret = 0; + + list_for_each_entry(handler, &acpi_scan_handlers_list, list_node) { + const struct acpi_device_id *id; + + id = __acpi_match_device(device, handler->ids); + if (!id) + continue; + + ret = handler->attach(device, id); + if (ret > 0) { + device->handler = handler; + break; + } else if (ret < 0) { + break; + } + } + return ret; +} + static acpi_status acpi_bus_device_attach(acpi_handle handle, u32 lvl_not_used, void *not_used, void **ret_not_used) { const struct acpi_device_id *id; - acpi_status status = AE_OK; struct acpi_device *device; unsigned long long sta_not_used; - int type_not_used; + int ret; /* * Ignore errors ignored by acpi_bus_check_add() to avoid terminating * namespace walks prematurely. */ - if (acpi_bus_type_and_status(handle, &type_not_used, &sta_not_used)) + if (acpi_bus_type_and_status(handle, &ret, &sta_not_used)) return AE_OK; if (acpi_bus_get_device(handle, &device)) @@ -1593,10 +1625,15 @@ static acpi_status acpi_bus_device_attach(acpi_handle handle, u32 lvl_not_used, if (id) { /* This is a known good platform device. */ acpi_create_platform_device(device, id->driver_data); - } else if (device_attach(&device->dev) < 0) { - status = AE_CTRL_DEPTH; + return AE_OK; } - return status; + + ret = acpi_scan_attach_handler(device); + if (ret) + return ret > 0 ? AE_OK : AE_CTRL_DEPTH; + + ret = device_attach(&device->dev); + return ret >= 0 ? 
AE_OK : AE_CTRL_DEPTH; } /** @@ -1639,8 +1676,17 @@ static acpi_status acpi_bus_device_detach(acpi_handle handle, u32 lvl_not_used, struct acpi_device *device = NULL; if (!acpi_bus_get_device(handle, &device)) { + struct acpi_scan_handler *dev_handler = device->handler; + device->removal_type = ACPI_BUS_REMOVAL_EJECT; - device_release_driver(&device->dev); + if (dev_handler) { + if (dev_handler->detach) + dev_handler->detach(device); + + device->handler = NULL; + } else { + device_release_driver(&device->dev); + } } return AE_OK; } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index ad0a86ac5cce..41850cb21730 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -83,6 +83,18 @@ enum acpi_bus_device_type { struct acpi_driver; struct acpi_device; +/* + * ACPI Scan Handler + * ----------------- + */ + +struct acpi_scan_handler { + const struct acpi_device_id *ids; + struct list_head list_node; + int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id); + void (*detach)(struct acpi_device *dev); +}; + /* * ACPI Driver * ----------- @@ -269,6 +281,7 @@ struct acpi_device { struct acpi_device_wakeup wakeup; struct acpi_device_perf performance; struct acpi_device_dir dir; + struct acpi_scan_handler *handler; struct acpi_driver *driver; void *driver_data; struct device dev; @@ -382,6 +395,7 @@ int acpi_bus_receive_event(struct acpi_bus_event *event); static inline int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data) { return 0; } #endif +int acpi_scan_add_handler(struct acpi_scan_handler *handler); int acpi_bus_register_driver(struct acpi_driver *driver); void acpi_bus_unregister_driver(struct acpi_driver *driver); int acpi_bus_scan(acpi_handle handle); -- cgit v1.2.3-58-ga151 From 951fc5f45836988c7df1d05c7f4658f331e7a920 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 31 Jan 2013 02:03:53 +0000 Subject: cpufreq: Update Documentation for cpus and related_cpus Documentation related to cpus and 
related_cpus is confusing and not very clear. Over that CPUFreq core has seen much changes recently. Lets update documentation and comments for cpus and related_cpus. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- Documentation/cpu-freq/cpu-drivers.txt | 6 ++++++ Documentation/cpu-freq/user-guide.txt | 8 ++++---- include/linux/cpufreq.h | 6 ++++-- 3 files changed, 14 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index c436096351f8..72f70b16d299 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt @@ -111,6 +111,12 @@ policy->governor must contain the "default policy" for For setting some of these values, the frequency table helpers might be helpful. See the section 2 for more information on them. +SMP systems normally have same clock source for a group of cpus. For these the +.init() would be called only once for the first online cpu. Here the .init() +routine must initialize policy->cpus with mask of all possible cpus (Online + +Offline) that share the clock. Then the core would copy this mask onto +policy->related_cpus and will reset policy->cpus to carry only online cpus. + 1.3 verify ------------ diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index 04f6b32993e6..ff2f28332cc4 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -190,11 +190,11 @@ scaling_max_freq show the current "policy limits" (in first set scaling_max_freq, then scaling_min_freq. -affected_cpus : List of CPUs that require software coordination - of frequency. +affected_cpus : List of Online CPUs that require software + coordination of frequency. -related_cpus : List of CPUs that need some sort of frequency - coordination, whether software or hardware. 
+related_cpus : List of Online + Offline CPUs that need software + coordination of frequency. scaling_driver : Hardware driver for cpufreq. diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 5fdc6c6e3f8a..753b198750cf 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -89,8 +89,10 @@ struct cpufreq_real_policy { }; struct cpufreq_policy { - cpumask_var_t cpus; /* CPUs requiring sw coordination */ - cpumask_var_t related_cpus; /* CPUs with any coordination */ + /* CPUs sharing clock, require sw coordination */ + cpumask_var_t cpus; /* Online CPUs only */ + cpumask_var_t related_cpus; /* Online + Offline CPUs */ + unsigned int shared_type; /* ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu nr of CPU managing this policy */ -- cgit v1.2.3-58-ga151 From 2a4bd9f0db24ba14c8b38777d77add2682233c79 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 5 Feb 2013 22:52:51 +0100 Subject: cpufreq: kirkwood: Add a cpufreq driver for Marvell Kirkwood SoCs The Marvell Kirkwood SoCs have simple cpufreq support in hardware. The CPU can either use the a high speed cpu clock, or the slower DDR clock. Add a driver to swap between these two clock sources. Signed-off-by: Andrew Lunn Acked-by: Jason Cooper Acked-by: Viresh Kumar Signed-off-by: Rafael J. 
Wysocki --- Documentation/devicetree/bindings/arm/kirkwood.txt | 27 +++ drivers/clk/mvebu/clk-gating-ctrl.c | 1 + drivers/cpufreq/Kconfig.arm | 6 + drivers/cpufreq/Makefile | 1 + drivers/cpufreq/kirkwood-cpufreq.c | 259 +++++++++++++++++++++ 5 files changed, 294 insertions(+) create mode 100644 Documentation/devicetree/bindings/arm/kirkwood.txt create mode 100644 drivers/cpufreq/kirkwood-cpufreq.c (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/arm/kirkwood.txt b/Documentation/devicetree/bindings/arm/kirkwood.txt new file mode 100644 index 000000000000..98cce9a653eb --- /dev/null +++ b/Documentation/devicetree/bindings/arm/kirkwood.txt @@ -0,0 +1,27 @@ +Marvell Kirkwood Platforms Device Tree Bindings +----------------------------------------------- + +Boards with a SoC of the Marvell Kirkwood +shall have the following property: + +Required root node property: + +compatible: must contain "marvell,kirkwood"; + +In order to support the kirkwood cpufreq driver, there must be a node +cpus/cpu@0 with three clocks, "cpu_clk", "ddrclk" and "powersave", +where the "powersave" clock is a gating clock used to switch the CPU +between the "cpu_clk" and the "ddrclk". 
+ +Example: + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "marvell,sheeva-88SV131"; + clocks = <&core_clk 1>, <&core_clk 3>, <&gate_clk 11>; + clock-names = "cpu_clk", "ddrclk", "powersave"; + }; diff --git a/drivers/clk/mvebu/clk-gating-ctrl.c b/drivers/clk/mvebu/clk-gating-ctrl.c index 8fa5408b6c7d..ebf141d4374b 100644 --- a/drivers/clk/mvebu/clk-gating-ctrl.c +++ b/drivers/clk/mvebu/clk-gating-ctrl.c @@ -193,6 +193,7 @@ static const struct mvebu_soc_descr __initconst kirkwood_gating_descr[] = { { "runit", NULL, 7 }, { "xor0", NULL, 8 }, { "audio", NULL, 9 }, + { "powersave", "cpuclk", 11 }, { "sata0", NULL, 14 }, { "sata1", NULL, 15 }, { "xor1", NULL, 16 }, diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index c65226c4717d..7f333af1c059 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -77,6 +77,12 @@ config ARM_EXYNOS5250_CPUFREQ This adds the CPUFreq driver for Samsung EXYNOS5250 SoC. +config ARM_KIRKWOOD_CPUFREQ + def_bool ARCH_KIRKWOOD && OF + help + This adds the CPUFreq driver for Marvell Kirkwood + SoCs. 
+ config ARM_IMX6Q_CPUFREQ tristate "Freescale i.MX6Q cpufreq support" depends on SOC_IMX6Q diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index cd3d4ffe9626..5399c45ac311 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_ARM_EXYNOS_CPUFREQ) += exynos-cpufreq.o obj-$(CONFIG_ARM_EXYNOS4210_CPUFREQ) += exynos4210-cpufreq.o obj-$(CONFIG_ARM_EXYNOS4X12_CPUFREQ) += exynos4x12-cpufreq.o obj-$(CONFIG_ARM_EXYNOS5250_CPUFREQ) += exynos5250-cpufreq.o +obj-$(CONFIG_ARM_KIRKWOOD_CPUFREQ) += kirkwood-cpufreq.o obj-$(CONFIG_ARM_OMAP2PLUS_CPUFREQ) += omap-cpufreq.o obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o obj-$(CONFIG_ARM_HIGHBANK_CPUFREQ) += highbank-cpufreq.o diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c new file mode 100644 index 000000000000..0e83e3c24f5b --- /dev/null +++ b/drivers/cpufreq/kirkwood-cpufreq.c @@ -0,0 +1,259 @@ +/* + * kirkwood_freq.c: cpufreq driver for the Marvell kirkwood + * + * Copyright (C) 2013 Andrew Lunn + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CPU_SW_INT_BLK BIT(28) + +static struct priv +{ + struct clk *cpu_clk; + struct clk *ddr_clk; + struct clk *powersave_clk; + struct device *dev; + void __iomem *base; +} priv; + +#define STATE_CPU_FREQ 0x01 +#define STATE_DDR_FREQ 0x02 + +/* + * Kirkwood can swap the clock to the CPU between two clocks: + * + * - cpu clk + * - ddr clk + * + * The frequencies are set at runtime before registering this * + * table. 
+ */ +static struct cpufreq_frequency_table kirkwood_freq_table[] = { + {STATE_CPU_FREQ, 0}, /* CPU uses cpuclk */ + {STATE_DDR_FREQ, 0}, /* CPU uses ddrclk */ + {0, CPUFREQ_TABLE_END}, +}; + +static unsigned int kirkwood_cpufreq_get_cpu_frequency(unsigned int cpu) +{ + if (__clk_is_enabled(priv.powersave_clk)) + return kirkwood_freq_table[1].frequency; + return kirkwood_freq_table[0].frequency; +} + +static void kirkwood_cpufreq_set_cpu_state(unsigned int index) +{ + struct cpufreq_freqs freqs; + unsigned int state = kirkwood_freq_table[index].index; + unsigned long reg; + + freqs.old = kirkwood_cpufreq_get_cpu_frequency(0); + freqs.new = kirkwood_freq_table[index].frequency; + freqs.cpu = 0; /* Kirkwood is UP */ + + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + dev_dbg(priv.dev, "Attempting to set frequency to %i KHz\n", + kirkwood_freq_table[index].frequency); + dev_dbg(priv.dev, "old frequency was %i KHz\n", + kirkwood_cpufreq_get_cpu_frequency(0)); + + if (freqs.old != freqs.new) { + local_irq_disable(); + + /* Disable interrupts to the CPU */ + reg = readl_relaxed(priv.base); + reg |= CPU_SW_INT_BLK; + writel_relaxed(reg, priv.base); + + switch (state) { + case STATE_CPU_FREQ: + clk_disable(priv.powersave_clk); + break; + case STATE_DDR_FREQ: + clk_enable(priv.powersave_clk); + break; + } + + /* Wait-for-Interrupt, while the hardware changes frequency */ + cpu_do_idle(); + + /* Enable interrupts to the CPU */ + reg = readl_relaxed(priv.base); + reg &= ~CPU_SW_INT_BLK; + writel_relaxed(reg, priv.base); + + local_irq_enable(); + } + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +}; + +static int kirkwood_cpufreq_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, kirkwood_freq_table); +} + +static int kirkwood_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int index = 0; + + if (cpufreq_frequency_table_target(policy, kirkwood_freq_table, + 
target_freq, relation, &index)) + return -EINVAL; + + kirkwood_cpufreq_set_cpu_state(index); + + return 0; +} + +/* Module init and exit code */ +static int kirkwood_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + int result; + + /* cpuinfo and default policy values */ + policy->cpuinfo.transition_latency = 5000; /* 5uS */ + policy->cur = kirkwood_cpufreq_get_cpu_frequency(0); + + result = cpufreq_frequency_table_cpuinfo(policy, kirkwood_freq_table); + if (result) + return result; + + cpufreq_frequency_table_get_attr(kirkwood_freq_table, policy->cpu); + + return 0; +} + +static int kirkwood_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static struct freq_attr *kirkwood_cpufreq_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static struct cpufreq_driver kirkwood_cpufreq_driver = { + .get = kirkwood_cpufreq_get_cpu_frequency, + .verify = kirkwood_cpufreq_verify, + .target = kirkwood_cpufreq_target, + .init = kirkwood_cpufreq_cpu_init, + .exit = kirkwood_cpufreq_cpu_exit, + .name = "kirkwood-cpufreq", + .owner = THIS_MODULE, + .attr = kirkwood_cpufreq_attr, +}; + +static int kirkwood_cpufreq_probe(struct platform_device *pdev) +{ + struct device_node *np; + struct resource *res; + int err; + + priv.dev = &pdev->dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Cannot get memory resource\n"); + return -ENODEV; + } + priv.base = devm_request_and_ioremap(&pdev->dev, res); + if (!priv.base) { + dev_err(&pdev->dev, "Cannot ioremap\n"); + return -EADDRNOTAVAIL; + } + + np = of_find_node_by_path("/cpus/cpu@0"); + if (!np) + return -ENODEV; + + priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk"); + if (IS_ERR(priv.cpu_clk)) { + dev_err(priv.dev, "Unable to get cpuclk"); + return PTR_ERR(priv.cpu_clk); + } + + clk_prepare_enable(priv.cpu_clk); + kirkwood_freq_table[0].frequency = clk_get_rate(priv.cpu_clk) / 1000; + + priv.ddr_clk 
= of_clk_get_by_name(np, "ddrclk"); + if (IS_ERR(priv.ddr_clk)) { + dev_err(priv.dev, "Unable to get ddrclk"); + err = PTR_ERR(priv.ddr_clk); + goto out_cpu; + } + + clk_prepare_enable(priv.ddr_clk); + kirkwood_freq_table[1].frequency = clk_get_rate(priv.ddr_clk) / 1000; + + priv.powersave_clk = of_clk_get_by_name(np, "powersave"); + if (IS_ERR(priv.powersave_clk)) { + dev_err(priv.dev, "Unable to get powersave"); + err = PTR_ERR(priv.powersave_clk); + goto out_ddr; + } + clk_prepare(priv.powersave_clk); + + of_node_put(np); + np = NULL; + + err = cpufreq_register_driver(&kirkwood_cpufreq_driver); + if (!err) + return 0; + + dev_err(priv.dev, "Failed to register cpufreq driver"); + + clk_disable_unprepare(priv.powersave_clk); +out_ddr: + clk_disable_unprepare(priv.ddr_clk); +out_cpu: + clk_disable_unprepare(priv.cpu_clk); + of_node_put(np); + + return err; +} + +static int kirkwood_cpufreq_remove(struct platform_device *pdev) +{ + cpufreq_unregister_driver(&kirkwood_cpufreq_driver); + + clk_disable_unprepare(priv.powersave_clk); + clk_disable_unprepare(priv.ddr_clk); + clk_disable_unprepare(priv.cpu_clk); + + return 0; +} + +static struct platform_driver kirkwood_cpufreq_platform_driver = { + .probe = kirkwood_cpufreq_probe, + .remove = kirkwood_cpufreq_remove, + .driver = { + .name = "kirkwood-cpufreq", + .owner = THIS_MODULE, + }, +}; + +module_platform_driver(kirkwood_cpufreq_platform_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Andrew Lunn Date: Fri, 1 Feb 2013 08:56:03 +0000 Subject: suspend: enable freeze timeout configuration through sys At present, the value of timeout for freezing is 20s, which is meaningless in case that one thread is frozen with mutex locked and another thread is trying to lock the mutex, as this time of freezing will fail unavoidably. And if there is no new wakeup event registered, the system will waste at most 20s for such meaningless trying of freezing. 
With this patch, the value of timeout can be configured to smaller value, so such meaningless trying of freezing will be aborted in earlier time, and later freezing can be also triggered in earlier time. And more power will be saved. In normal case on mobile phone, it costs real little time to freeze processes. On some platform, it only costs about 20ms to freeze user space processes and 10ms to freeze kernel freezable threads. Signed-off-by: Liu Chuansheng Signed-off-by: Li Fei Signed-off-by: Rafael J. Wysocki --- Documentation/power/freezing-of-tasks.txt | 5 +++++ include/linux/freezer.h | 5 +++++ kernel/power/main.c | 27 +++++++++++++++++++++++++++ kernel/power/process.c | 4 ++-- 4 files changed, 39 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt index 6ec291ea1c78..85894d83b352 100644 --- a/Documentation/power/freezing-of-tasks.txt +++ b/Documentation/power/freezing-of-tasks.txt @@ -223,3 +223,8 @@ since they ask the freezer to skip freezing this task, since it is anyway only after the entire suspend/hibernation sequence is complete. So, to summarize, use [un]lock_system_sleep() instead of directly using mutex_[un]lock(&pm_mutex). That would prevent freezing failures. + +V. Miscellaneous +/sys/power/pm_freeze_timeout controls the maximum time allowed for freezing +all user space processes or all freezable kernel threads, in milliseconds. +The default value is 20000; the valid range is that of an unsigned integer.
diff --git a/include/linux/freezer.h b/include/linux/freezer.h index e4238ceaa4d6..e70df40d84f6 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -12,6 +12,11 @@ extern atomic_t system_freezing_cnt; /* nr of freezing conds in effect */ extern bool pm_freezing; /* PM freezing in effect */ extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ +/* + * Timeout for stopping processes + */ +extern unsigned int freeze_timeout_msecs; + /* * Check if a process has been frozen */ diff --git a/kernel/power/main.c b/kernel/power/main.c index b1c26a92ca9f..d77663bfedeb 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -553,6 +553,30 @@ power_attr(pm_trace_dev_match); #endif /* CONFIG_PM_TRACE */ +#ifdef CONFIG_FREEZER +static ssize_t pm_freeze_timeout_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", freeze_timeout_msecs); +} + +static ssize_t pm_freeze_timeout_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long val; + + if (kstrtoul(buf, 10, &val)) + return -EINVAL; + + freeze_timeout_msecs = val; + return n; +} + +power_attr(pm_freeze_timeout); + +#endif /* CONFIG_FREEZER*/ + static struct attribute * g[] = { &state_attr.attr, #ifdef CONFIG_PM_TRACE @@ -575,6 +599,9 @@ static struct attribute * g[] = { #ifdef CONFIG_PM_SLEEP_DEBUG &pm_print_times_attr.attr, #endif +#endif +#ifdef CONFIG_FREEZER + &pm_freeze_timeout_attr.attr, #endif NULL, }; diff --git a/kernel/power/process.c b/kernel/power/process.c index d5a258b60c6f..98088e0e71e8 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -21,7 +21,7 @@ /* * Timeout for stopping processes */ -#define TIMEOUT (20 * HZ) +unsigned int __read_mostly freeze_timeout_msecs = 20 * MSEC_PER_SEC; static int try_to_freeze_tasks(bool user_only) { @@ -36,7 +36,7 @@ static int try_to_freeze_tasks(bool user_only) do_gettimeofday(&start); - end_time = jiffies + TIMEOUT; + end_time = 
jiffies + msecs_to_jiffies(freeze_timeout_msecs); if (!user_only) freeze_workqueues_begin(); -- cgit v1.2.3-58-ga151 From 69fb3676df3329a7142803bb3502fa59dc0db2e3 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 10 Feb 2013 01:38:39 -0500 Subject: x86 idle: remove mwait_idle() and "idle=mwait" cmdline param mwait_idle() is a C1-only idle loop intended to be more efficient than HLT, starting on Pentium-4 HT-enabled processors. But mwait_idle() has been replaced by the more general mwait_idle_with_hints(), which handles both C1 and deeper C-states. ACPI processor_idle and intel_idle use only mwait_idle_with_hints(), and no longer use mwait_idle(). Here we simplify the x86 native idle code by removing mwait_idle(), and the "idle=mwait" bootparam used to invoke it. Since Linux 3.0 there has been a boot-time warning when "idle=mwait" was invoked saying it would be removed in 2012. This removal was also noted in the (now removed:-) feature-removal-schedule.txt. After this change, kernels configured with (CONFIG_ACPI=n && CONFIG_INTEL_IDLE=n) when run on hardware that supports MWAIT will simply use HLT. If MWAIT is desired on those systems, cpuidle and the cpuidle drivers above can be enabled. Signed-off-by: Len Brown Cc: x86@kernel.org --- Documentation/kernel-parameters.txt | 7 +--- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/process.c | 79 +------------------------------------ arch/x86/kernel/smpboot.c | 2 +- drivers/acpi/processor_idle.c | 1 - 5 files changed, 4 insertions(+), 87 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 363e348bff9b..3b0cd1e612da 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1039,16 +1039,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Claim all unknown PCI IDE storage controllers. 
idle= [X86] - Format: idle=poll, idle=mwait, idle=halt, idle=nomwait + Format: idle=poll, idle=halt, idle=nomwait Poll forces a polling idle loop that can slightly improve the performance of waking up a idle CPU, but will use a lot of power and make the system run hot. Not recommended. - idle=mwait: On systems which support MONITOR/MWAIT but - the kernel chose to not use it because it doesn't save - as much power as a normal idle loop, use the - MONITOR/MWAIT idle loop anyways. Performance should be - the same as idle=poll. idle=halt: Halt is forced to be used for CPU idle. In such case C2/C3 won't be used again. idle=nomwait: Disable mwait for CPU C-states diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c2f7f472275e..8a28feae1c92 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -725,7 +725,7 @@ extern unsigned long boot_option_idle_override; extern bool amd_e400_c1e_detected; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, - IDLE_POLL, IDLE_FORCE_MWAIT}; + IDLE_POLL}; extern void enable_sep_cpu(void); extern int sysenter_setup(void); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7ed9f6b08ba0..cd5a4c9ef835 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -421,27 +421,6 @@ void stop_this_cpu(void *dummy) } } -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - if (!need_resched()) { - trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); - trace_cpu_idle_rcuidle(1, smp_processor_id()); - if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) - clflush((void *)¤t_thread_info()->flags); - - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __sti_mwait(0, 0); - else - local_irq_enable(); - trace_power_end_rcuidle(smp_processor_id()); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); - } else - local_irq_enable(); -} - 
/* * On SMP it's slightly faster (but much more power-consuming!) * to poll the ->work.need_resched flag instead of waiting for the @@ -458,53 +437,6 @@ static void poll_idle(void) trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } -/* - * mwait selection logic: - * - * It depends on the CPU. For AMD CPUs that support MWAIT this is - * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings - * then depend on a clock divisor and current Pstate of the core. If - * all cores of a processor are in halt state (C1) the processor can - * enter the C1E (C1 enhanced) state. If mwait is used this will never - * happen. - * - * idle=mwait overrides this decision and forces the usage of mwait. - */ - -#define MWAIT_INFO 0x05 -#define MWAIT_ECX_EXTENDED_INFO 0x01 -#define MWAIT_EDX_C1 0xf0 - -int mwait_usable(const struct cpuinfo_x86 *c) -{ - u32 eax, ebx, ecx, edx; - - /* Use mwait if idle=mwait boot option is given */ - if (boot_option_idle_override == IDLE_FORCE_MWAIT) - return 1; - - /* - * Any idle= boot option other than idle=mwait means that we must not - * use mwait. Eg: idle=halt or idle=poll or idle=nomwait - */ - if (boot_option_idle_override != IDLE_NO_OVERRIDE) - return 0; - - if (c->cpuid_level < MWAIT_INFO) - return 0; - - cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx); - /* Check, whether EDX has extended info about MWAIT */ - if (!(ecx & MWAIT_ECX_EXTENDED_INFO)) - return 1; - - /* - * edx enumeratios MONITOR/MWAIT extensions. 
Check, whether - * C1 supports MWAIT - */ - return (edx & MWAIT_EDX_C1); -} - bool amd_e400_c1e_detected; EXPORT_SYMBOL(amd_e400_c1e_detected); @@ -576,13 +508,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) if (pm_idle) return; - if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { - /* - * One CPU supports mwait => All CPUs supports mwait - */ - pr_info("using mwait in idle threads\n"); - pm_idle = mwait_idle; - } else if (cpu_has_amd_erratum(amd_erratum_400)) { + if (cpu_has_amd_erratum(amd_erratum_400)) { /* E400: APIC timer interrupt does not wake up CPU from C1e */ pr_info("using AMD E400 aware idle routine\n"); pm_idle = amd_e400_idle; @@ -606,9 +532,6 @@ static int __init idle_setup(char *str) pr_info("using polling idle threads\n"); pm_idle = poll_idle; boot_option_idle_override = IDLE_POLL; - } else if (!strcmp(str, "mwait")) { - boot_option_idle_override = IDLE_FORCE_MWAIT; - WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n"); } else if (!strcmp(str, "halt")) { /* * When the boot option of idle=halt is added, halt is diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ed0fe385289d..a6ceaedc396a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1369,7 +1369,7 @@ static inline void mwait_play_dead(void) void *mwait_ptr; struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); - if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))) + if (!this_cpu_has(X86_FEATURE_MWAIT)) return; if (!this_cpu_has(X86_FEATURE_CLFLSH)) return; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index ed9a1cc690be..52a5e3a4cdc3 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -84,7 +84,6 @@ static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device); static int disabled_by_idle_boot_param(void) { return boot_option_idle_override == IDLE_POLL || - boot_option_idle_override == IDLE_FORCE_MWAIT || boot_option_idle_override == IDLE_HALT; } -- 
cgit v1.2.3-58-ga151 From 27be457000211a6903968dfce06d5f73f051a217 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 10 Feb 2013 02:28:46 -0500 Subject: x86 idle: remove 32-bit-only "no-hlt" parameter, hlt_works_ok flag Remove 32-bit x86 a cmdline param "no-hlt", and the cpuinfo_x86.hlt_works_ok that it sets. If a user wants to avoid HLT, then "idle=poll" is much more useful, as it avoids invocation of HLT in idle, while "no-hlt" failed to do so. Indeed, hlt_works_ok was consulted in only 3 places. First, in /proc/cpuinfo where "hlt_bug yes" would be printed if and only if the user booted the system with "no-hlt" -- as there was no other code to set that flag. Second, check_hlt() would not invoke halt() if "no-hlt" were on the cmdline. Third, it was consulted in stop_this_cpu(), which is invoked by native_machine_halt()/reboot_interrupt()/smp_stop_nmi_callback() -- all cases where the machine is being shutdown/reset. The flag was not consulted in the more frequently invoked play_dead()/hlt_play_dead() used in processor offline and suspend. Since Linux-3.0 there has been a run-time notice upon "no-hlt" invocations indicating that it would be removed in 2012. Signed-off-by: Len Brown Cc: x86@kernel.org --- Documentation/kernel-parameters.txt | 4 ---- arch/x86/include/asm/processor.h | 10 ---------- arch/x86/kernel/cpu/bugs.c | 27 --------------------------- arch/x86/kernel/cpu/proc.c | 2 -- arch/x86/kernel/process.c | 6 ++---- arch/x86/xen/setup.c | 3 --- 6 files changed, 2 insertions(+), 50 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 3b0cd1e612da..109ee45cf20d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1881,10 +1881,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted. wfi(ARM) instruction doesn't work correctly and not to use it. This is also useful when using JTAG debugger. 
- no-hlt [BUGS=X86-32] Tells the kernel that the hlt - instruction doesn't work correctly and not to - use it. - no_file_caps Tells the kernel not to honor file capabilities. The only way then for a file to be executed with privilege is to be setuid root or executed by root. diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8a28feae1c92..b9e7d279f8ef 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -89,7 +89,6 @@ struct cpuinfo_x86 { char wp_works_ok; /* It doesn't on 386's */ /* Problems on some 486Dx4's and old 386's: */ - char hlt_works_ok; char hard_math; char rfu; char fdiv_bug; @@ -165,15 +164,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); extern const struct seq_operations cpuinfo_op; -static inline int hlt_works(int cpu) -{ -#ifdef CONFIG_X86_32 - return cpu_data(cpu).hlt_works_ok; -#else - return 1; -#endif -} - #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 92dfec986a48..af6455e3fcc9 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -17,15 +17,6 @@ #include #include -static int __init no_halt(char *s) -{ - WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n"); - boot_cpu_data.hlt_works_ok = 0; - return 1; -} - -__setup("no-hlt", no_halt); - static int __init no_387(char *s) { boot_cpu_data.hard_math = 0; @@ -89,23 +80,6 @@ static void __init check_fpu(void) pr_warn("Hmm, FPU with FDIV bug\n"); } -static void __init check_hlt(void) -{ - if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) - return; - - pr_info("Checking 'hlt' instruction... "); - if (!boot_cpu_data.hlt_works_ok) { - pr_cont("disabled\n"); - return; - } - halt(); - halt(); - halt(); - halt(); - pr_cont("OK\n"); -} - /* * Check whether we are able to run this kernel safely on SMP. 
* @@ -129,7 +103,6 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); #endif check_config(); - check_hlt(); init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 3286a92e662a..e280253f6f94 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -28,7 +28,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) { seq_printf(m, "fdiv_bug\t: %s\n" - "hlt_bug\t\t: %s\n" "f00f_bug\t: %s\n" "coma_bug\t: %s\n" "fpu\t\t: %s\n" @@ -36,7 +35,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) "cpuid level\t: %d\n" "wp\t\t: %s\n", c->fdiv_bug ? "yes" : "no", - c->hlt_works_ok ? "no" : "yes", c->f00f_bug ? "yes" : "no", c->coma_bug ? "yes" : "no", c->hard_math ? "yes" : "no", diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cd5a4c9ef835..aef852eac292 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -415,10 +415,8 @@ void stop_this_cpu(void *dummy) set_cpu_online(smp_processor_id(), false); disable_local_APIC(); - for (;;) { - if (hlt_works(smp_processor_id())) - halt(); - } + for (;;) + halt(); } /* diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 2b73b5c8555f..94eac5c85cdc 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -556,9 +556,6 @@ void __init xen_arch_setup(void) COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); /* Set up idle, making sure it calls safe_halt() pvop */ -#ifdef CONFIG_X86_32 - boot_cpu_data.hlt_works_ok = 1; -#endif disable_cpuidle(); disable_cpufreq(); WARN_ON(xen_set_default_idle()); -- cgit v1.2.3-58-ga151 From e253673ec5e3e0b82e6261d5c14505fed77d8b18 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Sat, 9 Feb 2013 00:00:13 +0100 Subject: ACPI / Documentation: refer to correct file for acpi_platform_device_ids[] table When the ACPI platform device code was converted to 
the new ACPI scan handler facility, the acpi_platform_device_ids[] table was moved to drivers/acpi/acpi_platform.c. Update the documentation accordingly. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- Documentation/acpi/enumeration.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt index 54469bc81b1c..94a656131885 100644 --- a/Documentation/acpi/enumeration.txt +++ b/Documentation/acpi/enumeration.txt @@ -63,8 +63,8 @@ from ACPI tables. Currently the kernel is not able to automatically determine from which ACPI device it should make the corresponding platform device so we need to add the ACPI device explicitly to acpi_platform_device_ids list defined in -drivers/acpi/scan.c. This limitation is only for the platform devices, SPI -and I2C devices are created automatically as described below. +drivers/acpi/acpi_platform.c. This limitation is only for the platform +devices, SPI and I2C devices are created automatically as described below. SPI serial bus support ~~~~~~~~~~~~~~~~~~~~~~ -- cgit v1.2.3-58-ga151 From 6be264986152c498562f26b15ed73c70aa37ce48 Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Fri, 15 Feb 2013 22:55:10 +0100 Subject: cpufreq / intel_pstate: Add kernel command line option to disable intel_pstate. When intel_pstate is configured into the kernel it will become the preferred scaling driver for processors that it supports. Allow the user to override this by adding: intel_pstate=disable on the kernel command line. Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J.
Wysocki --- Documentation/kernel-parameters.txt | 5 +++++ drivers/cpufreq/intel_pstate.c | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 363e348bff9b..b335c456feb9 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1131,6 +1131,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. 0 disables intel_idle and fall back on acpi_idle. 1 to 6 specify maximum depth of C-state. + intel_pstate= [X86] + disable + Do not enable intel_pstate as the default + scaling driver for the supported processors + intremap= [X86-64, Intel-IOMMU] on enable Interrupt Remapping (default) off disable Interrupt Remapping diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e87996355da0..096fde0ebcb5 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -773,11 +773,16 @@ static void intel_pstate_exit(void) } module_exit(intel_pstate_exit); +static int __initdata no_load; + static int __init intel_pstate_init(void) { int rc = 0; const struct x86_cpu_id *id; + if (no_load) + return -ENODEV; + id = x86_match_cpu(intel_pstate_cpu_ids); if (!id) return -ENODEV; @@ -802,6 +807,17 @@ out: } device_initcall(intel_pstate_init); +static int __init intel_pstate_setup(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "disable")) + no_load = 1; + return 0; +} +early_param("intel_pstate", intel_pstate_setup); + MODULE_AUTHOR("Dirk Brandewie "); MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors"); MODULE_LICENSE("GPL"); -- cgit v1.2.3-58-ga151