diff options
65 files changed, 4332 insertions, 2821 deletions
diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt index ba78e7c2a069..4bc7287806de 100644 --- a/Documentation/cpu-freq/core.txt +++ b/Documentation/cpu-freq/core.txt @@ -96,7 +96,7 @@ new - new frequency For details about OPP, see Documentation/power/opp.txt dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with - cpufreq_frequency_table_cpuinfo which is provided with the list of + cpufreq_table_validate_and_show() which is provided with the list of frequencies that are available for operation. This function provides a ready to use conversion routine to translate the OPP layer's internal information about the available frequencies into a format readily @@ -110,7 +110,7 @@ dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with /* Do things */ r = dev_pm_opp_init_cpufreq_table(dev, &freq_table); if (!r) - cpufreq_frequency_table_cpuinfo(policy, freq_table); + cpufreq_table_validate_and_show(policy, freq_table); /* Do other things */ } diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt index 14f4e6336d88..772b94fde264 100644 --- a/Documentation/cpu-freq/cpu-drivers.txt +++ b/Documentation/cpu-freq/cpu-drivers.txt @@ -231,7 +231,7 @@ if you want to skip one entry in the table, set the frequency to CPUFREQ_ENTRY_INVALID. The entries don't need to be in ascending order. -By calling cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, +By calling cpufreq_table_validate_and_show(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); the cpuinfo.min_freq and cpuinfo.max_freq values are detected, and policy->min and policy->max are set to the same values. This is @@ -244,14 +244,12 @@ policy->max, and all other criteria are met. This is helpful for the ->verify call. int cpufreq_frequency_table_target(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table, unsigned int target_freq, - unsigned int relation, - unsigned int *index); + unsigned int relation); is the corresponding frequency table helper for the ->target -stage. Just pass the values to this function, and the unsigned int -index returns the number of the frequency table entry which contains +stage. Just pass the values to this function, and this function +returns the number of the frequency table entry which contains the frequency the CPU shall be set to. The following macros can be used as iterators over cpufreq_frequency_table: diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt index 0a94224ad296..9e3c3b33514c 100644 --- a/Documentation/cpu-freq/pcc-cpufreq.txt +++ b/Documentation/cpu-freq/pcc-cpufreq.txt @@ -159,8 +159,8 @@ to be strictly associated with a P-state. 2.2 cpuinfo_transition_latency: ------------------------------- -The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification -does not include a field to expose this value currently. +The cpuinfo_transition_latency field is 0. The PCC specification does +not include a field to expose this value currently. 2.3 cpuinfo_cur_freq: --------------------- diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 724970a25666..118538b548f6 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3598,6 +3598,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. present during boot. nocompress Don't compress/decompress hibernation images. no Disable hibernation and resume. + protect_image Turn on image protection during restoration + (that will set all pages holding image data + during restoration read-only). retain_initrd [RAM] Keep initrd memory after extraction diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c index 82607d621aca..88301e53f085 100644 --- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -85,61 +85,57 @@ static void spu_gov_cancel_work(struct spu_gov_info_struct *info) cancel_delayed_work_sync(&info->work); } -static int spu_gov_govern(struct cpufreq_policy *policy, unsigned int event) +static int spu_gov_start(struct cpufreq_policy *policy) { unsigned int cpu = policy->cpu; - struct spu_gov_info_struct *info, *affected_info; + struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu); + struct spu_gov_info_struct *affected_info; int i; - int ret = 0; - info = &per_cpu(spu_gov_info, cpu); - - switch (event) { - case CPUFREQ_GOV_START: - if (!cpu_online(cpu)) { - printk(KERN_ERR "cpu %d is not online\n", cpu); - ret = -EINVAL; - break; - } + if (!cpu_online(cpu)) { + printk(KERN_ERR "cpu %d is not online\n", cpu); + return -EINVAL; + } - if (!policy->cur) { - printk(KERN_ERR "no cpu specified in policy\n"); - ret = -EINVAL; - break; - } + if (!policy->cur) { + printk(KERN_ERR "no cpu specified in policy\n"); + return -EINVAL; + } - /* initialize spu_gov_info for all affected cpus */ - for_each_cpu(i, policy->cpus) { - affected_info = &per_cpu(spu_gov_info, i); - affected_info->policy = policy; - } + /* initialize spu_gov_info for all affected cpus */ + for_each_cpu(i, policy->cpus) { + affected_info = &per_cpu(spu_gov_info, i); + affected_info->policy = policy; + } - info->poll_int = POLL_TIME; + info->poll_int = POLL_TIME; - /* setup timer */ - spu_gov_init_work(info); + /* setup timer */ + spu_gov_init_work(info); - break; + return 0; +} - case CPUFREQ_GOV_STOP: - /* cancel timer */ - spu_gov_cancel_work(info); +static void spu_gov_stop(struct cpufreq_policy *policy) +{ + unsigned int cpu = policy->cpu; + struct spu_gov_info_struct *info = &per_cpu(spu_gov_info, cpu); + int i; - /* clean spu_gov_info for all affected cpus */ - for_each_cpu (i, policy->cpus) { - info = &per_cpu(spu_gov_info, i); - info->policy = NULL; - } + /* cancel timer */ + spu_gov_cancel_work(info); - break; + /* clean spu_gov_info for all affected cpus */ + for_each_cpu (i, policy->cpus) { + info = &per_cpu(spu_gov_info, i); + info->policy = NULL; } - - return ret; } static struct cpufreq_governor spu_governor = { .name = "spudemand", - .governor = spu_gov_govern, + .start = spu_gov_start, + .stop = spu_gov_stop, .owner = THIS_MODULE, }; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5a73a9c62c39..56f4c6676b29 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -64,8 +64,6 @@ #define MSR_OFFCORE_RSP_0 0x000001a6 #define MSR_OFFCORE_RSP_1 0x000001a7 -#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad -#define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae #define MSR_TURBO_RATIO_LIMIT 0x000001ad #define MSR_TURBO_RATIO_LIMIT1 0x000001ae #define MSR_TURBO_RATIO_LIMIT2 0x000001af diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 0576b6157f3a..c9734dc76257 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -135,6 +135,7 @@ int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_disable(void); int common_cpu_die(unsigned int cpu); void native_cpu_die(unsigned int cpu); +void hlt_play_dead(void); void native_play_dead(void); void play_dead_common(void); void wbinvd_on_cpu(int cpu); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index d0a51939c150..c93609c97406 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1644,7 +1644,7 @@ static inline void mwait_play_dead(void) } } -static inline void hlt_play_dead(void) +void hlt_play_dead(void) { if (__this_cpu_read(cpu_info.x86) >= 4) wbinvd(); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index d5f64996394a..b12c26e2e309 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -12,6 +12,7 @@ #include <linux/export.h> #include <linux/smp.h> #include <linux/perf_event.h> +#include <linux/tboot.h> #include <asm/pgtable.h> #include <asm/proto.h> @@ -266,6 +267,35 @@ void notrace restore_processor_state(void) EXPORT_SYMBOL(restore_processor_state); #endif +#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HOTPLUG_CPU) +static void resume_play_dead(void) +{ + play_dead_common(); + tboot_shutdown(TB_SHUTDOWN_WFS); + hlt_play_dead(); +} + +int hibernate_resume_nonboot_cpu_disable(void) +{ + void (*play_dead)(void) = smp_ops.play_dead; + int ret; + + /* + * Ensure that MONITOR/MWAIT will not be used in the "play dead" loop + * during hibernate image restoration, because it is likely that the + * monitored address will be actually written to at that time and then + * the "dead" CPU will attempt to execute instructions again, but the + * address in its instruction pointer may not be possible to resolve + * any more at that point (the page tables used by it previously may + * have been overwritten by hibernate image data). + */ + smp_ops.play_dead = resume_play_dead; + ret = disable_nonboot_cpus(); + smp_ops.play_dead = play_dead; + return ret; +} +#endif + /* * When bsp_check() is called in hibernate and suspend, cpu hotplug * is disabled already. So it's unnessary to handle race condition between diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 3657ac1cb801..8e2e4757adcb 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -121,6 +121,7 @@ int pm_clk_add(struct device *dev, const char *con_id) { return __pm_clk_add(dev, con_id, NULL); } +EXPORT_SYMBOL_GPL(pm_clk_add); /** * pm_clk_add_clk - Start using a device clock for power management. @@ -136,9 +137,42 @@ int pm_clk_add_clk(struct device *dev, struct clk *clk) { return __pm_clk_add(dev, NULL, clk); } +EXPORT_SYMBOL_GPL(pm_clk_add_clk); /** + * of_pm_clk_add_clk - Start using a device clock for power management. + * @dev: Device whose clock is going to be used for power management. + * @name: Name of clock that is going to be used for power management. + * + * Add the clock described in the 'clocks' device-tree node that matches + * with the 'name' provided, to the list of clocks used for the power + * management of @dev. On success, returns 0. Returns a negative error + * code if the clock is not found or cannot be added. + */ +int of_pm_clk_add_clk(struct device *dev, const char *name) +{ + struct clk *clk; + int ret; + + if (!dev || !dev->of_node || !name) + return -EINVAL; + + clk = of_clk_get_by_name(dev->of_node, name); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + ret = pm_clk_add_clk(dev, clk); + if (ret) { + clk_put(clk); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(of_pm_clk_add_clk); + +/** * of_pm_clk_add_clks - Start using device clock(s) for power management. * @dev: Device whose clock(s) is going to be used for power management. * @@ -192,6 +226,7 @@ error: return ret; } +EXPORT_SYMBOL_GPL(of_pm_clk_add_clks); /** * __pm_clk_remove - Destroy PM clock entry. @@ -252,6 +287,7 @@ void pm_clk_remove(struct device *dev, const char *con_id) __pm_clk_remove(ce); } +EXPORT_SYMBOL_GPL(pm_clk_remove); /** * pm_clk_remove_clk - Stop using a device clock for power management. @@ -285,6 +321,7 @@ void pm_clk_remove_clk(struct device *dev, struct clk *clk) __pm_clk_remove(ce); } +EXPORT_SYMBOL_GPL(pm_clk_remove_clk); /** * pm_clk_init - Initialize a device's list of power management clocks. @@ -299,6 +336,7 @@ void pm_clk_init(struct device *dev) if (psd) INIT_LIST_HEAD(&psd->clock_list); } +EXPORT_SYMBOL_GPL(pm_clk_init); /** * pm_clk_create - Create and initialize a device's list of PM clocks. @@ -311,6 +349,7 @@ int pm_clk_create(struct device *dev) { return dev_pm_get_subsys_data(dev); } +EXPORT_SYMBOL_GPL(pm_clk_create); /** * pm_clk_destroy - Destroy a device's list of power management clocks. @@ -345,6 +384,7 @@ void pm_clk_destroy(struct device *dev) __pm_clk_remove(ce); } } +EXPORT_SYMBOL_GPL(pm_clk_destroy); /** * pm_clk_suspend - Disable clocks in a device's PM clock list. @@ -375,6 +415,7 @@ int pm_clk_suspend(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(pm_clk_suspend); /** * pm_clk_resume - Enable clocks in a device's PM clock list. @@ -400,6 +441,7 @@ int pm_clk_resume(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(pm_clk_resume); /** * pm_clk_notify - Notify routine for device addition and removal. @@ -480,6 +522,7 @@ int pm_clk_runtime_suspend(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(pm_clk_runtime_suspend); int pm_clk_runtime_resume(struct device *dev) { @@ -495,6 +538,7 @@ int pm_clk_runtime_resume(struct device *dev) return pm_generic_runtime_resume(dev); } +EXPORT_SYMBOL_GPL(pm_clk_runtime_resume); #else /* !CONFIG_PM_CLK */ @@ -598,3 +642,4 @@ void pm_clk_add_notifier(struct bus_type *bus, clknb->nb.notifier_call = pm_clk_notify; bus_register_notifier(bus, &clknb->nb); } +EXPORT_SYMBOL_GPL(pm_clk_add_notifier); diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index de23b648fce3..a1f2aff33997 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -187,8 +187,7 @@ static int genpd_poweron(struct generic_pm_domain *genpd, unsigned int depth) struct gpd_link *link; int ret = 0; - if (genpd->status == GPD_STATE_ACTIVE - || (genpd->prepared_count > 0 && genpd->suspend_power_off)) + if (genpd->status == GPD_STATE_ACTIVE) return 0; /* @@ -735,82 +734,24 @@ static int pm_genpd_prepare(struct device *dev) mutex_lock(&genpd->lock); - if (genpd->prepared_count++ == 0) { + if (genpd->prepared_count++ == 0) genpd->suspended_count = 0; - genpd->suspend_power_off = genpd->status == GPD_STATE_POWER_OFF; - } mutex_unlock(&genpd->lock); - if (genpd->suspend_power_off) - return 0; - - /* - * The PM domain must be in the GPD_STATE_ACTIVE state at this point, - * so genpd_poweron() will return immediately, but if the device - * is suspended (e.g. it's been stopped by genpd_stop_dev()), we need - * to make it operational. - */ - pm_runtime_resume(dev); - __pm_runtime_disable(dev, false); - ret = pm_generic_prepare(dev); if (ret) { mutex_lock(&genpd->lock); - if (--genpd->prepared_count == 0) - genpd->suspend_power_off = false; + genpd->prepared_count--; mutex_unlock(&genpd->lock); - pm_runtime_enable(dev); } return ret; } /** - * pm_genpd_suspend - Suspend a device belonging to an I/O PM domain. - * @dev: Device to suspend. - * - * Suspend a device under the assumption that its pm_domain field points to the - * domain member of an object of type struct generic_pm_domain representing - * a PM domain consisting of I/O devices. - */ -static int pm_genpd_suspend(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_suspend(dev); -} - -/** - * pm_genpd_suspend_late - Late suspend of a device from an I/O PM domain. - * @dev: Device to suspend. - * - * Carry out a late suspend of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a PM domain consisting of I/O devices. - */ -static int pm_genpd_suspend_late(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_suspend_late(dev); -} - -/** * pm_genpd_suspend_noirq - Completion of suspend of device in an I/O PM domain. * @dev: Device to suspend. * @@ -820,6 +761,7 @@ static int pm_genpd_suspend_late(struct device *dev) static int pm_genpd_suspend_noirq(struct device *dev) { struct generic_pm_domain *genpd; + int ret; dev_dbg(dev, "%s()\n", __func__); @@ -827,11 +769,14 @@ static int pm_genpd_suspend_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - if (genpd->suspend_power_off - || (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))) + if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)) return 0; - genpd_stop_dev(genpd, dev); + if (genpd->dev_ops.stop && genpd->dev_ops.start) { + ret = pm_runtime_force_suspend(dev); + if (ret) + return ret; + } /* * Since all of the "noirq" callbacks are executed sequentially, it is @@ -853,6 +798,7 @@ static int pm_genpd_suspend_noirq(struct device *dev) static int pm_genpd_resume_noirq(struct device *dev) { struct generic_pm_domain *genpd; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); @@ -860,8 +806,7 @@ static int pm_genpd_resume_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - if (genpd->suspend_power_off - || (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev))) + if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)) return 0; /* @@ -872,93 +817,10 @@ static int pm_genpd_resume_noirq(struct device *dev) pm_genpd_sync_poweron(genpd, true); genpd->suspended_count--; - return genpd_start_dev(genpd, dev); -} - -/** - * pm_genpd_resume_early - Early resume of a device in an I/O PM domain. - * @dev: Device to resume. - * - * Carry out an early resume of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a power domain consisting of I/O - * devices. - */ -static int pm_genpd_resume_early(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_resume_early(dev); -} - -/** - * pm_genpd_resume - Resume of device in an I/O PM domain. - * @dev: Device to resume. - * - * Resume a device under the assumption that its pm_domain field points to the - * domain member of an object of type struct generic_pm_domain representing - * a power domain consisting of I/O devices. - */ -static int pm_genpd_resume(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_resume(dev); -} - -/** - * pm_genpd_freeze - Freezing a device in an I/O PM domain. - * @dev: Device to freeze. - * - * Freeze a device under the assumption that its pm_domain field points to the - * domain member of an object of type struct generic_pm_domain representing - * a power domain consisting of I/O devices. - */ -static int pm_genpd_freeze(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_freeze(dev); -} + if (genpd->dev_ops.stop && genpd->dev_ops.start) + ret = pm_runtime_force_resume(dev); -/** - * pm_genpd_freeze_late - Late freeze of a device in an I/O PM domain. - * @dev: Device to freeze. - * - * Carry out a late freeze of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a power domain consisting of I/O - * devices. - */ -static int pm_genpd_freeze_late(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_freeze_late(dev); + return ret; } /** @@ -973,6 +835,7 @@ static int pm_genpd_freeze_late(struct device *dev) static int pm_genpd_freeze_noirq(struct device *dev) { struct generic_pm_domain *genpd; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); @@ -980,7 +843,10 @@ static int pm_genpd_freeze_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_stop_dev(genpd, dev); + if (genpd->dev_ops.stop && genpd->dev_ops.start) + ret = pm_runtime_force_suspend(dev); + + return ret; } /** @@ -993,6 +859,7 @@ static int pm_genpd_freeze_noirq(struct device *dev) static int pm_genpd_thaw_noirq(struct device *dev) { struct generic_pm_domain *genpd; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); @@ -1000,51 +867,10 @@ static int pm_genpd_thaw_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? - 0 : genpd_start_dev(genpd, dev); -} - -/** - * pm_genpd_thaw_early - Early thaw of device in an I/O PM domain. - * @dev: Device to thaw. - * - * Carry out an early thaw of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a power domain consisting of I/O - * devices. - */ -static int pm_genpd_thaw_early(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_thaw_early(dev); -} - -/** - * pm_genpd_thaw - Thaw a device belonging to an I/O power domain. - * @dev: Device to thaw. - * - * Thaw a device under the assumption that its pm_domain field points to the - * domain member of an object of type struct generic_pm_domain representing - * a power domain consisting of I/O devices. - */ -static int pm_genpd_thaw(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); + if (genpd->dev_ops.stop && genpd->dev_ops.start) + ret = pm_runtime_force_resume(dev); - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_thaw(dev); + return ret; } /** @@ -1057,6 +883,7 @@ static int pm_genpd_thaw(struct device *dev) static int pm_genpd_restore_noirq(struct device *dev) { struct generic_pm_domain *genpd; + int ret = 0; dev_dbg(dev, "%s()\n", __func__); @@ -1072,30 +899,20 @@ static int pm_genpd_restore_noirq(struct device *dev) * At this point suspended_count == 0 means we are being run for the * first time for the given domain in the present cycle. */ - if (genpd->suspended_count++ == 0) { + if (genpd->suspended_count++ == 0) /* * The boot kernel might put the domain into arbitrary state, * so make it appear as powered off to pm_genpd_sync_poweron(), * so that it tries to power it on in case it was really off. */ genpd->status = GPD_STATE_POWER_OFF; - if (genpd->suspend_power_off) { - /* - * If the domain was off before the hibernation, make - * sure it will be off going forward. - */ - genpd_power_off(genpd, true); - - return 0; - } - } - - if (genpd->suspend_power_off) - return 0; pm_genpd_sync_poweron(genpd, true); - return genpd_start_dev(genpd, dev); + if (genpd->dev_ops.stop && genpd->dev_ops.start) + ret = pm_runtime_force_resume(dev); + + return ret; } /** @@ -1110,7 +927,6 @@ static int pm_genpd_restore_noirq(struct device *dev) static void pm_genpd_complete(struct device *dev) { struct generic_pm_domain *genpd; - bool run_complete; dev_dbg(dev, "%s()\n", __func__); @@ -1118,20 +934,15 @@ static void pm_genpd_complete(struct device *dev) if (IS_ERR(genpd)) return; + pm_generic_complete(dev); + mutex_lock(&genpd->lock); - run_complete = !genpd->suspend_power_off; - if (--genpd->prepared_count == 0) - genpd->suspend_power_off = false; + genpd->prepared_count--; + if (!genpd->prepared_count) + genpd_queue_power_off_work(genpd); mutex_unlock(&genpd->lock); - - if (run_complete) { - pm_generic_complete(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - pm_request_idle(dev); - } } /** @@ -1173,18 +984,10 @@ EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron); #else /* !CONFIG_PM_SLEEP */ #define pm_genpd_prepare NULL -#define pm_genpd_suspend NULL -#define pm_genpd_suspend_late NULL #define pm_genpd_suspend_noirq NULL -#define pm_genpd_resume_early NULL #define pm_genpd_resume_noirq NULL -#define pm_genpd_resume NULL -#define pm_genpd_freeze NULL -#define pm_genpd_freeze_late NULL #define pm_genpd_freeze_noirq NULL -#define pm_genpd_thaw_early NULL #define pm_genpd_thaw_noirq NULL -#define pm_genpd_thaw NULL #define pm_genpd_restore_noirq NULL #define pm_genpd_complete NULL @@ -1455,12 +1258,14 @@ EXPORT_SYMBOL_GPL(pm_genpd_remove_subdomain); * @genpd: PM domain object to initialize. * @gov: PM domain governor to associate with the domain (may be NULL). * @is_off: Initial value of the domain's power_is_off field. + * + * Returns 0 on successful initialization, else a negative error code. */ -void pm_genpd_init(struct generic_pm_domain *genpd, - struct dev_power_governor *gov, bool is_off) +int pm_genpd_init(struct generic_pm_domain *genpd, + struct dev_power_governor *gov, bool is_off) { if (IS_ERR_OR_NULL(genpd)) - return; + return -EINVAL; INIT_LIST_HEAD(&genpd->master_links); INIT_LIST_HEAD(&genpd->slave_links); @@ -1476,24 +1281,24 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.runtime_suspend = genpd_runtime_suspend; genpd->domain.ops.runtime_resume = genpd_runtime_resume; genpd->domain.ops.prepare = pm_genpd_prepare; - genpd->domain.ops.suspend = pm_genpd_suspend; - genpd->domain.ops.suspend_late = pm_genpd_suspend_late; + genpd->domain.ops.suspend = pm_generic_suspend; + genpd->domain.ops.suspend_late = pm_generic_suspend_late; genpd->domain.ops.suspend_noirq = pm_genpd_suspend_noirq; genpd->domain.ops.resume_noirq = pm_genpd_resume_noirq; - genpd->domain.ops.resume_early = pm_genpd_resume_early; - genpd->domain.ops.resume = pm_genpd_resume; - genpd->domain.ops.freeze = pm_genpd_freeze; - genpd->domain.ops.freeze_late = pm_genpd_freeze_late; + genpd->domain.ops.resume_early = pm_generic_resume_early; + genpd->domain.ops.resume = pm_generic_resume; + genpd->domain.ops.freeze = pm_generic_freeze; + genpd->domain.ops.freeze_late = pm_generic_freeze_late; genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq; genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq; - genpd->domain.ops.thaw_early = pm_genpd_thaw_early; - genpd->domain.ops.thaw = pm_genpd_thaw; - genpd->domain.ops.poweroff = pm_genpd_suspend; - genpd->domain.ops.poweroff_late = pm_genpd_suspend_late; + genpd->domain.ops.thaw_early = pm_generic_thaw_early; + genpd->domain.ops.thaw = pm_generic_thaw; + genpd->domain.ops.poweroff = pm_generic_poweroff; + genpd->domain.ops.poweroff_late = pm_generic_poweroff_late; genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq; genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq; - genpd->domain.ops.restore_early = pm_genpd_resume_early; - genpd->domain.ops.restore = pm_genpd_resume; + genpd->domain.ops.restore_early = pm_generic_restore_early; + genpd->domain.ops.restore = pm_generic_restore; genpd->domain.ops.complete = pm_genpd_complete; if (genpd->flags & GENPD_FLAG_PM_CLK) { @@ -1518,6 +1323,8 @@ void pm_genpd_init(struct generic_pm_domain *genpd, mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); + + return 0; } EXPORT_SYMBOL_GPL(pm_genpd_init); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index b74690418504..e097d355cc04 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1045,10 +1045,14 @@ int __pm_runtime_set_status(struct device *dev, unsigned int status) */ if (!parent->power.disable_depth && !parent->power.ignore_children - && parent->power.runtime_status != RPM_ACTIVE) + && parent->power.runtime_status != RPM_ACTIVE) { + dev_err(dev, "runtime PM trying to activate child device %s but parent (%s) is not active\n", + dev_name(dev), + dev_name(parent)); error = -EBUSY; - else if (dev->power.runtime_status == RPM_SUSPENDED) + } else if (dev->power.runtime_status == RPM_SUSPENDED) { atomic_inc(&parent->power.child_count); + } spin_unlock(&parent->power.lock); @@ -1256,7 +1260,7 @@ void pm_runtime_allow(struct device *dev) dev->power.runtime_auto = true; if (atomic_dec_and_test(&dev->power.usage_count)) - rpm_idle(dev, RPM_AUTO); + rpm_idle(dev, RPM_AUTO | RPM_ASYNC); out: spin_unlock_irq(&dev->power.lock); @@ -1506,6 +1510,9 @@ int pm_runtime_force_resume(struct device *dev) goto out; } + if (!pm_runtime_status_suspended(dev)) + goto out; + ret = pm_runtime_set_active(dev); if (ret) goto out; diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index b7445b6ae5a4..c822d72629d5 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -31,23 +31,18 @@ config CPU_FREQ_BOOST_SW depends on THERMAL config CPU_FREQ_STAT - tristate "CPU frequency translation statistics" + bool "CPU frequency transition statistics" default y help - This driver exports CPU frequency statistics information through sysfs - file system. - - To compile this driver as a module, choose M here: the - module will be called cpufreq_stats. + Export CPU frequency statistics information through sysfs. If in doubt, say N. config CPU_FREQ_STAT_DETAILS - bool "CPU frequency translation statistics details" + bool "CPU frequency transition statistics details" depends on CPU_FREQ_STAT help - This will show detail CPU frequency translation table in sysfs file - system. + Show detailed CPU frequency transition table in sysfs. If in doubt, say N. diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 32a15052f363..297e9128fe9f 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -468,20 +468,17 @@ unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy, struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; struct cpufreq_frequency_table *entry; - unsigned int next_perf_state, next_freq, freq; + unsigned int next_perf_state, next_freq, index; /* * Find the closest frequency above target_freq. - * - * The table is sorted in the reverse order with respect to the - * frequency and all of the entries are valid (see the initialization). */ - entry = policy->freq_table; - do { - entry++; - freq = entry->frequency; - } while (freq >= target_freq && freq != CPUFREQ_TABLE_END); - entry--; + if (policy->cached_target_freq == target_freq) + index = policy->cached_resolved_idx; + else + index = cpufreq_table_find_index_dl(policy, target_freq); + + entry = &policy->freq_table[index]; next_freq = entry->frequency; next_perf_state = entry->driver_data; diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c index 404360cad25c..042023bbbf62 100644 --- a/drivers/cpufreq/amd_freq_sensitivity.c +++ b/drivers/cpufreq/amd_freq_sensitivity.c @@ -48,9 +48,8 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, struct policy_dbs_info *policy_dbs = policy->governor_data; struct dbs_data *od_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = od_data->tuners; - struct od_policy_dbs_info *od_info = to_dbs_info(policy_dbs); - if (!od_info->freq_table) + if (!policy->freq_table) return freq_next; rdmsr_on_cpu(policy->cpu, MSR_AMD64_FREQ_SENSITIVITY_ACTUAL, @@ -92,10 +91,9 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, else { unsigned int index; - cpufreq_frequency_table_target(policy, - od_info->freq_table, policy->cur - 1, - CPUFREQ_RELATION_H, &index); - freq_next = od_info->freq_table[index].frequency; + index = cpufreq_table_find_index_h(policy, + policy->cur - 1); + freq_next = policy->freq_table[index].frequency; } data->freq_prev = freq_next; diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5617c7087d77..3dd4884c6f9e 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -74,19 +74,12 @@ static inline bool has_target(void) } /* internal prototypes */ -static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); static unsigned int __cpufreq_get(struct cpufreq_policy *policy); +static int cpufreq_init_governor(struct cpufreq_policy *policy); +static void cpufreq_exit_governor(struct cpufreq_policy *policy); static int cpufreq_start_governor(struct cpufreq_policy *policy); - -static inline void cpufreq_exit_governor(struct cpufreq_policy *policy) -{ - (void)cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); -} - -static inline void cpufreq_stop_governor(struct cpufreq_policy *policy) -{ - (void)cpufreq_governor(policy, CPUFREQ_GOV_STOP); -} +static void cpufreq_stop_governor(struct cpufreq_policy *policy); +static void cpufreq_governor_limits(struct cpufreq_policy *policy); /** * Two notifier lists: the "policy" list is involved in the @@ -133,15 +126,6 @@ struct kobject *get_governor_parent_kobj(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(get_governor_parent_kobj); -struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu) -{ - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - - return policy && !policy_is_inactive(policy) ? - policy->freq_table : NULL; -} -EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table); - static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) { u64 idle_time; @@ -354,6 +338,7 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy, pr_debug("FREQ: %lu - CPU: %lu\n", (unsigned long)freqs->new, (unsigned long)freqs->cpu); trace_cpu_frequency(freqs->new, freqs->cpu); + cpufreq_stats_record_transition(policy, freqs->new); srcu_notifier_call_chain(&cpufreq_transition_notifier_list, CPUFREQ_POSTCHANGE, freqs); if (likely(policy) && likely(policy->cpu == freqs->cpu)) @@ -507,6 +492,38 @@ void cpufreq_disable_fast_switch(struct cpufreq_policy *policy) } EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); +/** + * cpufreq_driver_resolve_freq - Map a target frequency to a driver-supported + * one. + * @target_freq: target frequency to resolve. + * + * The target to driver frequency mapping is cached in the policy. + * + * Return: Lowest driver-supported frequency greater than or equal to the + * given target_freq, subject to policy (min/max) and driver limitations. + */ +unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + policy->cached_target_freq = target_freq; + + if (cpufreq_driver->target_index) { + int idx; + + idx = cpufreq_frequency_table_target(policy, target_freq, + CPUFREQ_RELATION_L); + policy->cached_resolved_idx = idx; + return policy->freq_table[idx].frequency; + } + + if (cpufreq_driver->resolve_freq) + return cpufreq_driver->resolve_freq(policy, target_freq); + + return target_freq; +} +EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); + /********************************************************************* * SYSFS INTERFACE * *********************************************************************/ @@ -1115,6 +1132,7 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy, bool notify) CPUFREQ_REMOVE_POLICY, policy); down_write(&policy->rwsem); + cpufreq_stats_free_table(policy); cpufreq_remove_dev_symlink(policy); kobj = &policy->kobj; cmp = &policy->kobj_unregister; @@ -1265,13 +1283,12 @@ static int cpufreq_online(unsigned int cpu) } } - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, - CPUFREQ_START, policy); - if (new_policy) { ret = cpufreq_add_dev_interface(policy); if (ret) goto out_exit_policy; + + cpufreq_stats_create_table(policy); blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); @@ -1280,6 +1297,9 @@ static int cpufreq_online(unsigned int cpu) write_unlock_irqrestore(&cpufreq_driver_lock, flags); } + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_START, policy); + ret = cpufreq_init_policy(policy); if (ret) { pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n", @@ -1556,9 +1576,6 @@ static unsigned int cpufreq_update_current_freq(struct cpufreq_policy *policy) { unsigned int new_freq; - if (cpufreq_suspended) - return 0; - new_freq = cpufreq_driver->get(policy->cpu); if (!new_freq) return 0; @@ -1864,14 +1881,17 @@ static int __target_intermediate(struct cpufreq_policy *policy, return ret; } -static int __target_index(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *freq_table, int index) +static int __target_index(struct cpufreq_policy *policy, int index) { struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0}; unsigned int intermediate_freq = 0; + unsigned int newfreq = policy->freq_table[index].frequency; int retval = -EINVAL; bool notify; + if (newfreq == policy->cur) + return 0; + notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION); if (notify) { /* Handle switching to intermediate frequency */ @@ -1886,7 +1906,7 @@ static int __target_index(struct cpufreq_policy *policy, freqs.old = freqs.new; } - freqs.new = freq_table[index].frequency; + freqs.new = newfreq; pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n", __func__, policy->cpu, freqs.old, freqs.new); @@ -1923,17 +1943,13 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation) { unsigned int old_target_freq = target_freq; - struct cpufreq_frequency_table *freq_table; - int index, retval; + int index; if (cpufreq_disabled()) return -ENODEV; /* Make sure that target_freq is within supported range */ - if (target_freq > policy->max) - target_freq = policy->max; - if (target_freq < policy->min) - target_freq = policy->min; + target_freq = clamp_val(target_freq, policy->min, policy->max); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); @@ -1956,23 +1972,9 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, if (!cpufreq_driver->target_index) return -EINVAL; - freq_table = cpufreq_frequency_get_table(policy->cpu); - if (unlikely(!freq_table)) { - pr_err("%s: Unable to find freq_table\n", __func__); - return -EINVAL; - } - - retval = cpufreq_frequency_table_target(policy, freq_table, target_freq, - relation, &index); - if (unlikely(retval)) { - pr_err("%s: Unable to find matching freq\n", __func__); - return retval; - } - - if (freq_table[index].frequency == policy->cur) - return 0; + index = cpufreq_frequency_table_target(policy, target_freq, relation); - return __target_index(policy, freq_table, index); + return __target_index(policy, index); } EXPORT_SYMBOL_GPL(__cpufreq_driver_target); @@ -1997,7 +1999,7 @@ __weak struct cpufreq_governor *cpufreq_fallback_governor(void) return NULL; } -static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) +static int cpufreq_init_governor(struct cpufreq_policy *policy) { int ret; @@ -2025,36 +2027,82 @@ static int cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) } } - if (event == CPUFREQ_GOV_POLICY_INIT) - if (!try_module_get(policy->governor->owner)) - return -EINVAL; - - pr_debug("%s: for CPU %u, event %u\n", __func__, policy->cpu, event); + if (!try_module_get(policy->governor->owner)) + return -EINVAL; - ret = policy->governor->governor(policy, event); + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); - if (event == CPUFREQ_GOV_POLICY_INIT) { - if (ret) + if (policy->governor->init) { + ret = policy->governor->init(policy); + if (ret) { module_put(policy->governor->owner); - else - policy->governor->initialized++; - } else if (event == CPUFREQ_GOV_POLICY_EXIT) { - policy->governor->initialized--; - module_put(policy->governor->owner); + return ret; + } } - return ret; + return 0; +} + +static void cpufreq_exit_governor(struct cpufreq_policy *policy) +{ + if (cpufreq_suspended || !policy->governor) + return; + + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); + + if (policy->governor->exit) + policy->governor->exit(policy); + + module_put(policy->governor->owner); } static int cpufreq_start_governor(struct cpufreq_policy *policy) { int ret; + if (cpufreq_suspended) + return 0; + + if (!policy->governor) + return -EINVAL; + + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); + if (cpufreq_driver->get && !cpufreq_driver->setpolicy) cpufreq_update_current_freq(policy); - ret = cpufreq_governor(policy, CPUFREQ_GOV_START); - return ret ? ret : cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + if (policy->governor->start) { + ret = policy->governor->start(policy); + if (ret) + return ret; + } + + if (policy->governor->limits) + policy->governor->limits(policy); + + return 0; +} + +static void cpufreq_stop_governor(struct cpufreq_policy *policy) +{ + if (cpufreq_suspended || !policy->governor) + return; + + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); + + if (policy->governor->stop) + policy->governor->stop(policy); +} + +static void cpufreq_governor_limits(struct cpufreq_policy *policy) +{ + if (cpufreq_suspended || !policy->governor) + return; + + pr_debug("%s: for CPU %u\n", __func__, policy->cpu); + + if (policy->governor->limits) + policy->governor->limits(policy); } int cpufreq_register_governor(struct cpufreq_governor *governor) @@ -2069,7 +2117,6 @@ int cpufreq_register_governor(struct cpufreq_governor *governor) mutex_lock(&cpufreq_governor_mutex); - governor->initialized = 0; err = -EBUSY; if (!find_governor(governor->name)) { err = 0; @@ -2184,6 +2231,8 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, policy->min = new_policy->min; policy->max = new_policy->max; + policy->cached_target_freq = UINT_MAX; + pr_debug("new min and max freqs are %u - %u kHz\n", policy->min, policy->max); @@ -2195,7 +2244,8 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, if (new_policy->governor == policy->governor) { pr_debug("cpufreq: governor limits update\n"); - return cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); + cpufreq_governor_limits(policy); + return 0; } pr_debug("governor switch\n"); @@ -2210,7 +2260,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, /* start new governor */ policy->governor = new_policy->governor; - ret = cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); + ret = cpufreq_init_governor(policy); if (!ret) { ret = cpufreq_start_governor(policy); if (!ret) { @@ -2224,7 +2274,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, pr_debug("starting governor %s failed\n", policy->governor->name); if (old_gov) { policy->governor = old_gov; - if (cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) + if (cpufreq_init_governor(policy)) policy->governor = NULL; else cpufreq_start_governor(policy); @@ -2309,26 +2359,25 @@ static struct notifier_block __refdata cpufreq_cpu_notifier = { *********************************************************************/ static int cpufreq_boost_set_sw(int state) { - struct cpufreq_frequency_table *freq_table; struct cpufreq_policy *policy; int ret = -EINVAL; for_each_active_policy(policy) { - freq_table = cpufreq_frequency_get_table(policy->cpu); - if (freq_table) { - ret = cpufreq_frequency_table_cpuinfo(policy, - freq_table); - if (ret) { - pr_err("%s: Policy frequency update failed\n", - __func__); - break; - } - - down_write(&policy->rwsem); - policy->user_policy.max = policy->max; - cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); - up_write(&policy->rwsem); + if (!policy->freq_table) + continue; + + ret = cpufreq_frequency_table_cpuinfo(policy, + policy->freq_table); + if (ret) { + pr_err("%s: Policy frequency update failed\n", + __func__); + break; } + + down_write(&policy->rwsem); + policy->user_policy.max = policy->max; + cpufreq_governor_limits(policy); + up_write(&policy->rwsem); } return ret; diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 316df247e00d..18da4f8051d3 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -17,7 +17,6 @@ struct cs_policy_dbs_info { struct policy_dbs_info policy_dbs; unsigned int down_skip; - unsigned int requested_freq; }; static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs) @@ -75,19 +74,17 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) /* Check for frequency increase */ if (load > dbs_data->up_threshold) { + unsigned int requested_freq = policy->cur; + dbs_info->down_skip = 0; /* if we are already at full speed then break out early */ - if (dbs_info->requested_freq == policy->max) + if (requested_freq == policy->max) goto out; - dbs_info->requested_freq += get_freq_target(cs_tuners, policy); - - if (dbs_info->requested_freq > policy->max) - dbs_info->requested_freq = policy->max; + requested_freq += get_freq_target(cs_tuners, policy); - __cpufreq_driver_target(policy, dbs_info->requested_freq, - CPUFREQ_RELATION_H); + __cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_H); goto out; } @@ -98,36 +95,27 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy) /* Check for frequency decrease */ if (load < cs_tuners->down_threshold) { - unsigned int freq_target; + unsigned int freq_target, requested_freq = policy->cur; /* * if we cannot reduce the frequency anymore, break out early */ - if (policy->cur == policy->min) + if (requested_freq == policy->min) goto out; freq_target = get_freq_target(cs_tuners, policy); - if (dbs_info->requested_freq > freq_target) - dbs_info->requested_freq -= freq_target; + if (requested_freq > freq_target) + requested_freq -= freq_target; else - dbs_info->requested_freq = policy->min; + requested_freq = policy->min; - __cpufreq_driver_target(policy, dbs_info->requested_freq, - CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_L); } out: return dbs_data->sampling_rate; } -static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data); - -static struct notifier_block cs_cpufreq_notifier_block = { - .notifier_call = dbs_cpufreq_notifier, -}; - /************************** sysfs interface ************************/ -static struct dbs_governor cs_dbs_gov; static ssize_t store_sampling_down_factor(struct gov_attr_set *attr_set, const char *buf, size_t count) @@ -268,15 +256,13 @@ static void cs_free(struct policy_dbs_info *policy_dbs) kfree(to_dbs_info(policy_dbs)); } -static int cs_init(struct dbs_data *dbs_data, bool notify) +static int cs_init(struct dbs_data *dbs_data) { struct cs_dbs_tuners *tuners; tuners = kzalloc(sizeof(*tuners), GFP_KERNEL); - if (!tuners) { - pr_err("%s: kzalloc failed\n", __func__); + if (!tuners) return -ENOMEM; - } tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD; tuners->freq_step = DEF_FREQUENCY_STEP; @@ -288,19 +274,11 @@ static int cs_init(struct dbs_data *dbs_data, bool notify) dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); - if (notify) - cpufreq_register_notifier(&cs_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - return 0; } -static void cs_exit(struct dbs_data *dbs_data, bool notify) +static void cs_exit(struct dbs_data *dbs_data) { - if (notify) - cpufreq_unregister_notifier(&cs_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - kfree(dbs_data->tuners); } @@ -309,16 +287,10 @@ static void cs_start(struct cpufreq_policy *policy) struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); dbs_info->down_skip = 0; - dbs_info->requested_freq = policy->cur; } -static struct dbs_governor cs_dbs_gov = { - .gov = { - .name = "conservative", - .governor = cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, - }, +static struct dbs_governor cs_governor = { + .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("conservative"), .kobj_type = { .default_attrs = cs_attributes }, .gov_dbs_timer = cs_dbs_timer, .alloc = cs_alloc, @@ -328,33 +300,7 @@ static struct dbs_governor cs_dbs_gov = { .start = cs_start, }; -#define CPU_FREQ_GOV_CONSERVATIVE (&cs_dbs_gov.gov) - -static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) -{ - struct cpufreq_freqs *freq = data; - struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); - struct cs_policy_dbs_info *dbs_info; - - if (!policy) - return 0; - - /* policy isn't governed by conservative governor */ - if (policy->governor != CPU_FREQ_GOV_CONSERVATIVE) - return 0; - - dbs_info = to_dbs_info(policy->governor_data); - /* - * we only care if our internally tracked freq moves outside the 'valid' - * ranges of frequency available to us otherwise we do not change it - */ - if (dbs_info->requested_freq > policy->max - || dbs_info->requested_freq < policy->min) - dbs_info->requested_freq = freq->new; - - return 0; -} +#define CPU_FREQ_GOV_CONSERVATIVE (&cs_governor.gov) static int __init cpufreq_gov_dbs_init(void) { diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index be498d56dd69..e415349ab31b 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -336,17 +336,6 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) synchronize_sched(); } -static void gov_cancel_work(struct cpufreq_policy *policy) -{ - struct policy_dbs_info *policy_dbs = policy->governor_data; - - gov_clear_update_util(policy_dbs->policy); - irq_work_sync(&policy_dbs->irq_work); - cancel_work_sync(&policy_dbs->work); - atomic_set(&policy_dbs->work_count, 0); - policy_dbs->work_in_progress = false; -} - static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, struct dbs_governor *gov) { @@ -389,7 +378,7 @@ static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs, gov->free(policy_dbs); } -static int cpufreq_governor_init(struct cpufreq_policy *policy) +int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct dbs_data *dbs_data; @@ -429,7 +418,7 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list); - ret = gov->init(dbs_data, !policy->governor->initialized); + ret = gov->init(dbs_data); if (ret) goto free_policy_dbs_info; @@ -458,13 +447,13 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy) goto out; /* Failure, so roll back. */ - pr_err("cpufreq: Governor initialization failed (dbs_data kobject init error %d)\n", ret); + pr_err("initialization failed (dbs_data kobject init error %d)\n", ret); policy->governor_data = NULL; if (!have_governor_per_policy()) gov->gdbs_data = NULL; - gov->exit(dbs_data, !policy->governor->initialized); + gov->exit(dbs_data); kfree(dbs_data); free_policy_dbs_info: @@ -474,8 +463,9 @@ out: mutex_unlock(&gov_dbs_data_mutex); return ret; } +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_init); -static int cpufreq_governor_exit(struct cpufreq_policy *policy) +void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; @@ -493,17 +483,17 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy) if (!have_governor_per_policy()) gov->gdbs_data = NULL; - gov->exit(dbs_data, policy->governor->initialized == 1); + gov->exit(dbs_data); kfree(dbs_data); } free_policy_dbs_info(policy_dbs, gov); mutex_unlock(&gov_dbs_data_mutex); - return 0; } +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_exit); -static int cpufreq_governor_start(struct cpufreq_policy *policy) +int cpufreq_dbs_governor_start(struct cpufreq_policy *policy) { struct dbs_governor *gov = dbs_governor_of(policy); struct policy_dbs_info *policy_dbs = policy->governor_data; @@ -539,47 +529,28 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy) gov_set_update_util(policy_dbs, sampling_rate); return 0; } +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_start); -static int cpufreq_governor_stop(struct cpufreq_policy *policy) +void cpufreq_dbs_governor_stop(struct cpufreq_policy *policy) { - gov_cancel_work(policy); - return 0; + struct policy_dbs_info *policy_dbs = policy->governor_data; + + gov_clear_update_util(policy_dbs->policy); + irq_work_sync(&policy_dbs->irq_work); + cancel_work_sync(&policy_dbs->work); + atomic_set(&policy_dbs->work_count, 0); + policy_dbs->work_in_progress = false; } +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop); -static int cpufreq_governor_limits(struct cpufreq_policy *policy) +void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy) { struct policy_dbs_info *policy_dbs = policy->governor_data; mutex_lock(&policy_dbs->timer_mutex); - - if (policy->max < policy->cur) - __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); - else if (policy->min > policy->cur) - __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); - + cpufreq_policy_apply_limits(policy); gov_update_sample_delay(policy_dbs, 0); mutex_unlock(&policy_dbs->timer_mutex); - - return 0; -} - -int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) -{ - if (event == CPUFREQ_GOV_POLICY_INIT) { - return cpufreq_governor_init(policy); - } else if (policy->governor_data) { - switch (event) { - case CPUFREQ_GOV_POLICY_EXIT: - return cpufreq_governor_exit(policy); - case CPUFREQ_GOV_START: - return cpufreq_governor_start(policy); - case CPUFREQ_GOV_STOP: - return cpufreq_governor_stop(policy); - case CPUFREQ_GOV_LIMITS: - return cpufreq_governor_limits(policy); - } - } - return -EINVAL; } -EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); +EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_limits); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 34eb214b6d57..ef1037e9c92b 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -138,8 +138,8 @@ struct dbs_governor { unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy); struct policy_dbs_info *(*alloc)(void); void (*free)(struct policy_dbs_info *policy_dbs); - int (*init)(struct dbs_data *dbs_data, bool notify); - void (*exit)(struct dbs_data *dbs_data, bool notify); + int (*init)(struct dbs_data *dbs_data); + void (*exit)(struct dbs_data *dbs_data); void (*start)(struct cpufreq_policy *policy); }; @@ -148,6 +148,25 @@ static inline struct dbs_governor *dbs_governor_of(struct cpufreq_policy *policy return container_of(policy->governor, struct dbs_governor, gov); } +/* Governor callback routines */ +int cpufreq_dbs_governor_init(struct cpufreq_policy *policy); +void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy); +int cpufreq_dbs_governor_start(struct cpufreq_policy *policy); +void cpufreq_dbs_governor_stop(struct cpufreq_policy *policy); +void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy); + +#define CPUFREQ_DBS_GOVERNOR_INITIALIZER(_name_) \ + { \ + .name = _name_, \ + .max_transition_latency = TRANSITION_LATENCY_LIMIT, \ + .owner = THIS_MODULE, \ + .init = cpufreq_dbs_governor_init, \ + .exit = cpufreq_dbs_governor_exit, \ + .start = cpufreq_dbs_governor_start, \ + .stop = cpufreq_dbs_governor_stop, \ + .limits = cpufreq_dbs_governor_limits, \ + } + /* Governor specific operations */ struct od_ops { unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, @@ -155,7 +174,6 @@ struct od_ops { }; unsigned int dbs_update(struct cpufreq_policy *policy); -int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event); void od_register_powersave_bias_handler(unsigned int (*f) (struct cpufreq_policy *, unsigned int, unsigned int), unsigned int powersave_bias); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 300163430516..3a1f49f5f4c6 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -65,34 +65,30 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, { unsigned int freq_req, freq_reduc, freq_avg; unsigned int freq_hi, freq_lo; - unsigned int index = 0; + unsigned int index; unsigned int delay_hi_us; struct policy_dbs_info *policy_dbs = policy->governor_data; struct od_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs); struct dbs_data *dbs_data = policy_dbs->dbs_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; + struct cpufreq_frequency_table *freq_table = policy->freq_table; - if (!dbs_info->freq_table) { + if (!freq_table) { dbs_info->freq_lo = 0; dbs_info->freq_lo_delay_us = 0; return freq_next; } - cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, - relation, &index); - freq_req = dbs_info->freq_table[index].frequency; + index = cpufreq_frequency_table_target(policy, freq_next, relation); + freq_req = freq_table[index].frequency; freq_reduc = freq_req * od_tuners->powersave_bias / 1000; freq_avg = freq_req - freq_reduc; /* Find freq bounds for freq_avg in freq_table */ - index = 0; - cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, - CPUFREQ_RELATION_H, &index); - freq_lo = dbs_info->freq_table[index].frequency; - index = 0; - cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, - CPUFREQ_RELATION_L, &index); - freq_hi = dbs_info->freq_table[index].frequency; + index = cpufreq_table_find_index_h(policy, freq_avg); + freq_lo = freq_table[index].frequency; + index = cpufreq_table_find_index_l(policy, freq_avg); + freq_hi = freq_table[index].frequency; /* Find out how long we have to be in hi and lo freqs */ if (freq_hi == freq_lo) { @@ -113,7 +109,6 @@ static void ondemand_powersave_bias_init(struct cpufreq_policy *policy) { struct od_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data); - dbs_info->freq_table = cpufreq_frequency_get_table(policy->cpu); dbs_info->freq_lo = 0; } @@ -361,17 +356,15 @@ static void od_free(struct policy_dbs_info *policy_dbs) kfree(to_dbs_info(policy_dbs)); } -static int od_init(struct dbs_data *dbs_data, bool notify) +static int od_init(struct dbs_data *dbs_data) { struct od_dbs_tuners *tuners; u64 idle_time; int cpu; tuners = kzalloc(sizeof(*tuners), GFP_KERNEL); - if (!tuners) { - pr_err("%s: kzalloc failed\n", __func__); + if (!tuners) return -ENOMEM; - } cpu = get_cpu(); idle_time = get_cpu_idle_time_us(cpu, NULL); @@ -402,7 +395,7 @@ static int od_init(struct dbs_data *dbs_data, bool notify) return 0; } -static void od_exit(struct dbs_data *dbs_data, bool notify) +static void od_exit(struct dbs_data *dbs_data) { kfree(dbs_data->tuners); } @@ -420,12 +413,7 @@ static struct od_ops od_ops = { }; static struct dbs_governor od_dbs_gov = { - .gov = { - .name = "ondemand", - .governor = cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, - }, + .gov = CPUFREQ_DBS_GOVERNOR_INITIALIZER("ondemand"), .kobj_type = { .default_attrs = od_attributes }, .gov_dbs_timer = od_dbs_timer, .alloc = od_alloc, diff --git a/drivers/cpufreq/cpufreq_ondemand.h b/drivers/cpufreq/cpufreq_ondemand.h index f0121db3cd9e..640ea4e97106 100644 --- a/drivers/cpufreq/cpufreq_ondemand.h +++ b/drivers/cpufreq/cpufreq_ondemand.h @@ -13,7 +13,6 @@ struct od_policy_dbs_info { struct policy_dbs_info policy_dbs; - struct cpufreq_frequency_table *freq_table; unsigned int freq_lo; unsigned int freq_lo_delay_us; unsigned int freq_hi_delay_us; diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c index af9f4b96f5a8..dafb679adc58 100644 --- a/drivers/cpufreq/cpufreq_performance.c +++ b/drivers/cpufreq/cpufreq_performance.c @@ -16,27 +16,16 @@ #include <linux/init.h> #include <linux/module.h> -static int cpufreq_governor_performance(struct cpufreq_policy *policy, - unsigned int event) +static void cpufreq_gov_performance_limits(struct cpufreq_policy *policy) { - switch (event) { - case CPUFREQ_GOV_START: - case CPUFREQ_GOV_LIMITS: - pr_debug("setting to %u kHz because of event %u\n", - policy->max, event); - __cpufreq_driver_target(policy, policy->max, - CPUFREQ_RELATION_H); - break; - default: - break; - } - return 0; + pr_debug("setting to %u kHz\n", policy->max); + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); } static struct cpufreq_governor cpufreq_gov_performance = { .name = "performance", - .governor = cpufreq_governor_performance, .owner = THIS_MODULE, + .limits = cpufreq_gov_performance_limits, }; static int __init cpufreq_gov_performance_init(void) diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c index b8b400232a74..78a651038faf 100644 --- a/drivers/cpufreq/cpufreq_powersave.c +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -16,26 +16,15 @@ #include <linux/init.h> #include <linux/module.h> -static int cpufreq_governor_powersave(struct cpufreq_policy *policy, - unsigned int event) +static void cpufreq_gov_powersave_limits(struct cpufreq_policy *policy) { - switch (event) { - case CPUFREQ_GOV_START: - case CPUFREQ_GOV_LIMITS: - pr_debug("setting to %u kHz because of event %u\n", - policy->min, event); - __cpufreq_driver_target(policy, policy->min, - CPUFREQ_RELATION_L); - break; - default: - break; - } - return 0; + pr_debug("setting to %u kHz\n", policy->min); + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); } static struct cpufreq_governor cpufreq_gov_powersave = { .name = "powersave", - .governor = cpufreq_governor_powersave, + .limits = cpufreq_gov_powersave_limits, .owner = THIS_MODULE, }; diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 5e370a30a964..06d3abdffd3a 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -15,7 +15,7 @@ #include <linux/slab.h> #include <linux/cputime.h> -static spinlock_t cpufreq_stats_lock; +static DEFINE_SPINLOCK(cpufreq_stats_lock); struct cpufreq_stats { unsigned int total_trans; @@ -52,6 +52,9 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf) ssize_t len = 0; int i; + if (policy->fast_switch_enabled) + return 0; + cpufreq_stats_update(stats); for (i = 0; i < stats->state_num; i++) { len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i], @@ -68,6 +71,9 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) ssize_t len = 0; int i, j; + if (policy->fast_switch_enabled) + return 0; + len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); len += snprintf(buf + len, PAGE_SIZE - len, " : "); for (i = 0; i < stats->state_num; i++) { @@ -130,7 +136,7 @@ static int freq_table_get_index(struct cpufreq_stats *stats, unsigned int freq) return -1; } -static void __cpufreq_stats_free_table(struct cpufreq_policy *policy) +void cpufreq_stats_free_table(struct cpufreq_policy *policy) { struct cpufreq_stats *stats = policy->stats; @@ -146,39 +152,25 @@ static void __cpufreq_stats_free_table(struct cpufreq_policy *policy) policy->stats = NULL; } -static void cpufreq_stats_free_table(unsigned int cpu) -{ - struct cpufreq_policy *policy; - - policy = cpufreq_cpu_get(cpu); - if (!policy) - return; - - __cpufreq_stats_free_table(policy); - - cpufreq_cpu_put(policy); -} - -static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) +void cpufreq_stats_create_table(struct cpufreq_policy *policy) { unsigned int i = 0, count = 0, ret = -ENOMEM; struct cpufreq_stats *stats; unsigned int alloc_size; - unsigned int cpu = policy->cpu; struct cpufreq_frequency_table *pos, *table; /* We need cpufreq table for creating stats table */ - table = cpufreq_frequency_get_table(cpu); + table = policy->freq_table; if (unlikely(!table)) - return 0; + return; /* stats already initialized */ if (policy->stats) - return -EEXIST; + return; stats = kzalloc(sizeof(*stats), GFP_KERNEL); if (!stats) - return -ENOMEM; + return; /* Find total allocation size */ cpufreq_for_each_valid_entry(pos, table) @@ -215,80 +207,32 @@ static int __cpufreq_stats_create_table(struct cpufreq_policy *policy) policy->stats = stats; ret = sysfs_create_group(&policy->kobj, &stats_attr_group); if (!ret) - return 0; + return; /* We failed, release resources */ policy->stats = NULL; kfree(stats->time_in_state); free_stat: kfree(stats); - - return ret; -} - -static void cpufreq_stats_create_table(unsigned int cpu) -{ - struct cpufreq_policy *policy; - - /* - * "likely(!policy)" because normally cpufreq_stats will be registered - * before cpufreq driver - */ - policy = cpufreq_cpu_get(cpu); - if (likely(!policy)) - return; - - __cpufreq_stats_create_table(policy); - - cpufreq_cpu_put(policy); } -static int cpufreq_stat_notifier_policy(struct notifier_block *nb, - unsigned long val, void *data) +void cpufreq_stats_record_transition(struct cpufreq_policy *policy, + unsigned int new_freq) { - int ret = 0; - struct cpufreq_policy *policy = data; - - if (val == CPUFREQ_CREATE_POLICY) - ret = __cpufreq_stats_create_table(policy); - else if (val == CPUFREQ_REMOVE_POLICY) - __cpufreq_stats_free_table(policy); - - return ret; -} - -static int cpufreq_stat_notifier_trans(struct notifier_block *nb, - unsigned long val, void *data) -{ - struct cpufreq_freqs *freq = data; - struct cpufreq_policy *policy = cpufreq_cpu_get(freq->cpu); - struct cpufreq_stats *stats; + struct cpufreq_stats *stats = policy->stats; int old_index, new_index; - if (!policy) { - pr_err("%s: No policy found\n", __func__); - return 0; - } - - if (val != CPUFREQ_POSTCHANGE) - goto put_policy; - - if (!policy->stats) { + if (!stats) { pr_debug("%s: No stats found\n", __func__); - goto put_policy; + return; } - stats = policy->stats; - old_index = stats->last_index; - new_index = freq_table_get_index(stats, freq->new); + new_index = freq_table_get_index(stats, new_freq); /* We can't do stats->time_in_state[-1]= .. */ - if (old_index == -1 || new_index == -1) - goto put_policy; - - if (old_index == new_index) - goto put_policy; + if (old_index == -1 || new_index == -1 || old_index == new_index) + return; cpufreq_stats_update(stats); @@ -297,61 +241,4 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb, stats->trans_table[old_index * stats->max_state + new_index]++; #endif stats->total_trans++; - -put_policy: - cpufreq_cpu_put(policy); - return 0; } - -static struct notifier_block notifier_policy_block = { - .notifier_call = cpufreq_stat_notifier_policy -}; - -static struct notifier_block notifier_trans_block = { - .notifier_call = cpufreq_stat_notifier_trans -}; - -static int __init cpufreq_stats_init(void) -{ - int ret; - unsigned int cpu; - - spin_lock_init(&cpufreq_stats_lock); - ret = cpufreq_register_notifier(¬ifier_policy_block, - CPUFREQ_POLICY_NOTIFIER); - if (ret) - return ret; - - for_each_online_cpu(cpu) - cpufreq_stats_create_table(cpu); - - ret = cpufreq_register_notifier(¬ifier_trans_block, - CPUFREQ_TRANSITION_NOTIFIER); - if (ret) { - cpufreq_unregister_notifier(¬ifier_policy_block, - CPUFREQ_POLICY_NOTIFIER); - for_each_online_cpu(cpu) - cpufreq_stats_free_table(cpu); - return ret; - } - - return 0; -} -static void __exit cpufreq_stats_exit(void) -{ - unsigned int cpu; - - cpufreq_unregister_notifier(¬ifier_policy_block, - CPUFREQ_POLICY_NOTIFIER); - cpufreq_unregister_notifier(¬ifier_trans_block, - CPUFREQ_TRANSITION_NOTIFIER); - for_each_online_cpu(cpu) - cpufreq_stats_free_table(cpu); -} - -MODULE_AUTHOR("Zou Nan hai <nanhai.zou@intel.com>"); -MODULE_DESCRIPTION("Export cpufreq stats via sysfs"); -MODULE_LICENSE("GPL"); - -module_init(cpufreq_stats_init); -module_exit(cpufreq_stats_exit); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c index 9f3dec9a3f36..bd897e3e134d 100644 --- a/drivers/cpufreq/cpufreq_userspace.c +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -65,66 +65,66 @@ static int cpufreq_userspace_policy_init(struct cpufreq_policy *policy) return 0; } -static int cpufreq_governor_userspace(struct cpufreq_policy *policy, - unsigned int event) +static void cpufreq_userspace_policy_exit(struct cpufreq_policy *policy) +{ + mutex_lock(&userspace_mutex); + kfree(policy->governor_data); + policy->governor_data = NULL; + mutex_unlock(&userspace_mutex); +} + +static int cpufreq_userspace_policy_start(struct cpufreq_policy *policy) { unsigned int *setspeed = policy->governor_data; - unsigned int cpu = policy->cpu; - int rc = 0; - if (event == CPUFREQ_GOV_POLICY_INIT) - return cpufreq_userspace_policy_init(policy); + BUG_ON(!policy->cur); + pr_debug("started managing cpu %u\n", policy->cpu); - if (!setspeed) - return -EINVAL; - - switch (event) { - case CPUFREQ_GOV_POLICY_EXIT: - mutex_lock(&userspace_mutex); - policy->governor_data = NULL; - kfree(setspeed); - mutex_unlock(&userspace_mutex); - break; - case CPUFREQ_GOV_START: - BUG_ON(!policy->cur); - pr_debug("started managing cpu %u\n", cpu); - - mutex_lock(&userspace_mutex); - per_cpu(cpu_is_managed, cpu) = 1; - *setspeed = policy->cur; - mutex_unlock(&userspace_mutex); - break; - case CPUFREQ_GOV_STOP: - pr_debug("managing cpu %u stopped\n", cpu); - - mutex_lock(&userspace_mutex); - per_cpu(cpu_is_managed, cpu) = 0; - *setspeed = 0; - mutex_unlock(&userspace_mutex); - break; - case CPUFREQ_GOV_LIMITS: - mutex_lock(&userspace_mutex); - pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n", - cpu, policy->min, policy->max, policy->cur, *setspeed); - - if (policy->max < *setspeed) - __cpufreq_driver_target(policy, policy->max, - CPUFREQ_RELATION_H); - else if (policy->min > *setspeed) - __cpufreq_driver_target(policy, policy->min, - CPUFREQ_RELATION_L); - else - __cpufreq_driver_target(policy, *setspeed, - CPUFREQ_RELATION_L); - mutex_unlock(&userspace_mutex); - break; - } - return rc; + mutex_lock(&userspace_mutex); + per_cpu(cpu_is_managed, policy->cpu) = 1; + *setspeed = policy->cur; + mutex_unlock(&userspace_mutex); + return 0; +} + +static void cpufreq_userspace_policy_stop(struct cpufreq_policy *policy) +{ + unsigned int *setspeed = policy->governor_data; + + pr_debug("managing cpu %u stopped\n", policy->cpu); + + mutex_lock(&userspace_mutex); + per_cpu(cpu_is_managed, policy->cpu) = 0; + *setspeed = 0; + mutex_unlock(&userspace_mutex); +} + +static void cpufreq_userspace_policy_limits(struct cpufreq_policy *policy) +{ + unsigned int *setspeed = policy->governor_data; + + mutex_lock(&userspace_mutex); + + pr_debug("limit event for cpu %u: %u - %u kHz, currently %u kHz, last set to %u kHz\n", + policy->cpu, policy->min, policy->max, policy->cur, *setspeed); + + if (policy->max < *setspeed) + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); + else if (policy->min > *setspeed) + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); + else + __cpufreq_driver_target(policy, *setspeed, CPUFREQ_RELATION_L); + + mutex_unlock(&userspace_mutex); } static struct cpufreq_governor cpufreq_gov_userspace = { .name = "userspace", - .governor = cpufreq_governor_userspace, + .init = cpufreq_userspace_policy_init, + .exit = cpufreq_userspace_policy_exit, + .start = cpufreq_userspace_policy_start, + .stop = cpufreq_userspace_policy_stop, + .limits = cpufreq_userspace_policy_limits, .store_setspeed = cpufreq_set, .show_setspeed = show_speed, .owner = THIS_MODULE, diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c index 7e336d20c184..b95a872800ec 100644 --- a/drivers/cpufreq/davinci-cpufreq.c +++ b/drivers/cpufreq/davinci-cpufreq.c @@ -38,26 +38,6 @@ struct davinci_cpufreq { }; static struct davinci_cpufreq cpufreq; -static int davinci_verify_speed(struct cpufreq_policy *policy) -{ - struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data; - struct cpufreq_frequency_table *freq_table = pdata->freq_table; - struct clk *armclk = cpufreq.armclk; - - if (freq_table) - return cpufreq_frequency_table_verify(policy, freq_table); - - if (policy->cpu) - return -EINVAL; - - cpufreq_verify_within_cpu_limits(policy); - policy->min = clk_round_rate(armclk, policy->min * 1000) / 1000; - policy->max = clk_round_rate(armclk, policy->max * 1000) / 1000; - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, - policy->cpuinfo.max_freq); - return 0; -} - static int davinci_target(struct cpufreq_policy *policy, unsigned int idx) { struct davinci_cpufreq_config *pdata = cpufreq.dev->platform_data; @@ -121,7 +101,7 @@ static int davinci_cpu_init(struct cpufreq_policy *policy) static struct cpufreq_driver davinci_driver = { .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, - .verify = davinci_verify_speed, + .verify = cpufreq_generic_frequency_table_verify, .target_index = davinci_target, .get = cpufreq_generic_get, .init = davinci_cpu_init, diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index a8f1daffc9bc..3bbbf9e6960c 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -63,8 +63,6 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, else return 0; } -EXPORT_SYMBOL_GPL(cpufreq_frequency_table_cpuinfo); - int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table) @@ -108,20 +106,16 @@ EXPORT_SYMBOL_GPL(cpufreq_frequency_table_verify); */ int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy) { - struct cpufreq_frequency_table *table = - cpufreq_frequency_get_table(policy->cpu); - if (!table) + if (!policy->freq_table) return -ENODEV; - return cpufreq_frequency_table_verify(policy, table); + return cpufreq_frequency_table_verify(policy, policy->freq_table); } EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify); -int cpufreq_frequency_table_target(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table, - unsigned int target_freq, - unsigned int relation, - unsigned int *index) +int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) { struct cpufreq_frequency_table optimal = { .driver_data = ~0, @@ -132,7 +126,9 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, .frequency = 0, }; struct cpufreq_frequency_table *pos; + struct cpufreq_frequency_table *table = policy->freq_table; unsigned int freq, diff, i = 0; + int index; pr_debug("request for target %u kHz (relation: %u) for cpu %u\n", target_freq, relation, policy->cpu); @@ -196,25 +192,26 @@ int cpufreq_frequency_table_target(struct cpufreq_policy *policy, } } if (optimal.driver_data > i) { - if (suboptimal.driver_data > i) - return -EINVAL; - *index = suboptimal.driver_data; - } else - *index = optimal.driver_data; + if (suboptimal.driver_data > i) { + WARN(1, "Invalid frequency table: %d\n", policy->cpu); + return 0; + } - pr_debug("target index is %u, freq is:%u kHz\n", *index, - table[*index].frequency); + index = suboptimal.driver_data; + } else + index = optimal.driver_data; - return 0; + pr_debug("target index is %u, freq is:%u kHz\n", index, + table[index].frequency); + return index; } -EXPORT_SYMBOL_GPL(cpufreq_frequency_table_target); +EXPORT_SYMBOL_GPL(cpufreq_table_index_unsorted); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq) { - struct cpufreq_frequency_table *pos, *table; + struct cpufreq_frequency_table *pos, *table = policy->freq_table; - table = cpufreq_frequency_get_table(policy->cpu); if (unlikely(!table)) { pr_debug("%s: Unable to find frequency table\n", __func__); return -ENOENT; @@ -300,15 +297,72 @@ struct freq_attr *cpufreq_generic_attr[] = { }; EXPORT_SYMBOL_GPL(cpufreq_generic_attr); +static int set_freq_table_sorted(struct cpufreq_policy *policy) +{ + struct cpufreq_frequency_table *pos, *table = policy->freq_table; + struct cpufreq_frequency_table *prev = NULL; + int ascending = 0; + + policy->freq_table_sorted = CPUFREQ_TABLE_UNSORTED; + + cpufreq_for_each_valid_entry(pos, table) { + if (!prev) { + prev = pos; + continue; + } + + if (pos->frequency == prev->frequency) { + pr_warn("Duplicate freq-table entries: %u\n", + pos->frequency); + return -EINVAL; + } + + /* Frequency increased from prev to pos */ + if (pos->frequency > prev->frequency) { + /* But frequency was decreasing earlier */ + if (ascending < 0) { + pr_debug("Freq table is unsorted\n"); + return 0; + } + + ascending++; + } else { + /* Frequency decreased from prev to pos */ + + /* But frequency was increasing earlier */ + if (ascending > 0) { + pr_debug("Freq table is unsorted\n"); + return 0; + } + + ascending--; + } + + prev = pos; + } + + if (ascending > 0) + policy->freq_table_sorted = CPUFREQ_TABLE_SORTED_ASCENDING; + else + policy->freq_table_sorted = CPUFREQ_TABLE_SORTED_DESCENDING; + + pr_debug("Freq table is sorted in %s order\n", + ascending > 0 ? "ascending" : "descending"); + + return 0; +} + int cpufreq_table_validate_and_show(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table) { - int ret = cpufreq_frequency_table_cpuinfo(policy, table); + int ret; - if (!ret) - policy->freq_table = table; + ret = cpufreq_frequency_table_cpuinfo(policy, table); + if (ret) + return ret; - return ret; + policy->freq_table = table; + return set_freq_table_sorted(policy); } EXPORT_SYMBOL_GPL(cpufreq_table_validate_and_show); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 28690b284846..9ec033b4f2d9 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -97,7 +97,6 @@ static inline u64 div_ext_fp(u64 x, u64 y) * read from MPERF MSR between last and current sample * @tsc: Difference of time stamp counter between last and * current sample - * @freq: Effective frequency calculated from APERF/MPERF * @time: Current time from scheduler * * This structure is used in the cpudata structure to store performance sample @@ -109,7 +108,6 @@ struct sample { u64 aperf; u64 mperf; u64 tsc; - int freq; u64 time; }; @@ -282,9 +280,9 @@ struct cpu_defaults { static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu); static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu); -static struct pstate_adjust_policy pid_params; -static struct pstate_funcs pstate_funcs; -static int hwp_active; +static struct pstate_adjust_policy pid_params __read_mostly; +static struct pstate_funcs pstate_funcs __read_mostly; +static int hwp_active __read_mostly; #ifdef CONFIG_ACPI static bool acpi_ppc; @@ -808,7 +806,8 @@ static void __init intel_pstate_sysfs_expose_params(void) static void intel_pstate_hwp_enable(struct cpudata *cpudata) { /* First disable HWP notification interrupt as we don't process them */ - wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); + if (static_cpu_has(X86_FEATURE_HWP_NOTIFY)) + wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x00); wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } @@ -945,7 +944,7 @@ static int core_get_max_pstate(void) if (err) goto skip_tar; - tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl; + tdp_msr = MSR_CONFIG_TDP_NOMINAL + (tdp_ctrl & 0x3); err = rdmsrl_safe(tdp_msr, &tdp_ratio); if (err) goto skip_tar; @@ -973,7 +972,7 @@ static int core_get_turbo_pstate(void) u64 value; int nont, ret; - rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value); + rdmsrl(MSR_TURBO_RATIO_LIMIT, value); nont = core_get_max_pstate(); ret = (value) & 255; if (ret <= nont) @@ -1002,7 +1001,7 @@ static int knl_get_turbo_pstate(void) u64 value; int nont, ret; - rdmsrl(MSR_NHM_TURBO_RATIO_LIMIT, value); + rdmsrl(MSR_TURBO_RATIO_LIMIT, value); nont = core_get_max_pstate(); ret = (((value) >> 8) & 0xFF); if (ret <= nont) @@ -1092,6 +1091,26 @@ static struct cpu_defaults knl_params = { }, }; +static struct cpu_defaults bxt_params = { + .pid_policy = { + .sample_rate_ms = 10, + .deadband = 0, + .setpoint = 60, + .p_gain_pct = 14, + .d_gain_pct = 0, + .i_gain_pct = 4, + }, + .funcs = { + .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, + .get_min = core_get_min_pstate, + .get_turbo = core_get_turbo_pstate, + .get_scaling = core_get_scaling, + .get_val = core_get_val, + .get_target_pstate = get_target_pstate_use_cpu_load, + }, +}; + static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) { int max_perf = cpu->pstate.turbo_pstate; @@ -1114,17 +1133,12 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } -static inline void intel_pstate_record_pstate(struct cpudata *cpu, int pstate) -{ - trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); - cpu->pstate.current_pstate = pstate; -} - static void intel_pstate_set_min_pstate(struct cpudata *cpu) { int pstate = cpu->pstate.min_pstate; - intel_pstate_record_pstate(cpu, pstate); + trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); + cpu->pstate.current_pstate = pstate; /* * Generally, there is no guarantee that this code will always run on * the CPU being updated, so force the register update to run on the @@ -1284,10 +1298,11 @@ static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) intel_pstate_get_min_max(cpu, &min_perf, &max_perf); pstate = clamp_t(int, pstate, min_perf, max_perf); + trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); if (pstate == cpu->pstate.current_pstate) return; - intel_pstate_record_pstate(cpu, pstate); + cpu->pstate.current_pstate = pstate; wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate)); } @@ -1352,11 +1367,12 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = { ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_params), ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), ICPU(INTEL_FAM6_XEON_PHI_KNL, knl_params), + ICPU(INTEL_FAM6_ATOM_GOLDMONT, bxt_params), {} }; MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); -static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = { +static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), {} }; @@ -1576,12 +1592,12 @@ static struct cpufreq_driver intel_pstate_driver = { .name = "intel_pstate", }; -static int __initdata no_load; -static int __initdata no_hwp; -static int __initdata hwp_only; -static unsigned int force_load; +static int no_load __initdata; +static int no_hwp __initdata; +static int hwp_only __initdata; +static unsigned int force_load __initdata; -static int intel_pstate_msrs_not_valid(void) +static int __init intel_pstate_msrs_not_valid(void) { if (!pstate_funcs.get_max() || !pstate_funcs.get_min() || @@ -1591,7 +1607,7 @@ static int intel_pstate_msrs_not_valid(void) return 0; } -static void copy_pid_params(struct pstate_adjust_policy *policy) +static void __init copy_pid_params(struct pstate_adjust_policy *policy) { pid_params.sample_rate_ms = policy->sample_rate_ms; pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC; @@ -1602,7 +1618,7 @@ static void copy_pid_params(struct pstate_adjust_policy *policy) pid_params.setpoint = policy->setpoint; } -static void copy_cpu_funcs(struct pstate_funcs *funcs) +static void __init copy_cpu_funcs(struct pstate_funcs *funcs) { pstate_funcs.get_max = funcs->get_max; pstate_funcs.get_max_physical = funcs->get_max_physical; @@ -1617,7 +1633,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) #ifdef CONFIG_ACPI -static bool intel_pstate_no_acpi_pss(void) +static bool __init intel_pstate_no_acpi_pss(void) { int i; @@ -1646,7 +1662,7 @@ static bool intel_pstate_no_acpi_pss(void) return true; } -static bool intel_pstate_has_acpi_ppc(void) +static bool __init intel_pstate_has_acpi_ppc(void) { int i; @@ -1674,7 +1690,7 @@ struct hw_vendor_info { }; /* Hardware vendor-specific info that has its own power management modes */ -static struct hw_vendor_info vendor_info[] = { +static struct hw_vendor_info vendor_info[] __initdata = { {1, "HP ", "ProLiant", PSS}, {1, "ORACLE", "X4-2 ", PPC}, {1, "ORACLE", "X4-2L ", PPC}, @@ -1693,7 +1709,7 @@ static struct hw_vendor_info vendor_info[] = { {0, "", ""}, }; -static bool intel_pstate_platform_pwr_mgmt_exists(void) +static bool __init intel_pstate_platform_pwr_mgmt_exists(void) { struct acpi_table_header hdr; struct hw_vendor_info *v_info; diff --git a/drivers/cpufreq/mvebu-cpufreq.c b/drivers/cpufreq/mvebu-cpufreq.c index e920889b9ac2..ed915ee85dd9 100644 --- a/drivers/cpufreq/mvebu-cpufreq.c +++ b/drivers/cpufreq/mvebu-cpufreq.c @@ -70,7 +70,7 @@ static int __init armada_xp_pmsu_cpufreq_init(void) continue; } - clk = clk_get(cpu_dev, 0); + clk = clk_get(cpu_dev, NULL); if (IS_ERR(clk)) { pr_err("Cannot get clock for CPU %d\n", cpu); return PTR_ERR(clk); diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c index a7ecb9a84c15..3f0ce2ae35ee 100644 --- a/drivers/cpufreq/pcc-cpufreq.c +++ b/drivers/cpufreq/pcc-cpufreq.c @@ -555,8 +555,6 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) policy->min = policy->cpuinfo.min_freq = ioread32(&pcch_hdr->minimum_frequency) * 1000; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - pr_debug("init: policy->max is %d, policy->min is %d\n", policy->max, policy->min); out: diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 6bd715b7f11c..87796e0864e9 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -64,12 +64,14 @@ /** * struct global_pstate_info - Per policy data structure to maintain history of * global pstates - * @highest_lpstate: The local pstate from which we are ramping down + * @highest_lpstate_idx: The local pstate index from which we are + * ramping down * @elapsed_time: Time in ms spent in ramping down from - * highest_lpstate + * highest_lpstate_idx * @last_sampled_time: Time from boot in ms when global pstates were * last set - * @last_lpstate,last_gpstate: Last set values for local and global pstates + * @last_lpstate_idx, Last set value of local pstate and global + * last_gpstate_idx pstate in terms of cpufreq table index * @timer: Is used for ramping down if cpu goes idle for * a long time with global pstate held high * @gpstate_lock: A spinlock to maintain synchronization between @@ -77,11 +79,11 @@ * governer's target_index calls */ struct global_pstate_info { - int highest_lpstate; + int highest_lpstate_idx; unsigned int elapsed_time; unsigned int last_sampled_time; - int last_lpstate; - int last_gpstate; + int last_lpstate_idx; + int last_gpstate_idx; spinlock_t gpstate_lock; struct timer_list timer; }; @@ -124,29 +126,47 @@ static int nr_chips; static DEFINE_PER_CPU(struct chip *, chip_info); /* - * Note: The set of pstates consists of contiguous integers, the - * smallest of which is indicated by powernv_pstate_info.min, the - * largest of which is indicated by powernv_pstate_info.max. + * Note: + * The set of pstates consists of contiguous integers. + * powernv_pstate_info stores the index of the frequency table for + * max, min and nominal frequencies. It also stores number of + * available frequencies. * - * The nominal pstate is the highest non-turbo pstate in this - * platform. This is indicated by powernv_pstate_info.nominal. + * powernv_pstate_info.nominal indicates the index to the highest + * non-turbo frequency. */ static struct powernv_pstate_info { - int min; - int max; - int nominal; - int nr_pstates; + unsigned int min; + unsigned int max; + unsigned int nominal; + unsigned int nr_pstates; } powernv_pstate_info; +/* Use following macros for conversions between pstate_id and index */ +static inline int idx_to_pstate(unsigned int i) +{ + return powernv_freqs[i].driver_data; +} + +static inline unsigned int pstate_to_idx(int pstate) +{ + /* + * abs() is deliberately used so that is works with + * both monotonically increasing and decreasing + * pstate values + */ + return abs(pstate - idx_to_pstate(powernv_pstate_info.max)); +} + static inline void reset_gpstates(struct cpufreq_policy *policy) { struct global_pstate_info *gpstates = policy->driver_data; - gpstates->highest_lpstate = 0; + gpstates->highest_lpstate_idx = 0; gpstates->elapsed_time = 0; gpstates->last_sampled_time = 0; - gpstates->last_lpstate = 0; - gpstates->last_gpstate = 0; + gpstates->last_lpstate_idx = 0; + gpstates->last_gpstate_idx = 0; } /* @@ -156,9 +176,10 @@ static inline void reset_gpstates(struct cpufreq_policy *policy) static int init_powernv_pstates(void) { struct device_node *power_mgt; - int i, pstate_min, pstate_max, pstate_nominal, nr_pstates = 0; + int i, nr_pstates = 0; const __be32 *pstate_ids, *pstate_freqs; u32 len_ids, len_freqs; + u32 pstate_min, pstate_max, pstate_nominal; power_mgt = of_find_node_by_path("/ibm,opal/power-mgt"); if (!power_mgt) { @@ -208,6 +229,7 @@ static int init_powernv_pstates(void) return -ENODEV; } + powernv_pstate_info.nr_pstates = nr_pstates; pr_debug("NR PStates %d\n", nr_pstates); for (i = 0; i < nr_pstates; i++) { u32 id = be32_to_cpu(pstate_ids[i]); @@ -216,15 +238,17 @@ static int init_powernv_pstates(void) pr_debug("PState id %d freq %d MHz\n", id, freq); powernv_freqs[i].frequency = freq * 1000; /* kHz */ powernv_freqs[i].driver_data = id; + + if (id == pstate_max) + powernv_pstate_info.max = i; + else if (id == pstate_nominal) + powernv_pstate_info.nominal = i; + else if (id == pstate_min) + powernv_pstate_info.min = i; } + /* End of list marker entry */ powernv_freqs[i].frequency = CPUFREQ_TABLE_END; - - powernv_pstate_info.min = pstate_min; - powernv_pstate_info.max = pstate_max; - powernv_pstate_info.nominal = pstate_nominal; - powernv_pstate_info.nr_pstates = nr_pstates; - return 0; } @@ -233,12 +257,12 @@ static unsigned int pstate_id_to_freq(int pstate_id) { int i; - i = powernv_pstate_info.max - pstate_id; + i = pstate_to_idx(pstate_id); if (i >= powernv_pstate_info.nr_pstates || i < 0) { pr_warn("PState id %d outside of PState table, " "reporting nominal id %d instead\n", - pstate_id, powernv_pstate_info.nominal); - i = powernv_pstate_info.max - powernv_pstate_info.nominal; + pstate_id, idx_to_pstate(powernv_pstate_info.nominal)); + i = powernv_pstate_info.nominal; } return powernv_freqs[i].frequency; @@ -252,7 +276,7 @@ static ssize_t cpuinfo_nominal_freq_show(struct cpufreq_policy *policy, char *buf) { return sprintf(buf, "%u\n", - pstate_id_to_freq(powernv_pstate_info.nominal)); + powernv_freqs[powernv_pstate_info.nominal].frequency); } struct freq_attr cpufreq_freq_attr_cpuinfo_nominal_freq = @@ -426,7 +450,7 @@ static void set_pstate(void *data) */ static inline unsigned int get_nominal_index(void) { - return powernv_pstate_info.max - powernv_pstate_info.nominal; + return powernv_pstate_info.nominal; } static void powernv_cpufreq_throttle_check(void *data) @@ -435,20 +459,22 @@ static void powernv_cpufreq_throttle_check(void *data) unsigned int cpu = smp_processor_id(); unsigned long pmsr; int pmsr_pmax; + unsigned int pmsr_pmax_idx; pmsr = get_pmspr(SPRN_PMSR); chip = this_cpu_read(chip_info); /* Check for Pmax Capping */ pmsr_pmax = (s8)PMSR_MAX(pmsr); - if (pmsr_pmax != powernv_pstate_info.max) { + pmsr_pmax_idx = pstate_to_idx(pmsr_pmax); + if (pmsr_pmax_idx != powernv_pstate_info.max) { if (chip->throttled) goto next; chip->throttled = true; - if (pmsr_pmax < powernv_pstate_info.nominal) { - pr_warn_once("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", + if (pmsr_pmax_idx > powernv_pstate_info.nominal) { + pr_warn_once("CPU %d on Chip %u has Pmax(%d) reduced below nominal frequency(%d)\n", cpu, chip->id, pmsr_pmax, - powernv_pstate_info.nominal); + idx_to_pstate(powernv_pstate_info.nominal)); chip->throttle_sub_turbo++; } else { chip->throttle_turbo++; @@ -484,34 +510,35 @@ next: /** * calc_global_pstate - Calculate global pstate - * @elapsed_time: Elapsed time in milliseconds - * @local_pstate: New local pstate - * @highest_lpstate: pstate from which its ramping down + * @elapsed_time: Elapsed time in milliseconds + * @local_pstate_idx: New local pstate + * @highest_lpstate_idx: pstate from which its ramping down * * Finds the appropriate global pstate based on the pstate from which its * ramping down and the time elapsed in ramping down. It follows a quadratic * equation which ensures that it reaches ramping down to pmin in 5sec. */ static inline int calc_global_pstate(unsigned int elapsed_time, - int highest_lpstate, int local_pstate) + int highest_lpstate_idx, + int local_pstate_idx) { - int pstate_diff; + int index_diff; /* * Using ramp_down_percent we get the percentage of rampdown * that we are expecting to be dropping. Difference between - * highest_lpstate and powernv_pstate_info.min will give a absolute + * highest_lpstate_idx and powernv_pstate_info.min will give a absolute * number of how many pstates we will drop eventually by the end of * 5 seconds, then just scale it get the number pstates to be dropped. */ - pstate_diff = ((int)ramp_down_percent(elapsed_time) * - (highest_lpstate - powernv_pstate_info.min)) / 100; + index_diff = ((int)ramp_down_percent(elapsed_time) * + (powernv_pstate_info.min - highest_lpstate_idx)) / 100; /* Ensure that global pstate is >= to local pstate */ - if (highest_lpstate - pstate_diff < local_pstate) - return local_pstate; + if (highest_lpstate_idx + index_diff >= local_pstate_idx) + return local_pstate_idx; else - return highest_lpstate - pstate_diff; + return highest_lpstate_idx + index_diff; } static inline void queue_gpstate_timer(struct global_pstate_info *gpstates) @@ -546,7 +573,7 @@ void gpstate_timer_handler(unsigned long data) { struct cpufreq_policy *policy = (struct cpufreq_policy *)data; struct global_pstate_info *gpstates = policy->driver_data; - int gpstate_id; + int gpstate_idx; unsigned int time_diff = jiffies_to_msecs(jiffies) - gpstates->last_sampled_time; struct powernv_smp_call_data freq_data; @@ -556,29 +583,29 @@ void gpstate_timer_handler(unsigned long data) gpstates->last_sampled_time += time_diff; gpstates->elapsed_time += time_diff; - freq_data.pstate_id = gpstates->last_lpstate; + freq_data.pstate_id = idx_to_pstate(gpstates->last_lpstate_idx); - if ((gpstates->last_gpstate == freq_data.pstate_id) || + if ((gpstates->last_gpstate_idx == gpstates->last_lpstate_idx) || (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME)) { - gpstate_id = freq_data.pstate_id; + gpstate_idx = pstate_to_idx(freq_data.pstate_id); reset_gpstates(policy); - gpstates->highest_lpstate = freq_data.pstate_id; + gpstates->highest_lpstate_idx = gpstate_idx; } else { - gpstate_id = calc_global_pstate(gpstates->elapsed_time, - gpstates->highest_lpstate, - freq_data.pstate_id); + gpstate_idx = calc_global_pstate(gpstates->elapsed_time, + gpstates->highest_lpstate_idx, + freq_data.pstate_id); } /* * If local pstate is equal to global pstate, rampdown is over * So timer is not required to be queued. */ - if (gpstate_id != freq_data.pstate_id) + if (gpstate_idx != gpstates->last_lpstate_idx) queue_gpstate_timer(gpstates); - freq_data.gpstate_id = gpstate_id; - gpstates->last_gpstate = freq_data.gpstate_id; - gpstates->last_lpstate = freq_data.pstate_id; + freq_data.gpstate_id = idx_to_pstate(gpstate_idx); + gpstates->last_gpstate_idx = pstate_to_idx(freq_data.gpstate_id); + gpstates->last_lpstate_idx = pstate_to_idx(freq_data.pstate_id); spin_unlock(&gpstates->gpstate_lock); @@ -595,7 +622,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, unsigned int new_index) { struct powernv_smp_call_data freq_data; - unsigned int cur_msec, gpstate_id; + unsigned int cur_msec, gpstate_idx; struct global_pstate_info *gpstates = policy->driver_data; if (unlikely(rebooting) && new_index != get_nominal_index()) @@ -607,15 +634,15 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, cur_msec = jiffies_to_msecs(get_jiffies_64()); spin_lock(&gpstates->gpstate_lock); - freq_data.pstate_id = powernv_freqs[new_index].driver_data; + freq_data.pstate_id = idx_to_pstate(new_index); if (!gpstates->last_sampled_time) { - gpstate_id = freq_data.pstate_id; - gpstates->highest_lpstate = freq_data.pstate_id; + gpstate_idx = new_index; + gpstates->highest_lpstate_idx = new_index; goto gpstates_done; } - if (gpstates->last_gpstate > freq_data.pstate_id) { + if (gpstates->last_gpstate_idx < new_index) { gpstates->elapsed_time += cur_msec - gpstates->last_sampled_time; @@ -626,34 +653,34 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, */ if (gpstates->elapsed_time > MAX_RAMP_DOWN_TIME) { reset_gpstates(policy); - gpstates->highest_lpstate = freq_data.pstate_id; - gpstate_id = freq_data.pstate_id; + gpstates->highest_lpstate_idx = new_index; + gpstate_idx = new_index; } else { /* Elaspsed_time is less than 5 seconds, continue to rampdown */ - gpstate_id = calc_global_pstate(gpstates->elapsed_time, - gpstates->highest_lpstate, - freq_data.pstate_id); + gpstate_idx = calc_global_pstate(gpstates->elapsed_time, + gpstates->highest_lpstate_idx, + new_index); } } else { reset_gpstates(policy); - gpstates->highest_lpstate = freq_data.pstate_id; - gpstate_id = freq_data.pstate_id; + gpstates->highest_lpstate_idx = new_index; + gpstate_idx = new_index; } /* * If local pstate is equal to global pstate, rampdown is over * So timer is not required to be queued. */ - if (gpstate_id != freq_data.pstate_id) + if (gpstate_idx != new_index) queue_gpstate_timer(gpstates); else del_timer_sync(&gpstates->timer); gpstates_done: - freq_data.gpstate_id = gpstate_id; + freq_data.gpstate_id = idx_to_pstate(gpstate_idx); gpstates->last_sampled_time = cur_msec; - gpstates->last_gpstate = freq_data.gpstate_id; - gpstates->last_lpstate = freq_data.pstate_id; + gpstates->last_gpstate_idx = gpstate_idx; + gpstates->last_lpstate_idx = new_index; spin_unlock(&gpstates->gpstate_lock); @@ -759,9 +786,7 @@ void powernv_cpufreq_work_fn(struct work_struct *work) struct cpufreq_policy policy; cpufreq_get_policy(&policy, cpu); - cpufreq_frequency_table_target(&policy, policy.freq_table, - policy.cur, - CPUFREQ_RELATION_C, &index); + index = cpufreq_table_find_index_c(&policy, policy.cur); powernv_cpufreq_target_index(&policy, index); cpumask_andnot(&mask, &mask, policy.cpus); } @@ -847,8 +872,8 @@ static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy) struct powernv_smp_call_data freq_data; struct global_pstate_info *gpstates = policy->driver_data; - freq_data.pstate_id = powernv_pstate_info.min; - freq_data.gpstate_id = powernv_pstate_info.min; + freq_data.pstate_id = idx_to_pstate(powernv_pstate_info.min); + freq_data.gpstate_id = idx_to_pstate(powernv_pstate_info.min); smp_call_function_single(policy->cpu, set_pstate, &freq_data, 1); del_timer_sync(&gpstates->timer); } diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c index 7c4cd5c634f2..dc112481a408 100644 --- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c +++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c @@ -94,7 +94,7 @@ static int pmi_notifier(struct notifier_block *nb, unsigned long event, void *data) { struct cpufreq_policy *policy = data; - struct cpufreq_frequency_table *cbe_freqs; + struct cpufreq_frequency_table *cbe_freqs = policy->freq_table; u8 node; /* Should this really be called for CPUFREQ_ADJUST and CPUFREQ_NOTIFY @@ -103,7 +103,6 @@ static int pmi_notifier(struct notifier_block *nb, if (event == CPUFREQ_START) return 0; - cbe_freqs = cpufreq_frequency_get_table(policy->cpu); node = cbe_cpu_to_node(policy->cpu); pr_debug("got notified, event=%lu, node=%u\n", event, node); diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c index ae8eaed77b70..7b596fa38ad2 100644 --- a/drivers/cpufreq/s3c24xx-cpufreq.c +++ b/drivers/cpufreq/s3c24xx-cpufreq.c @@ -293,12 +293,8 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, __func__, policy, target_freq, relation); if (ftab) { - if (cpufreq_frequency_table_target(policy, ftab, - target_freq, relation, - &index)) { - s3c_freq_dbg("%s: table failed\n", __func__); - return -EINVAL; - } + index = cpufreq_frequency_table_target(policy, target_freq, + relation); s3c_freq_dbg("%s: adjust %d to entry %d (%u)\n", __func__, target_freq, index, ftab[index].frequency); @@ -315,7 +311,6 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, pll = NULL; } else { struct cpufreq_policy tmp_policy; - int ret; /* we keep the cpu pll table in Hz, to ensure we get an * accurate value for the PLL output. */ @@ -323,20 +318,14 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, tmp_policy.min = policy->min * 1000; tmp_policy.max = policy->max * 1000; tmp_policy.cpu = policy->cpu; + tmp_policy.freq_table = pll_reg; - /* cpufreq_frequency_table_target uses a pointer to 'index' - * which is the number of the table entry, not the value of + /* cpufreq_frequency_table_target returns the index + * of the table entry, not the value of * the table entry's index field. */ - ret = cpufreq_frequency_table_target(&tmp_policy, pll_reg, - target_freq, relation, - &index); - - if (ret < 0) { - pr_err("%s: no PLL available\n", __func__); - goto err_notpossible; - } - + index = cpufreq_frequency_table_target(&tmp_policy, target_freq, + relation); pll = pll_reg + index; s3c_freq_dbg("%s: target %u => %u\n", @@ -346,10 +335,6 @@ static int s3c_cpufreq_target(struct cpufreq_policy *policy, } return s3c_cpufreq_settarget(policy, target_freq, pll); - - err_notpossible: - pr_err("no compatible settings for %d\n", target_freq); - return -EINVAL; } struct clk *s3c_cpufreq_clk_get(struct device *dev, const char *name) @@ -571,11 +556,7 @@ static int s3c_cpufreq_build_freq(void) { int size, ret; - if (!cpu_cur.info->calc_freqtable) - return -EINVAL; - kfree(ftab); - ftab = NULL; size = cpu_cur.info->calc_freqtable(&cpu_cur, NULL, 0); size++; diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index 06d85917b6d5..9e07588ea9f5 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -246,12 +246,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index) new_freq = s5pv210_freq_table[index].frequency; /* Finding current running level index */ - if (cpufreq_frequency_table_target(policy, s5pv210_freq_table, - old_freq, CPUFREQ_RELATION_H, - &priv_index)) { - ret = -EINVAL; - goto exit; - } + priv_index = cpufreq_table_find_index_h(policy, old_freq); arm_volt = dvs_conf[index].arm_volt; int_volt = dvs_conf[index].int_volt; diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 78dac0e9da11..a5be56ec57f2 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -75,7 +75,7 @@ config DEVFREQ_GOV_PASSIVE comment "DEVFREQ Drivers" config ARM_EXYNOS_BUS_DEVFREQ - bool "ARM EXYNOS Generic Memory Bus DEVFREQ Driver" + tristate "ARM EXYNOS Generic Memory Bus DEVFREQ Driver" depends on ARCH_EXYNOS select DEVFREQ_GOV_SIMPLE_ONDEMAND select DEVFREQ_GOV_PASSIVE diff --git a/drivers/devfreq/devfreq-event.c b/drivers/devfreq/devfreq-event.c index 39b048eda2ce..9aea2c7ecbe6 100644 --- a/drivers/devfreq/devfreq-event.c +++ b/drivers/devfreq/devfreq-event.c @@ -15,7 +15,7 @@ #include <linux/kernel.h> #include <linux/err.h> #include <linux/init.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/of.h> @@ -481,13 +481,3 @@ static int __init devfreq_event_init(void) return 0; } subsys_initcall(devfreq_event_init); - -static void __exit devfreq_event_exit(void) -{ - class_destroy(devfreq_event_class); -} -module_exit(devfreq_event_exit); - -MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>"); -MODULE_DESCRIPTION("DEVFREQ-Event class support"); -MODULE_LICENSE("GPL"); diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index e92418facc92..478006b7764a 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -15,7 +15,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/init.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/pm_opp.h> @@ -707,10 +707,12 @@ struct devfreq *devfreq_get_devfreq_by_phandle(struct device *dev, int index) if (devfreq->dev.parent && devfreq->dev.parent->of_node == node) { mutex_unlock(&devfreq_list_lock); + of_node_put(node); return devfreq; } } mutex_unlock(&devfreq_list_lock); + of_node_put(node); return ERR_PTR(-EPROBE_DEFER); } @@ -1199,13 +1201,6 @@ static int __init devfreq_init(void) } subsys_initcall(devfreq_init); -static void __exit devfreq_exit(void) -{ - class_destroy(devfreq_class); - destroy_workqueue(devfreq_wq); -} -module_exit(devfreq_exit); - /* * The followings are helper functions for devfreq user device drivers with * OPP framework. @@ -1471,7 +1466,3 @@ void devm_devfreq_unregister_notifier(struct device *dev, devm_devfreq_dev_match, devfreq)); } EXPORT_SYMBOL(devm_devfreq_unregister_notifier); - -MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>"); -MODULE_DESCRIPTION("devfreq class support"); -MODULE_LICENSE("GPL"); diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig index 1e8b4f469f38..eb6f74a2b6b9 100644 --- a/drivers/devfreq/event/Kconfig +++ b/drivers/devfreq/event/Kconfig @@ -14,7 +14,7 @@ menuconfig PM_DEVFREQ_EVENT if PM_DEVFREQ_EVENT config DEVFREQ_EVENT_EXYNOS_NOCP - bool "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver" + tristate "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver" depends on ARCH_EXYNOS select PM_OPP help @@ -22,7 +22,7 @@ config DEVFREQ_EVENT_EXYNOS_NOCP (Network on Chip) Probe counters to measure the bandwidth of AXI bus. config DEVFREQ_EVENT_EXYNOS_PPMU - bool "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver" + tristate "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver" depends on ARCH_EXYNOS select PM_OPP help diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c index f312485f1451..845bf25fb9fb 100644 --- a/drivers/devfreq/event/exynos-ppmu.c +++ b/drivers/devfreq/event/exynos-ppmu.c @@ -482,7 +482,8 @@ static int exynos_ppmu_probe(struct platform_device *pdev) if (!info->edev) { dev_err(&pdev->dev, "failed to allocate memory devfreq-event devices\n"); - return -ENOMEM; + ret = -ENOMEM; + goto err; } edev = info->edev; platform_set_drvdata(pdev, info); diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c index 2363d0a189b7..29866f7e6d7e 100644 --- a/drivers/devfreq/exynos-bus.c +++ b/drivers/devfreq/exynos-bus.c @@ -383,7 +383,7 @@ err_clk: static int exynos_bus_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; + struct device_node *np = dev->of_node, *node; struct devfreq_dev_profile *profile; struct devfreq_simple_ondemand_data *ondemand_data; struct devfreq_passive_data *passive_data; @@ -407,7 +407,7 @@ static int exynos_bus_probe(struct platform_device *pdev) /* Parse the device-tree to get the resource information */ ret = exynos_bus_parse_of(np, bus); if (ret < 0) - goto err; + return ret; profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL); if (!profile) { @@ -415,10 +415,13 @@ static int exynos_bus_probe(struct platform_device *pdev) goto err; } - if (of_parse_phandle(dev->of_node, "devfreq", 0)) + node = of_parse_phandle(dev->of_node, "devfreq", 0); + if (node) { + of_node_put(node); goto passive; - else + } else { ret = exynos_bus_parent_parse_of(np, bus); + } if (ret < 0) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 252edba16e36..892d60fb225b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -421,29 +421,6 @@ static int acp_suspend(void *handle) static int acp_resume(void *handle) { - int i, ret; - struct acp_pm_domain *apd; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - /* return early if no ACP */ - if (!adev->acp.acp_genpd) - return 0; - - /* SMU block will power on ACP irrespective of ACP runtime status. - * Power off explicitly based on genpd ACP runtime status so that ACP - * hw and ACP-genpd status are in sync. - * 'suspend_power_off' represents "Power status before system suspend" - */ - if (adev->acp.acp_genpd->gpd.suspend_power_off == true) { - apd = container_of(&adev->acp.acp_genpd->gpd, - struct acp_pm_domain, gpd); - - for (i = 4; i >= 0 ; i--) { - ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i); - if (ret) - pr_err("ACP tile %d tile suspend failed\n", i); - } - } return 0; } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index b5dd41d13d3d..9b2ef248788d 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -46,8 +46,6 @@ * to avoid complications with the lapic timer workaround. * Have not seen issues with suspend, but may need same workaround here. * - * There is currently no kernel-based automatic probing/loading mechanism - * if the driver is built as a module. */ /* un-comment DEBUG to enable pr_debug() statements */ @@ -60,7 +58,7 @@ #include <linux/sched.h> #include <linux/notifier.h> #include <linux/cpu.h> -#include <linux/module.h> +#include <linux/moduleparam.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include <asm/mwait.h> @@ -828,6 +826,35 @@ static struct cpuidle_state bxt_cstates[] = { .enter = NULL } }; +static struct cpuidle_state dnv_cstates[] = { + { + .name = "C1-DNV", + .desc = "MWAIT 0x00", + .flags = MWAIT2flg(0x00), + .exit_latency = 2, + .target_residency = 2, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, + { + .name = "C1E-DNV", + .desc = "MWAIT 0x01", + .flags = MWAIT2flg(0x01), + .exit_latency = 10, + .target_residency = 20, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, + { + .name = "C6-DNV", + .desc = "MWAIT 0x20", + .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, + .exit_latency = 50, + .target_residency = 500, + .enter = &intel_idle, + .enter_freeze = intel_idle_freeze, }, + { + .enter = NULL } +}; + /** * intel_idle * @dev: cpuidle_device @@ -1017,6 +1044,11 @@ static const struct idle_cpu idle_cpu_bxt = { .disable_promotion_to_c1e = true, }; +static const struct idle_cpu idle_cpu_dnv = { + .state_table = dnv_cstates, + .disable_promotion_to_c1e = true, +}; + #define ICPU(model, cpu) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu } @@ -1053,9 +1085,9 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = { ICPU(INTEL_FAM6_SKYLAKE_X, idle_cpu_skx), ICPU(INTEL_FAM6_XEON_PHI_KNL, idle_cpu_knl), ICPU(INTEL_FAM6_ATOM_GOLDMONT, idle_cpu_bxt), + ICPU(INTEL_FAM6_ATOM_DENVERTON, idle_cpu_dnv), {} }; -MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids); /* * intel_idle_probe() @@ -1155,7 +1187,10 @@ static unsigned long long irtl_2_usec(unsigned long long irtl) { unsigned long long ns; - ns = irtl_ns_units[(irtl >> 10) & 0x3]; + if (!irtl) + return 0; + + ns = irtl_ns_units[(irtl >> 10) & 0x7]; return div64_u64((irtl & 0x3FF) * ns, 1000); } @@ -1168,43 +1203,39 @@ static unsigned long long irtl_2_usec(unsigned long long irtl) static void bxt_idle_state_table_update(void) { unsigned long long msr; + unsigned int usec; rdmsrl(MSR_PKGC6_IRTL, msr); - if (msr) { - unsigned int usec = irtl_2_usec(msr); - + usec = irtl_2_usec(msr); + if (usec) { bxt_cstates[2].exit_latency = usec; bxt_cstates[2].target_residency = usec; } rdmsrl(MSR_PKGC7_IRTL, msr); - if (msr) { - unsigned int usec = irtl_2_usec(msr); - + usec = irtl_2_usec(msr); + if (usec) { bxt_cstates[3].exit_latency = usec; bxt_cstates[3].target_residency = usec; } rdmsrl(MSR_PKGC8_IRTL, msr); - if (msr) { - unsigned int usec = irtl_2_usec(msr); - + usec = irtl_2_usec(msr); + if (usec) { bxt_cstates[4].exit_latency = usec; bxt_cstates[4].target_residency = usec; } rdmsrl(MSR_PKGC9_IRTL, msr); - if (msr) { - unsigned int usec = irtl_2_usec(msr); - + usec = irtl_2_usec(msr); + if (usec) { bxt_cstates[5].exit_latency = usec; bxt_cstates[5].target_residency = usec; } rdmsrl(MSR_PKGC10_IRTL, msr); - if (msr) { - unsigned int usec = irtl_2_usec(msr); - + usec = irtl_2_usec(msr); + if (usec) { bxt_cstates[6].exit_latency = usec; bxt_cstates[6].target_residency = usec; } @@ -1416,34 +1447,12 @@ static int __init intel_idle_init(void) return 0; } +device_initcall(intel_idle_init); -static void __exit intel_idle_exit(void) -{ - struct cpuidle_device *dev; - int i; - - cpu_notifier_register_begin(); - - if (lapic_timer_reliable_states != LAPIC_TIMER_ALWAYS_RELIABLE) - on_each_cpu(__setup_broadcast_timer, (void *)false, 1); - __unregister_cpu_notifier(&cpu_hotplug_notifier); - - for_each_possible_cpu(i) { - dev = per_cpu_ptr(intel_idle_cpuidle_devices, i); - cpuidle_unregister_device(dev); - } - - cpu_notifier_register_done(); - - cpuidle_unregister_driver(&intel_idle_driver); - free_percpu(intel_idle_cpuidle_devices); -} - -module_init(intel_idle_init); -module_exit(intel_idle_exit); - +/* + * We are not really modular, but we used to support that. Meaning we also + * support "intel_idle.max_cstate=..." at boot and also a read-only export of + * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param + * is the easiest way (currently) to continue doing that. + */ module_param(max_cstate, int, 0444); - -MODULE_AUTHOR("Len Brown <len.brown@intel.com>"); -MODULE_DESCRIPTION("Cpuidle driver for Intel Hardware v" INTEL_IDLE_VERSION); -MODULE_LICENSE("GPL"); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c8b4dbdd1bdd..badbddc683f0 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -530,8 +530,8 @@ static const struct pci_platform_pm_ops *pci_platform_pm; int pci_set_platform_pm(const struct pci_platform_pm_ops *ops) { - if (!ops->is_manageable || !ops->set_state || !ops->choose_state - || !ops->sleep_wake) + if (!ops->is_manageable || !ops->set_state || !ops->choose_state || + !ops->sleep_wake || !ops->run_wake || !ops->need_resume) return -EINVAL; pci_platform_pm = ops; return 0; diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 2e8f2be5b6f9..fbab29dfa793 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -336,14 +336,14 @@ static int release_zone(struct powercap_zone *power_zone) static int find_nr_power_limit(struct rapl_domain *rd) { - int i; + int i, nr_pl = 0; for (i = 0; i < NR_POWER_LIMITS; i++) { - if (rd->rpl[i].name == NULL) - break; + if (rd->rpl[i].name) + nr_pl++; } - return i; + return nr_pl; } static int set_domain_enable(struct powercap_zone *power_zone, bool mode) @@ -426,15 +426,38 @@ static const struct powercap_zone_ops zone_ops[] = { }, }; -static int set_power_limit(struct powercap_zone *power_zone, int id, + +/* + * Constraint index used by powercap can be different than power limit (PL) + * index in that some PLs maybe missing due to non-existant MSRs. So we + * need to convert here by finding the valid PLs only (name populated). + */ +static int contraint_to_pl(struct rapl_domain *rd, int cid) +{ + int i, j; + + for (i = 0, j = 0; i < NR_POWER_LIMITS; i++) { + if ((rd->rpl[i].name) && j++ == cid) { + pr_debug("%s: index %d\n", __func__, i); + return i; + } + } + + return -EINVAL; +} + +static int set_power_limit(struct powercap_zone *power_zone, int cid, u64 power_limit) { struct rapl_domain *rd; struct rapl_package *rp; int ret = 0; + int id; get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); + id = contraint_to_pl(rd, cid); + rp = rd->rp; if (rd->state & DOMAIN_STATE_BIOS_LOCKED) { @@ -461,16 +484,18 @@ set_exit: return ret; } -static int get_current_power_limit(struct powercap_zone *power_zone, int id, +static int get_current_power_limit(struct powercap_zone *power_zone, int cid, u64 *data) { struct rapl_domain *rd; u64 val; int prim; int ret = 0; + int id; get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); + id = contraint_to_pl(rd, cid); switch (rd->rpl[id].prim_id) { case PL1_ENABLE: prim = POWER_LIMIT1; @@ -492,14 +517,17 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int id, return ret; } -static int set_time_window(struct powercap_zone *power_zone, int id, +static int set_time_window(struct powercap_zone *power_zone, int cid, u64 window) { struct rapl_domain *rd; int ret = 0; + int id; get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); + id = contraint_to_pl(rd, cid); + switch (rd->rpl[id].prim_id) { case PL1_ENABLE: rapl_write_data_raw(rd, TIME_WINDOW1, window); @@ -514,14 +542,17 @@ static int set_time_window(struct powercap_zone *power_zone, int id, return ret; } -static int get_time_window(struct powercap_zone *power_zone, int id, u64 *data) +static int get_time_window(struct powercap_zone *power_zone, int cid, u64 *data) { struct rapl_domain *rd; u64 val; int ret = 0; + int id; get_online_cpus(); rd = power_zone_to_rapl_domain(power_zone); + id = contraint_to_pl(rd, cid); + switch (rd->rpl[id].prim_id) { case PL1_ENABLE: ret = rapl_read_data_raw(rd, TIME_WINDOW1, true, &val); @@ -540,15 +571,17 @@ static int get_time_window(struct powercap_zone *power_zone, int id, u64 *data) return ret; } -static const char *get_constraint_name(struct powercap_zone *power_zone, int id) +static const char *get_constraint_name(struct powercap_zone *power_zone, int cid) { - struct rapl_power_limit *rpl; struct rapl_domain *rd; + int id; rd = power_zone_to_rapl_domain(power_zone); - rpl = (struct rapl_power_limit *) &rd->rpl[id]; + id = contraint_to_pl(rd, cid); + if (id >= 0) + return rd->rpl[id].name; - return rpl->name; + return NULL; } @@ -1101,6 +1134,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { RAPL_CPU(INTEL_FAM6_SANDYBRIDGE_X, rapl_defaults_core), RAPL_CPU(INTEL_FAM6_IVYBRIDGE, rapl_defaults_core), + RAPL_CPU(INTEL_FAM6_IVYBRIDGE_X, rapl_defaults_core), RAPL_CPU(INTEL_FAM6_HASWELL_CORE, rapl_defaults_core), RAPL_CPU(INTEL_FAM6_HASWELL_ULT, rapl_defaults_core), @@ -1123,6 +1157,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD1, rapl_defaults_tng), RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD2, rapl_defaults_ann), RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT, rapl_defaults_core), + RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON, rapl_defaults_core), RAPL_CPU(INTEL_FAM6_XEON_PHI_KNL, rapl_defaults_hsw_server), {} @@ -1381,6 +1416,37 @@ static int rapl_check_domain(int cpu, int domain) return 0; } + +/* + * Check if power limits are available. Two cases when they are not available: + * 1. Locked by BIOS, in this case we still provide read-only access so that + * users can see what limit is set by the BIOS. + * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not + * exist at all. In this case, we do not show the contraints in powercap. + * + * Called after domains are detected and initialized. + */ +static void rapl_detect_powerlimit(struct rapl_domain *rd) +{ + u64 val64; + int i; + + /* check if the domain is locked by BIOS, ignore if MSR doesn't exist */ + if (!rapl_read_data_raw(rd, FW_LOCK, false, &val64)) { + if (val64) { + pr_info("RAPL package %d domain %s locked by BIOS\n", + rd->rp->id, rd->name); + rd->state |= DOMAIN_STATE_BIOS_LOCKED; + } + } + /* check if power limit MSRs exists, otherwise domain is monitoring only */ + for (i = 0; i < NR_POWER_LIMITS; i++) { + int prim = rd->rpl[i].prim_id; + if (rapl_read_data_raw(rd, prim, false, &val64)) + rd->rpl[i].name = NULL; + } +} + /* Detect active and valid domains for the given CPU, caller must * ensure the CPU belongs to the targeted package and CPU hotlug is disabled. */ @@ -1389,7 +1455,6 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu) int i; int ret = 0; struct rapl_domain *rd; - u64 locked; for (i = 0; i < RAPL_DOMAIN_MAX; i++) { /* use physical package id to read counters */ @@ -1400,7 +1465,7 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu) } rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX); if (!rp->nr_domains) { - pr_err("no valid rapl domains found in package %d\n", rp->id); + pr_debug("no valid rapl domains found in package %d\n", rp->id); ret = -ENODEV; goto done; } @@ -1414,17 +1479,9 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu) } rapl_init_domains(rp); - for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) { - /* check if the domain is locked by BIOS */ - ret = rapl_read_data_raw(rd, FW_LOCK, false, &locked); - if (ret) - return ret; - if (locked) { - pr_info("RAPL package %d domain %s locked by BIOS\n", - rp->id, rd->name); - rd->state |= DOMAIN_STATE_BIOS_LOCKED; - } - } + for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) + rapl_detect_powerlimit(rd); + done: diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 5b4b47ed948b..3788ed74c9ab 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -787,22 +787,34 @@ __cpufreq_cooling_register(struct device_node *np, const struct cpumask *clip_cpus, u32 capacitance, get_static_t plat_static_func) { + struct cpufreq_policy *policy; struct thermal_cooling_device *cool_dev; struct cpufreq_cooling_device *cpufreq_dev; char dev_name[THERMAL_NAME_LENGTH]; struct cpufreq_frequency_table *pos, *table; + struct cpumask temp_mask; unsigned int freq, i, num_cpus; int ret; - table = cpufreq_frequency_get_table(cpumask_first(clip_cpus)); + cpumask_and(&temp_mask, clip_cpus, cpu_online_mask); + policy = cpufreq_cpu_get(cpumask_first(&temp_mask)); + if (!policy) { + pr_debug("%s: CPUFreq policy not found\n", __func__); + return ERR_PTR(-EPROBE_DEFER); + } + + table = policy->freq_table; if (!table) { pr_debug("%s: CPUFreq table not found\n", __func__); - return ERR_PTR(-EPROBE_DEFER); + cool_dev = ERR_PTR(-ENODEV); + goto put_policy; } cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL); - if (!cpufreq_dev) - return ERR_PTR(-ENOMEM); + if (!cpufreq_dev) { + cool_dev = ERR_PTR(-ENOMEM); + goto put_policy; + } num_cpus = cpumask_weight(clip_cpus); cpufreq_dev->time_in_idle = kcalloc(num_cpus, @@ -892,7 +904,7 @@ __cpufreq_cooling_register(struct device_node *np, CPUFREQ_POLICY_NOTIFIER); mutex_unlock(&cooling_cpufreq_lock); - return cool_dev; + goto put_policy; remove_idr: release_idr(&cpufreq_idr, cpufreq_dev->id); @@ -906,6 +918,8 @@ free_time_in_idle: kfree(cpufreq_dev->time_in_idle); free_cdev: kfree(cpufreq_dev); +put_policy: + cpufreq_cpu_put(policy); return cool_dev; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 4e81e08db752..631ba33bbe9f 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -36,6 +36,12 @@ struct cpufreq_governor; +enum cpufreq_table_sorting { + CPUFREQ_TABLE_UNSORTED, + CPUFREQ_TABLE_SORTED_ASCENDING, + CPUFREQ_TABLE_SORTED_DESCENDING +}; + struct cpufreq_freqs { unsigned int cpu; /* cpu nr */ unsigned int old; @@ -87,6 +93,7 @@ struct cpufreq_policy { struct cpufreq_user_policy user_policy; struct cpufreq_frequency_table *freq_table; + enum cpufreq_table_sorting freq_table_sorted; struct list_head policy_list; struct kobject kobj; @@ -113,6 +120,10 @@ struct cpufreq_policy { bool fast_switch_possible; bool fast_switch_enabled; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ + unsigned int cached_target_freq; + int cached_resolved_idx; + /* Synchronization for frequency transitions */ bool transition_ongoing; /* Tracks transition status */ spinlock_t transition_lock; @@ -185,6 +196,18 @@ static inline unsigned int cpufreq_quick_get_max(unsigned int cpu) static inline void disable_cpufreq(void) { } #endif +#ifdef CONFIG_CPU_FREQ_STAT +void cpufreq_stats_create_table(struct cpufreq_policy *policy); +void cpufreq_stats_free_table(struct cpufreq_policy *policy); +void cpufreq_stats_record_transition(struct cpufreq_policy *policy, + unsigned int new_freq); +#else +static inline void cpufreq_stats_create_table(struct cpufreq_policy *policy) { } +static inline void cpufreq_stats_free_table(struct cpufreq_policy *policy) { } +static inline void cpufreq_stats_record_transition(struct cpufreq_policy *policy, + unsigned int new_freq) { } +#endif /* CONFIG_CPU_FREQ_STAT */ + /********************************************************************* * CPUFREQ DRIVER INTERFACE * *********************************************************************/ @@ -251,6 +274,16 @@ struct cpufreq_driver { unsigned int index); unsigned int (*fast_switch)(struct cpufreq_policy *policy, unsigned int target_freq); + + /* + * Caches and returns the lowest driver-supported frequency greater than + * or equal to the target frequency, subject to any driver limitations. + * Does not set the frequency. Only to be implemented for drivers with + * target(). + */ + unsigned int (*resolve_freq)(struct cpufreq_policy *policy, + unsigned int target_freq); + /* * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION * unset. @@ -455,18 +488,13 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, #define MIN_LATENCY_MULTIPLIER (20) #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) -/* Governor Events */ -#define CPUFREQ_GOV_START 1 -#define CPUFREQ_GOV_STOP 2 -#define CPUFREQ_GOV_LIMITS 3 -#define CPUFREQ_GOV_POLICY_INIT 4 -#define CPUFREQ_GOV_POLICY_EXIT 5 - struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; - int initialized; - int (*governor) (struct cpufreq_policy *policy, - unsigned int event); + int (*init)(struct cpufreq_policy *policy); + void (*exit)(struct cpufreq_policy *policy); + int (*start)(struct cpufreq_policy *policy); + void (*stop)(struct cpufreq_policy *policy); + void (*limits)(struct cpufreq_policy *policy); ssize_t (*show_setspeed) (struct cpufreq_policy *policy, char *buf); int (*store_setspeed) (struct cpufreq_policy *policy, @@ -487,12 +515,22 @@ int cpufreq_driver_target(struct cpufreq_policy *policy, int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation); +unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, + unsigned int target_freq); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); struct cpufreq_governor *cpufreq_default_governor(void); struct cpufreq_governor *cpufreq_fallback_governor(void); +static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy) +{ + if (policy->max < policy->cur) + __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H); + else if (policy->min > policy->cur) + __cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L); +} + /* Governor attribute set */ struct gov_attr_set { struct kobject kobj; @@ -582,11 +620,9 @@ int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); int cpufreq_generic_frequency_table_verify(struct cpufreq_policy *policy); -int cpufreq_frequency_table_target(struct cpufreq_policy *policy, - struct cpufreq_frequency_table *table, - unsigned int target_freq, - unsigned int relation, - unsigned int *index); +int cpufreq_table_index_unsorted(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation); int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy, unsigned int freq); @@ -597,6 +633,227 @@ int cpufreq_boost_trigger_state(int state); int cpufreq_boost_enabled(void); int cpufreq_enable_boost_support(void); bool policy_has_boost_freq(struct cpufreq_policy *policy); + +/* Find lowest freq at or above target in a table in ascending order */ +static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + unsigned int freq; + int i, best = -1; + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + freq = table[i].frequency; + + if (freq >= target_freq) + return i; + + best = i; + } + + return best; +} + +/* Find lowest freq at or above target in a table in descending order */ +static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + unsigned int freq; + int i, best = -1; + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + freq = table[i].frequency; + + if (freq == target_freq) + return i; + + if (freq > target_freq) { + best = i; + continue; + } + + /* No freq found above target_freq */ + if (best == -1) + return i; + + return best; + } + + return best; +} + +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + + if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) + return cpufreq_table_find_index_al(policy, target_freq); + else + return cpufreq_table_find_index_dl(policy, target_freq); +} + +/* Find highest freq at or below target in a table in ascending order */ +static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + unsigned int freq; + int i, best = -1; + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + freq = table[i].frequency; + + if (freq == target_freq) + return i; + + if (freq < target_freq) { + best = i; + continue; + } + + /* No freq found below target_freq */ + if (best == -1) + return i; + + return best; + } + + return best; +} + +/* Find highest freq at or below target in a table in descending order */ +static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + unsigned int freq; + int i, best = -1; + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + freq = table[i].frequency; + + if (freq <= target_freq) + return i; + + best = i; + } + + return best; +} + +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + + if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) + return cpufreq_table_find_index_ah(policy, target_freq); + else + return cpufreq_table_find_index_dh(policy, target_freq); +} + +/* Find closest freq to target in a table in ascending order */ +static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + unsigned int freq; + int i, best = -1; + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + freq = table[i].frequency; + + if (freq == target_freq) + return i; + + if (freq < target_freq) { + best = i; + continue; + } + + /* No freq found below target_freq */ + if (best == -1) + return i; + + /* Choose the closest freq */ + if (target_freq - table[best].frequency > freq - target_freq) + return i; + + return best; + } + + return best; +} + +/* Find closest freq to target in a table in descending order */ +static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + struct cpufreq_frequency_table *table = policy->freq_table; + unsigned int freq; + int i, best = -1; + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + freq = table[i].frequency; + + if (freq == target_freq) + return i; + + if (freq > target_freq) { + best = i; + continue; + } + + /* No freq found above target_freq */ + if (best == -1) + return i; + + /* Choose the closest freq */ + if (table[best].frequency - target_freq > target_freq - freq) + return i; + + return best; + } + + return best; +} + +/* Works only on sorted freq-tables */ +static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy, + unsigned int target_freq) +{ + target_freq = clamp_val(target_freq, policy->min, policy->max); + + if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING) + return cpufreq_table_find_index_ac(policy, target_freq); + else + return cpufreq_table_find_index_dc(policy, target_freq); +} + +static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)) + return cpufreq_table_index_unsorted(policy, target_freq, + relation); + + switch (relation) { + case CPUFREQ_RELATION_L: + return cpufreq_table_find_index_l(policy, target_freq); + case CPUFREQ_RELATION_H: + return cpufreq_table_find_index_h(policy, target_freq); + case CPUFREQ_RELATION_C: + return cpufreq_table_find_index_c(policy, target_freq); + default: + pr_err("%s: Invalid relation: %d\n", __func__, relation); + return -EINVAL; + } +} #else static inline int cpufreq_boost_trigger_state(int state) { @@ -617,8 +874,6 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) return false; } #endif -/* the following funtion is for cpufreq core use only */ -struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu); /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h index 308d6044f153..09779b0ae720 100644 --- a/include/linux/pm_clock.h +++ b/include/linux/pm_clock.h @@ -42,6 +42,7 @@ extern int pm_clk_create(struct device *dev); extern void pm_clk_destroy(struct device *dev); extern int pm_clk_add(struct device *dev, const char *con_id); extern int pm_clk_add_clk(struct device *dev, struct clk *clk); +extern int of_pm_clk_add_clk(struct device *dev, const char *name); extern int of_pm_clk_add_clks(struct device *dev); extern void pm_clk_remove(struct device *dev, const char *con_id); extern void pm_clk_remove_clk(struct device *dev, struct clk *clk); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 39285c7bd3f5..31fec858088c 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -57,7 +57,6 @@ struct generic_pm_domain { unsigned int device_count; /* Number of devices */ unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ - bool suspend_power_off; /* Power status before system suspend */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); struct gpd_dev_ops dev_ops; @@ -128,8 +127,8 @@ extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); -extern void pm_genpd_init(struct generic_pm_domain *genpd, - struct dev_power_governor *gov, bool is_off); +extern int pm_genpd_init(struct generic_pm_domain *genpd, + struct dev_power_governor *gov, bool is_off); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -164,9 +163,10 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, { return -ENOSYS; } -static inline void pm_genpd_init(struct generic_pm_domain *genpd, - struct dev_power_governor *gov, bool is_off) +static inline int pm_genpd_init(struct generic_pm_domain *genpd, + struct dev_power_governor *gov, bool is_off) { + return -ENOSYS; } #endif diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 8b6ec7ef0854..7693e39b14fe 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -18,12 +18,11 @@ static inline void pm_set_vt_switch(int do_switch) #endif #ifdef CONFIG_VT_CONSOLE_SLEEP -extern int pm_prepare_console(void); +extern void pm_prepare_console(void); extern void pm_restore_console(void); #else -static inline int pm_prepare_console(void) +static inline void pm_prepare_console(void) { - return 0; } static inline void pm_restore_console(void) diff --git a/kernel/power/Makefile b/kernel/power/Makefile index cb880a14cc39..eb4f717705ba 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -1,6 +1,8 @@ ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG +KASAN_SANITIZE_snapshot.o := n + obj-y += qos.o obj-$(CONFIG_PM) += main.o obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o diff --git a/kernel/power/console.c b/kernel/power/console.c index aba9c545a0e3..0e781798b0b3 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -126,17 +126,17 @@ out: return ret; } -int pm_prepare_console(void) +void pm_prepare_console(void) { if (!pm_vt_switch()) - return 0; + return; orig_fgconsole = vt_move_to_console(SUSPEND_CONSOLE, 1); if (orig_fgconsole < 0) - return 1; + return; orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE); - return 0; + return; } void pm_restore_console(void) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 9021387c6ff4..a881c6a7ba74 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -52,6 +52,7 @@ enum { #ifdef CONFIG_SUSPEND HIBERNATION_SUSPEND, #endif + HIBERNATION_TEST_RESUME, /* keep last */ __HIBERNATION_AFTER_LAST }; @@ -409,6 +410,11 @@ int hibernation_snapshot(int platform_mode) goto Close; } +int __weak hibernate_resume_nonboot_cpu_disable(void) +{ + return disable_nonboot_cpus(); +} + /** * resume_target_kernel - Restore system state from a hibernation image. * @platform_mode: Whether or not to use the platform driver. @@ -433,7 +439,7 @@ static int resume_target_kernel(bool platform_mode) if (error) goto Cleanup; - error = disable_nonboot_cpus(); + error = hibernate_resume_nonboot_cpu_disable(); if (error) goto Enable_cpus; @@ -642,12 +648,39 @@ static void power_down(void) cpu_relax(); } +static int load_image_and_restore(void) +{ + int error; + unsigned int flags; + + pr_debug("PM: Loading hibernation image.\n"); + + lock_device_hotplug(); + error = create_basic_memory_bitmaps(); + if (error) + goto Unlock; + + error = swsusp_read(&flags); + swsusp_close(FMODE_READ); + if (!error) + hibernation_restore(flags & SF_PLATFORM_MODE); + + printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n"); + swsusp_free(); + free_basic_memory_bitmaps(); + Unlock: + unlock_device_hotplug(); + + return error; +} + /** * hibernate - Carry out system hibernation, including saving the image. */ int hibernate(void) { - int error; + int error, nr_calls = 0; + bool snapshot_test = false; if (!hibernation_available()) { pr_debug("PM: Hibernation not available.\n"); @@ -662,9 +695,11 @@ int hibernate(void) } pm_prepare_console(); - error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); - if (error) + error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); + if (error) { + nr_calls--; goto Exit; + } printk(KERN_INFO "PM: Syncing filesystems ... "); sys_sync(); @@ -697,8 +732,12 @@ int hibernate(void) pr_debug("PM: writing image.\n"); error = swsusp_write(flags); swsusp_free(); - if (!error) - power_down(); + if (!error) { + if (hibernation_mode == HIBERNATION_TEST_RESUME) + snapshot_test = true; + else + power_down(); + } in_suspend = 0; pm_restore_gfp_mask(); } else { @@ -709,12 +748,18 @@ int hibernate(void) free_basic_memory_bitmaps(); Thaw: unlock_device_hotplug(); + if (snapshot_test) { + pr_debug("PM: Checking hibernation image\n"); + error = swsusp_check(); + if (!error) + error = load_image_and_restore(); + } thaw_processes(); /* Don't bother checking whether freezer_test_done is true */ freezer_test_done = false; Exit: - pm_notifier_call_chain(PM_POST_HIBERNATION); + __pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL); pm_restore_console(); atomic_inc(&snapshot_device_available); Unlock: @@ -740,8 +785,7 @@ int hibernate(void) */ static int software_resume(void) { - int error; - unsigned int flags; + int error, nr_calls = 0; /* * If the user said "noresume".. bail out early. @@ -827,35 +871,20 @@ static int software_resume(void) } pm_prepare_console(); - error = pm_notifier_call_chain(PM_RESTORE_PREPARE); - if (error) + error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); + if (error) { + nr_calls--; goto Close_Finish; + } pr_debug("PM: Preparing processes for restore.\n"); error = freeze_processes(); if (error) goto Close_Finish; - - pr_debug("PM: Loading hibernation image.\n"); - - lock_device_hotplug(); - error = create_basic_memory_bitmaps(); - if (error) - goto Thaw; - - error = swsusp_read(&flags); - swsusp_close(FMODE_READ); - if (!error) - hibernation_restore(flags & SF_PLATFORM_MODE); - - printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n"); - swsusp_free(); - free_basic_memory_bitmaps(); - Thaw: - unlock_device_hotplug(); + error = load_image_and_restore(); thaw_processes(); Finish: - pm_notifier_call_chain(PM_POST_RESTORE); + __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); pm_restore_console(); atomic_inc(&snapshot_device_available); /* For success case, the suspend path will release the lock */ @@ -878,6 +907,7 @@ static const char * const hibernation_modes[] = { #ifdef CONFIG_SUSPEND [HIBERNATION_SUSPEND] = "suspend", #endif + [HIBERNATION_TEST_RESUME] = "test_resume", }; /* @@ -924,6 +954,7 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, #ifdef CONFIG_SUSPEND case HIBERNATION_SUSPEND: #endif + case HIBERNATION_TEST_RESUME: break; case HIBERNATION_PLATFORM: if (hibernation_ops) @@ -970,6 +1001,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, #ifdef CONFIG_SUSPEND case HIBERNATION_SUSPEND: #endif + case HIBERNATION_TEST_RESUME: hibernation_mode = mode; break; case HIBERNATION_PLATFORM: @@ -1115,13 +1147,16 @@ static int __init resume_offset_setup(char *str) static int __init hibernate_setup(char *str) { - if (!strncmp(str, "noresume", 8)) + if (!strncmp(str, "noresume", 8)) { noresume = 1; - else if (!strncmp(str, "nocompress", 10)) + } else if (!strncmp(str, "nocompress", 10)) { nocompress = 1; - else if (!strncmp(str, "no", 2)) { + } else if (!strncmp(str, "no", 2)) { noresume = 1; nohibernate = 1; + } else if (IS_ENABLED(CONFIG_DEBUG_RODATA) + && !strncmp(str, "protect_image", 13)) { + enable_restore_image_protection(); } return 1; } diff --git a/kernel/power/main.c b/kernel/power/main.c index 27946975eff0..5ea50b1b7595 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -38,12 +38,19 @@ int unregister_pm_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_pm_notifier); -int pm_notifier_call_chain(unsigned long val) +int __pm_notifier_call_chain(unsigned long val, int nr_to_call, int *nr_calls) { - int ret = blocking_notifier_call_chain(&pm_chain_head, val, NULL); + int ret; + + ret = __blocking_notifier_call_chain(&pm_chain_head, val, NULL, + nr_to_call, nr_calls); return notifier_to_errno(ret); } +int pm_notifier_call_chain(unsigned long val) +{ + return __pm_notifier_call_chain(val, -1, NULL); +} /* If set, devices may be suspended and resumed asynchronously. */ int pm_async_enabled = 1; diff --git a/kernel/power/power.h b/kernel/power/power.h index efe1b3b17c88..242d8b827dd5 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -38,6 +38,8 @@ static inline char *check_image_kernel(struct swsusp_info *info) } #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ +extern int hibernate_resume_nonboot_cpu_disable(void); + /* * Keep some memory free so that I/O operations can succeed without paging * [Might this be more than 4 MB?] @@ -59,6 +61,13 @@ extern int hibernation_snapshot(int platform_mode); extern int hibernation_restore(int platform_mode); extern int hibernation_platform_enter(void); +#ifdef CONFIG_DEBUG_RODATA +/* kernel/power/snapshot.c */ +extern void enable_restore_image_protection(void); +#else +static inline void enable_restore_image_protection(void) {} +#endif /* CONFIG_DEBUG_RODATA */ + #else /* !CONFIG_HIBERNATION */ static inline void hibernate_reserved_size_init(void) {} @@ -200,6 +209,8 @@ static inline void suspend_test_finish(const char *label) {} #ifdef CONFIG_PM_SLEEP /* kernel/power/main.c */ +extern int __pm_notifier_call_chain(unsigned long val, int nr_to_call, + int *nr_calls); extern int pm_notifier_call_chain(unsigned long val); #endif diff --git a/kernel/power/process.c b/kernel/power/process.c index 0c2ee9761d57..8f27d5a8adf6 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -89,6 +89,9 @@ static int try_to_freeze_tasks(bool user_only) elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); + if (wq_busy) + show_workqueue_state(); + if (!wakeup) { read_lock(&tasklist_lock); for_each_process_thread(g, p) { diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 3a970604308f..d90df926b59f 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -38,6 +38,43 @@ #include "power.h" +#ifdef CONFIG_DEBUG_RODATA +static bool hibernate_restore_protection; +static bool hibernate_restore_protection_active; + +void enable_restore_image_protection(void) +{ + hibernate_restore_protection = true; +} + +static inline void hibernate_restore_protection_begin(void) +{ + hibernate_restore_protection_active = hibernate_restore_protection; +} + +static inline void hibernate_restore_protection_end(void) +{ + hibernate_restore_protection_active = false; +} + +static inline void hibernate_restore_protect_page(void *page_address) +{ + if (hibernate_restore_protection_active) + set_memory_ro((unsigned long)page_address, 1); +} + +static inline void hibernate_restore_unprotect_page(void *page_address) +{ + if (hibernate_restore_protection_active) + set_memory_rw((unsigned long)page_address, 1); +} +#else +static inline void hibernate_restore_protection_begin(void) {} +static inline void hibernate_restore_protection_end(void) {} +static inline void hibernate_restore_protect_page(void *page_address) {} +static inline void hibernate_restore_unprotect_page(void *page_address) {} +#endif /* CONFIG_DEBUG_RODATA */ + static int swsusp_page_is_free(struct page *); static void swsusp_set_page_forbidden(struct page *); static void swsusp_unset_page_forbidden(struct page *); @@ -67,25 +104,32 @@ void __init hibernate_image_size_init(void) image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; } -/* List of PBEs needed for restoring the pages that were allocated before +/* + * List of PBEs needed for restoring the pages that were allocated before * the suspend and included in the suspend image, but have also been * allocated by the "resume" kernel, so their contents cannot be written * directly to their "original" page frames. */ struct pbe *restore_pblist; -/* Pointer to an auxiliary buffer (1 page) */ -static void *buffer; +/* struct linked_page is used to build chains of pages */ -/** - * @safe_needed - on resume, for storing the PBE list and the image, - * we can only use memory pages that do not conflict with the pages - * used before suspend. The unsafe pages have PageNosaveFree set - * and we count them using unsafe_pages. - * - * Each allocated image page is marked as PageNosave and PageNosaveFree - * so that swsusp_free() can release it. +#define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) + +struct linked_page { + struct linked_page *next; + char data[LINKED_PAGE_DATA_SIZE]; +} __packed; + +/* + * List of "safe" pages (ie. pages that were not used by the image kernel + * before hibernation) that may be used as temporary storage for image kernel + * memory contents. */ +static struct linked_page *safe_pages_list; + +/* Pointer to an auxiliary buffer (1 page) */ +static void *buffer; #define PG_ANY 0 #define PG_SAFE 1 @@ -94,6 +138,19 @@ static void *buffer; static unsigned int allocated_unsafe_pages; +/** + * get_image_page - Allocate a page for a hibernation image. + * @gfp_mask: GFP mask for the allocation. + * @safe_needed: Get pages that were not used before hibernation (restore only) + * + * During image restoration, for storing the PBE list and the image data, we can + * only use memory pages that do not conflict with the pages used before + * hibernation. The "unsafe" pages have PageNosaveFree set and we count them + * using allocated_unsafe_pages. + * + * Each allocated image page is marked as PageNosave and PageNosaveFree so that + * swsusp_free() can release it. + */ static void *get_image_page(gfp_t gfp_mask, int safe_needed) { void *res; @@ -113,9 +170,21 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed) return res; } +static void *__get_safe_page(gfp_t gfp_mask) +{ + if (safe_pages_list) { + void *ret = safe_pages_list; + + safe_pages_list = safe_pages_list->next; + memset(ret, 0, PAGE_SIZE); + return ret; + } + return get_image_page(gfp_mask, PG_SAFE); +} + unsigned long get_safe_page(gfp_t gfp_mask) { - return (unsigned long)get_image_page(gfp_mask, PG_SAFE); + return (unsigned long)__get_safe_page(gfp_mask); } static struct page *alloc_image_page(gfp_t gfp_mask) @@ -130,11 +199,22 @@ static struct page *alloc_image_page(gfp_t gfp_mask) return page; } +static void recycle_safe_page(void *page_address) +{ + struct linked_page *lp = page_address; + + lp->next = safe_pages_list; + safe_pages_list = lp; +} + /** - * free_image_page - free page represented by @addr, allocated with - * get_image_page (page flags set by it must be cleared) + * free_image_page - Free a page allocated for hibernation image. + * @addr: Address of the page to free. + * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page. + * + * The page to free should have been allocated by get_image_page() (page flags + * set by it are affected). */ - static inline void free_image_page(void *addr, int clear_nosave_free) { struct page *page; @@ -150,17 +230,8 @@ static inline void free_image_page(void *addr, int clear_nosave_free) __free_page(page); } -/* struct linked_page is used to build chains of pages */ - -#define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) - -struct linked_page { - struct linked_page *next; - char data[LINKED_PAGE_DATA_SIZE]; -} __packed; - -static inline void -free_list_of_pages(struct linked_page *list, int clear_page_nosave) +static inline void free_list_of_pages(struct linked_page *list, + int clear_page_nosave) { while (list) { struct linked_page *lp = list->next; @@ -170,30 +241,28 @@ free_list_of_pages(struct linked_page *list, int clear_page_nosave) } } -/** - * struct chain_allocator is used for allocating small objects out of - * a linked list of pages called 'the chain'. - * - * The chain grows each time when there is no room for a new object in - * the current page. The allocated objects cannot be freed individually. - * It is only possible to free them all at once, by freeing the entire - * chain. - * - * NOTE: The chain allocator may be inefficient if the allocated objects - * are not much smaller than PAGE_SIZE. - */ - +/* + * struct chain_allocator is used for allocating small objects out of + * a linked list of pages called 'the chain'. + * + * The chain grows each time when there is no room for a new object in + * the current page. The allocated objects cannot be freed individually. + * It is only possible to free them all at once, by freeing the entire + * chain. + * + * NOTE: The chain allocator may be inefficient if the allocated objects + * are not much smaller than PAGE_SIZE. + */ struct chain_allocator { struct linked_page *chain; /* the chain */ unsigned int used_space; /* total size of objects allocated out - * of the current page - */ + of the current page */ gfp_t gfp_mask; /* mask for allocating pages */ int safe_needed; /* if set, only "safe" pages are allocated */ }; -static void -chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed) +static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask, + int safe_needed) { ca->chain = NULL; ca->used_space = LINKED_PAGE_DATA_SIZE; @@ -208,7 +277,8 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { struct linked_page *lp; - lp = get_image_page(ca->gfp_mask, ca->safe_needed); + lp = ca->safe_needed ? __get_safe_page(ca->gfp_mask) : + get_image_page(ca->gfp_mask, PG_ANY); if (!lp) return NULL; @@ -222,44 +292,44 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) } /** - * Data types related to memory bitmaps. + * Data types related to memory bitmaps. * - * Memory bitmap is a structure consiting of many linked lists of - * objects. The main list's elements are of type struct zone_bitmap - * and each of them corresonds to one zone. For each zone bitmap - * object there is a list of objects of type struct bm_block that - * represent each blocks of bitmap in which information is stored. + * Memory bitmap is a structure consiting of many linked lists of + * objects. The main list's elements are of type struct zone_bitmap + * and each of them corresonds to one zone. For each zone bitmap + * object there is a list of objects of type struct bm_block that + * represent each blocks of bitmap in which information is stored. * - * struct memory_bitmap contains a pointer to the main list of zone - * bitmap objects, a struct bm_position used for browsing the bitmap, - * and a pointer to the list of pages used for allocating all of the - * zone bitmap objects and bitmap block objects. + * struct memory_bitmap contains a pointer to the main list of zone + * bitmap objects, a struct bm_position used for browsing the bitmap, + * and a pointer to the list of pages used for allocating all of the + * zone bitmap objects and bitmap block objects. * - * NOTE: It has to be possible to lay out the bitmap in memory - * using only allocations of order 0. Additionally, the bitmap is - * designed to work with arbitrary number of zones (this is over the - * top for now, but let's avoid making unnecessary assumptions ;-). + * NOTE: It has to be possible to lay out the bitmap in memory + * using only allocations of order 0. Additionally, the bitmap is + * designed to work with arbitrary number of zones (this is over the + * top for now, but let's avoid making unnecessary assumptions ;-). * - * struct zone_bitmap contains a pointer to a list of bitmap block - * objects and a pointer to the bitmap block object that has been - * most recently used for setting bits. Additionally, it contains the - * pfns that correspond to the start and end of the represented zone. + * struct zone_bitmap contains a pointer to a list of bitmap block + * objects and a pointer to the bitmap block object that has been + * most recently used for setting bits. Additionally, it contains the + * PFNs that correspond to the start and end of the represented zone. * - * struct bm_block contains a pointer to the memory page in which - * information is stored (in the form of a block of bitmap) - * It also contains the pfns that correspond to the start and end of - * the represented memory area. + * struct bm_block contains a pointer to the memory page in which + * information is stored (in the form of a block of bitmap) + * It also contains the pfns that correspond to the start and end of + * the represented memory area. * - * The memory bitmap is organized as a radix tree to guarantee fast random - * access to the bits. There is one radix tree for each zone (as returned - * from create_mem_extents). + * The memory bitmap is organized as a radix tree to guarantee fast random + * access to the bits. There is one radix tree for each zone (as returned + * from create_mem_extents). * - * One radix tree is represented by one struct mem_zone_bm_rtree. There are - * two linked lists for the nodes of the tree, one for the inner nodes and - * one for the leave nodes. The linked leave nodes are used for fast linear - * access of the memory bitmap. + * One radix tree is represented by one struct mem_zone_bm_rtree. There are + * two linked lists for the nodes of the tree, one for the inner nodes and + * one for the leave nodes. The linked leave nodes are used for fast linear + * access of the memory bitmap. * - * The struct rtree_node represents one node of the radix tree. + * The struct rtree_node represents one node of the radix tree. */ #define BM_END_OF_MAP (~0UL) @@ -305,9 +375,8 @@ struct bm_position { struct memory_bitmap { struct list_head zones; struct linked_page *p_list; /* list of pages used to store zone - * bitmap objects and bitmap block - * objects - */ + bitmap objects and bitmap block + objects */ struct bm_position cur; /* most recently used bit position */ }; @@ -321,12 +390,12 @@ struct memory_bitmap { #endif #define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1) -/* - * alloc_rtree_node - Allocate a new node and add it to the radix tree. +/** + * alloc_rtree_node - Allocate a new node and add it to the radix tree. * - * This function is used to allocate inner nodes as well as the - * leave nodes of the radix tree. It also adds the node to the - * corresponding linked list passed in by the *list parameter. + * This function is used to allocate inner nodes as well as the + * leave nodes of the radix tree. It also adds the node to the + * corresponding linked list passed in by the *list parameter. */ static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, struct chain_allocator *ca, @@ -347,12 +416,12 @@ static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, return node; } -/* - * add_rtree_block - Add a new leave node to the radix tree +/** + * add_rtree_block - Add a new leave node to the radix tree. * - * The leave nodes need to be allocated in order to keep the leaves - * linked list in order. This is guaranteed by the zone->blocks - * counter. + * The leave nodes need to be allocated in order to keep the leaves + * linked list in order. This is guaranteed by the zone->blocks + * counter. */ static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, int safe_needed, struct chain_allocator *ca) @@ -417,17 +486,18 @@ static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, int clear_nosave_free); -/* - * create_zone_bm_rtree - create a radix tree for one zone +/** + * create_zone_bm_rtree - Create a radix tree for one zone. * - * Allocated the mem_zone_bm_rtree structure and initializes it. - * This function also allocated and builds the radix tree for the - * zone. + * Allocated the mem_zone_bm_rtree structure and initializes it. + * This function also allocated and builds the radix tree for the + * zone. */ -static struct mem_zone_bm_rtree * -create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed, - struct chain_allocator *ca, - unsigned long start, unsigned long end) +static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask, + int safe_needed, + struct chain_allocator *ca, + unsigned long start, + unsigned long end) { struct mem_zone_bm_rtree *zone; unsigned int i, nr_blocks; @@ -454,12 +524,12 @@ create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed, return zone; } -/* - * free_zone_bm_rtree - Free the memory of the radix tree +/** + * free_zone_bm_rtree - Free the memory of the radix tree. * - * Free all node pages of the radix tree. The mem_zone_bm_rtree - * structure itself is not freed here nor are the rtree_node - * structs. + * Free all node pages of the radix tree. The mem_zone_bm_rtree + * structure itself is not freed here nor are the rtree_node + * structs. */ static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, int clear_nosave_free) @@ -492,8 +562,8 @@ struct mem_extent { }; /** - * free_mem_extents - free a list of memory extents - * @list - list of extents to empty + * free_mem_extents - Free a list of memory extents. + * @list: List of extents to free. */ static void free_mem_extents(struct list_head *list) { @@ -506,10 +576,11 @@ static void free_mem_extents(struct list_head *list) } /** - * create_mem_extents - create a list of memory extents representing - * contiguous ranges of PFNs - * @list - list to put the extents into - * @gfp_mask - mask to use for memory allocations + * create_mem_extents - Create a list of memory extents. + * @list: List to put the extents into. + * @gfp_mask: Mask to use for memory allocations. + * + * The extents represent contiguous ranges of PFNs. */ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) { @@ -565,10 +636,10 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) } /** - * memory_bm_create - allocate memory for a memory bitmap - */ -static int -memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) + * memory_bm_create - Allocate memory for a memory bitmap. + */ +static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, + int safe_needed) { struct chain_allocator ca; struct list_head mem_extents; @@ -607,8 +678,9 @@ memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) } /** - * memory_bm_free - free memory occupied by the memory bitmap @bm - */ + * memory_bm_free - Free memory occupied by the memory bitmap. + * @bm: Memory bitmap. + */ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) { struct mem_zone_bm_rtree *zone; @@ -622,14 +694,13 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) } /** - * memory_bm_find_bit - Find the bit for pfn in the memory - * bitmap + * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap. * - * Find the bit in the bitmap @bm that corresponds to given pfn. - * The cur.zone, cur.block and cur.node_pfn member of @bm are - * updated. - * It walks the radix tree to find the page which contains the bit for - * pfn and returns the bit position in **addr and *bit_nr. + * Find the bit in memory bitmap @bm that corresponds to the given PFN. + * The cur.zone, cur.block and cur.node_pfn members of @bm are updated. + * + * Walk the radix tree to find the page containing the bit that represents @pfn + * and return the position of the bit in @addr and @bit_nr. */ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, void **addr, unsigned int *bit_nr) @@ -658,10 +729,9 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, zone_found: /* - * We have a zone. Now walk the radix tree to find the leave - * node for our pfn. + * We have found the zone. Now walk the radix tree to find the leaf node + * for our PFN. */ - node = bm->cur.node; if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) goto node_found; @@ -754,14 +824,14 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) } /* - * rtree_next_node - Jumps to the next leave node + * rtree_next_node - Jump to the next leaf node. * - * Sets the position to the beginning of the next node in the - * memory bitmap. This is either the next node in the current - * zone's radix tree or the first node in the radix tree of the - * next zone. + * Set the position to the beginning of the next node in the + * memory bitmap. This is either the next node in the current + * zone's radix tree or the first node in the radix tree of the + * next zone. * - * Returns true if there is a next node, false otherwise. + * Return true if there is a next node, false otherwise. */ static bool rtree_next_node(struct memory_bitmap *bm) { @@ -790,14 +860,15 @@ static bool rtree_next_node(struct memory_bitmap *bm) } /** - * memory_bm_rtree_next_pfn - Find the next set bit in the bitmap @bm + * memory_bm_rtree_next_pfn - Find the next set bit in a memory bitmap. + * @bm: Memory bitmap. * - * Starting from the last returned position this function searches - * for the next set bit in the memory bitmap and returns its - * number. If no more bit is set BM_END_OF_MAP is returned. + * Starting from the last returned position this function searches for the next + * set bit in @bm and returns the PFN represented by it. If no more bits are + * set, BM_END_OF_MAP is returned. * - * It is required to run memory_bm_position_reset() before the - * first call to this function. + * It is required to run memory_bm_position_reset() before the first call to + * this function for the given memory bitmap. */ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) { @@ -819,11 +890,10 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) return BM_END_OF_MAP; } -/** - * This structure represents a range of page frames the contents of which - * should not be saved during the suspend. +/* + * This structure represents a range of page frames the contents of which + * should not be saved during hibernation. */ - struct nosave_region { struct list_head list; unsigned long start_pfn; @@ -832,15 +902,42 @@ struct nosave_region { static LIST_HEAD(nosave_regions); +static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone) +{ + struct rtree_node *node; + + list_for_each_entry(node, &zone->nodes, list) + recycle_safe_page(node->data); + + list_for_each_entry(node, &zone->leaves, list) + recycle_safe_page(node->data); +} + +static void memory_bm_recycle(struct memory_bitmap *bm) +{ + struct mem_zone_bm_rtree *zone; + struct linked_page *p_list; + + list_for_each_entry(zone, &bm->zones, list) + recycle_zone_bm_rtree(zone); + + p_list = bm->p_list; + while (p_list) { + struct linked_page *lp = p_list; + + p_list = lp->next; + recycle_safe_page(lp); + } +} + /** - * register_nosave_region - register a range of page frames the contents - * of which should not be saved during the suspend (to be used in the early - * initialization code) + * register_nosave_region - Register a region of unsaveable memory. + * + * Register a range of page frames the contents of which should not be saved + * during hibernation (to be used in the early initialization code). */ - -void __init -__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn, - int use_kmalloc) +void __init __register_nosave_region(unsigned long start_pfn, + unsigned long end_pfn, int use_kmalloc) { struct nosave_region *region; @@ -857,12 +954,13 @@ __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn, } } if (use_kmalloc) { - /* during init, this shouldn't fail */ + /* During init, this shouldn't fail */ region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); BUG_ON(!region); - } else + } else { /* This allocation cannot fail */ region = memblock_virt_alloc(sizeof(struct nosave_region), 0); + } region->start_pfn = start_pfn; region->end_pfn = end_pfn; list_add_tail(®ion->list, &nosave_regions); @@ -923,10 +1021,12 @@ static void swsusp_unset_page_forbidden(struct page *page) } /** - * mark_nosave_pages - set bits corresponding to the page frames the - * contents of which should not be saved in a given bitmap. + * mark_nosave_pages - Mark pages that should not be saved. + * @bm: Memory bitmap. + * + * Set the bits in @bm that correspond to the page frames the contents of which + * should not be saved. */ - static void mark_nosave_pages(struct memory_bitmap *bm) { struct nosave_region *region; @@ -956,13 +1056,13 @@ static void mark_nosave_pages(struct memory_bitmap *bm) } /** - * create_basic_memory_bitmaps - create bitmaps needed for marking page - * frames that should not be saved and free page frames. The pointers - * forbidden_pages_map and free_pages_map are only modified if everything - * goes well, because we don't want the bits to be used before both bitmaps - * are set up. + * create_basic_memory_bitmaps - Create bitmaps to hold basic page information. + * + * Create bitmaps needed for marking page frames that should not be saved and + * free page frames. The forbidden_pages_map and free_pages_map pointers are + * only modified if everything goes well, because we don't want the bits to be + * touched before both bitmaps are set up. */ - int create_basic_memory_bitmaps(void) { struct memory_bitmap *bm1, *bm2; @@ -1007,12 +1107,12 @@ int create_basic_memory_bitmaps(void) } /** - * free_basic_memory_bitmaps - free memory bitmaps allocated by - * create_basic_memory_bitmaps(). The auxiliary pointers are necessary - * so that the bitmaps themselves are not referred to while they are being - * freed. + * free_basic_memory_bitmaps - Free memory bitmaps holding basic information. + * + * Free memory bitmaps allocated by create_basic_memory_bitmaps(). The + * auxiliary pointers are necessary so that the bitmaps themselves are not + * referred to while they are being freed. */ - void free_basic_memory_bitmaps(void) { struct memory_bitmap *bm1, *bm2; @@ -1033,11 +1133,13 @@ void free_basic_memory_bitmaps(void) } /** - * snapshot_additional_pages - estimate the number of additional pages - * be needed for setting up the suspend image data structures for given - * zone (usually the returned value is greater than the exact number) + * snapshot_additional_pages - Estimate the number of extra pages needed. + * @zone: Memory zone to carry out the computation for. + * + * Estimate the number of additional pages needed for setting up a hibernation + * image data structures for @zone (usually, the returned value is greater than + * the exact number). */ - unsigned int snapshot_additional_pages(struct zone *zone) { unsigned int rtree, nodes; @@ -1055,10 +1157,10 @@ unsigned int snapshot_additional_pages(struct zone *zone) #ifdef CONFIG_HIGHMEM /** - * count_free_highmem_pages - compute the total number of free highmem - * pages, system-wide. + * count_free_highmem_pages - Compute the total number of free highmem pages. + * + * The returned number is system-wide. */ - static unsigned int count_free_highmem_pages(void) { struct zone *zone; @@ -1072,11 +1174,12 @@ static unsigned int count_free_highmem_pages(void) } /** - * saveable_highmem_page - Determine whether a highmem page should be - * included in the suspend image. + * saveable_highmem_page - Check if a highmem page is saveable. * - * We should save the page if it isn't Nosave or NosaveFree, or Reserved, - * and it isn't a part of a free chunk of pages. + * Determine whether a highmem page should be included in a hibernation image. + * + * We should save the page if it isn't Nosave or NosaveFree, or Reserved, + * and it isn't part of a free chunk of pages. */ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) { @@ -1102,10 +1205,8 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) } /** - * count_highmem_pages - compute the total number of saveable highmem - * pages. + * count_highmem_pages - Compute the total number of saveable highmem pages. */ - static unsigned int count_highmem_pages(void) { struct zone *zone; @@ -1133,12 +1234,14 @@ static inline void *saveable_highmem_page(struct zone *z, unsigned long p) #endif /* CONFIG_HIGHMEM */ /** - * saveable_page - Determine whether a non-highmem page should be included - * in the suspend image. + * saveable_page - Check if the given page is saveable. * - * We should save the page if it isn't Nosave, and is not in the range - * of pages statically defined as 'unsaveable', and it isn't a part of - * a free chunk of pages. + * Determine whether a non-highmem page should be included in a hibernation + * image. + * + * We should save the page if it isn't Nosave, and is not in the range + * of pages statically defined as 'unsaveable', and it isn't part of + * a free chunk of pages. */ static struct page *saveable_page(struct zone *zone, unsigned long pfn) { @@ -1167,10 +1270,8 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn) } /** - * count_data_pages - compute the total number of saveable non-highmem - * pages. + * count_data_pages - Compute the total number of saveable non-highmem pages. */ - static unsigned int count_data_pages(void) { struct zone *zone; @@ -1190,7 +1291,8 @@ static unsigned int count_data_pages(void) return n; } -/* This is needed, because copy_page and memcpy are not usable for copying +/* + * This is needed, because copy_page and memcpy are not usable for copying * task structs. */ static inline void do_copy_page(long *dst, long *src) @@ -1201,12 +1303,12 @@ static inline void do_copy_page(long *dst, long *src) *dst++ = *src++; } - /** - * safe_copy_page - check if the page we are going to copy is marked as - * present in the kernel page tables (this always is the case if - * CONFIG_DEBUG_PAGEALLOC is not set and in that case - * kernel_page_present() always returns 'true'). + * safe_copy_page - Copy a page in a safe way. + * + * Check if the page we are going to copy is marked as present in the kernel + * page tables (this always is the case if CONFIG_DEBUG_PAGEALLOC is not set + * and in that case kernel_page_present() always returns 'true'). */ static void safe_copy_page(void *dst, struct page *s_page) { @@ -1219,10 +1321,8 @@ static void safe_copy_page(void *dst, struct page *s_page) } } - #ifdef CONFIG_HIGHMEM -static inline struct page * -page_is_saveable(struct zone *zone, unsigned long pfn) +static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn) { return is_highmem(zone) ? saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); @@ -1243,7 +1343,8 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) kunmap_atomic(src); } else { if (PageHighMem(d_page)) { - /* Page pointed to by src may contain some kernel + /* + * The page pointed to by src may contain some kernel * data modified by kmap_atomic() */ safe_copy_page(buffer, s_page); @@ -1265,8 +1366,8 @@ static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) } #endif /* CONFIG_HIGHMEM */ -static void -copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) +static void copy_data_pages(struct memory_bitmap *copy_bm, + struct memory_bitmap *orig_bm) { struct zone *zone; unsigned long pfn; @@ -1315,12 +1416,11 @@ static struct memory_bitmap orig_bm; static struct memory_bitmap copy_bm; /** - * swsusp_free - free pages allocated for the suspend. + * swsusp_free - Free pages allocated for hibernation image. * - * Suspend pages are alocated before the atomic copy is made, so we - * need to release them after the resume. + * Image pages are alocated before snapshot creation, so they need to be + * released after resume. */ - void swsusp_free(void) { unsigned long fb_pfn, fr_pfn; @@ -1351,6 +1451,7 @@ loop: memory_bm_clear_current(forbidden_pages_map); memory_bm_clear_current(free_pages_map); + hibernate_restore_unprotect_page(page_address(page)); __free_page(page); goto loop; } @@ -1362,6 +1463,7 @@ out: buffer = NULL; alloc_normal = 0; alloc_highmem = 0; + hibernate_restore_protection_end(); } /* Helper functions used for the shrinking of memory. */ @@ -1369,7 +1471,7 @@ out: #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) /** - * preallocate_image_pages - Allocate a number of pages for hibernation image + * preallocate_image_pages - Allocate a number of pages for hibernation image. * @nr_pages: Number of page frames to allocate. * @mask: GFP flags to use for the allocation. * @@ -1419,7 +1521,7 @@ static unsigned long preallocate_image_highmem(unsigned long nr_pages) } /** - * __fraction - Compute (an approximation of) x * (multiplier / base) + * __fraction - Compute (an approximation of) x * (multiplier / base). */ static unsigned long __fraction(u64 x, u64 multiplier, u64 base) { @@ -1429,8 +1531,8 @@ static unsigned long __fraction(u64 x, u64 multiplier, u64 base) } static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, - unsigned long highmem, - unsigned long total) + unsigned long highmem, + unsigned long total) { unsigned long alloc = __fraction(nr_pages, highmem, total); @@ -1443,15 +1545,15 @@ static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) } static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, - unsigned long highmem, - unsigned long total) + unsigned long highmem, + unsigned long total) { return 0; } #endif /* CONFIG_HIGHMEM */ /** - * free_unnecessary_pages - Release preallocated pages not needed for the image + * free_unnecessary_pages - Release preallocated pages not needed for the image. */ static unsigned long free_unnecessary_pages(void) { @@ -1505,7 +1607,7 @@ static unsigned long free_unnecessary_pages(void) } /** - * minimum_image_size - Estimate the minimum acceptable size of an image + * minimum_image_size - Estimate the minimum acceptable size of an image. * @saveable: Number of saveable pages in the system. * * We want to avoid attempting to free too much memory too hard, so estimate the @@ -1535,7 +1637,7 @@ static unsigned long minimum_image_size(unsigned long saveable) } /** - * hibernate_preallocate_memory - Preallocate memory for hibernation image + * hibernate_preallocate_memory - Preallocate memory for hibernation image. * * To create a hibernation image it is necessary to make a copy of every page * frame in use. We also need a number of page frames to be free during @@ -1708,10 +1810,11 @@ int hibernate_preallocate_memory(void) #ifdef CONFIG_HIGHMEM /** - * count_pages_for_highmem - compute the number of non-highmem pages - * that will be necessary for creating copies of highmem pages. - */ - + * count_pages_for_highmem - Count non-highmem pages needed for copying highmem. + * + * Compute the number of non-highmem pages that will be necessary for creating + * copies of highmem pages. + */ static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; @@ -1724,15 +1827,12 @@ static unsigned int count_pages_for_highmem(unsigned int nr_highmem) return nr_highmem; } #else -static unsigned int -count_pages_for_highmem(unsigned int nr_highmem) { return 0; } +static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; } #endif /* CONFIG_HIGHMEM */ /** - * enough_free_mem - Make sure we have enough free memory for the - * snapshot image. + * enough_free_mem - Check if there is enough free memory for the image. */ - static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) { struct zone *zone; @@ -1751,10 +1851,11 @@ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) #ifdef CONFIG_HIGHMEM /** - * get_highmem_buffer - if there are some highmem pages in the suspend - * image, we may need the buffer to copy them and/or load their data. + * get_highmem_buffer - Allocate a buffer for highmem pages. + * + * If there are some highmem pages in the hibernation image, we may need a + * buffer to copy them and/or load their data. */ - static inline int get_highmem_buffer(int safe_needed) { buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); @@ -1762,13 +1863,13 @@ static inline int get_highmem_buffer(int safe_needed) } /** - * alloc_highmem_image_pages - allocate some highmem pages for the image. - * Try to allocate as many pages as needed, but if the number of free - * highmem pages is lesser than that, allocate them all. + * alloc_highmem_image_pages - Allocate some highmem pages for the image. + * + * Try to allocate as many pages as needed, but if the number of free highmem + * pages is less than that, allocate them all. */ - -static inline unsigned int -alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) +static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, + unsigned int nr_highmem) { unsigned int to_alloc = count_free_highmem_pages(); @@ -1787,25 +1888,24 @@ alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) #else static inline int get_highmem_buffer(int safe_needed) { return 0; } -static inline unsigned int -alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } +static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, + unsigned int n) { return 0; } #endif /* CONFIG_HIGHMEM */ /** - * swsusp_alloc - allocate memory for the suspend image + * swsusp_alloc - Allocate memory for hibernation image. * - * We first try to allocate as many highmem pages as there are - * saveable highmem pages in the system. If that fails, we allocate - * non-highmem pages for the copies of the remaining highmem ones. + * We first try to allocate as many highmem pages as there are + * saveable highmem pages in the system. If that fails, we allocate + * non-highmem pages for the copies of the remaining highmem ones. * - * In this approach it is likely that the copies of highmem pages will - * also be located in the high memory, because of the way in which - * copy_data_pages() works. + * In this approach it is likely that the copies of highmem pages will + * also be located in the high memory, because of the way in which + * copy_data_pages() works. */ - -static int -swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, - unsigned int nr_pages, unsigned int nr_highmem) +static int swsusp_alloc(struct memory_bitmap *orig_bm, + struct memory_bitmap *copy_bm, + unsigned int nr_pages, unsigned int nr_highmem) { if (nr_highmem > 0) { if (get_highmem_buffer(PG_ANY)) @@ -1855,7 +1955,8 @@ asmlinkage __visible int swsusp_save(void) return -ENOMEM; } - /* During allocating of suspend pagedir, new cold pages may appear. + /* + * During allocating of suspend pagedir, new cold pages may appear. * Kill them. */ drain_local_pages(NULL); @@ -1918,12 +2019,14 @@ static int init_header(struct swsusp_info *info) } /** - * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm - * are stored in the array @buf[] (1 page at a time) + * pack_pfns - Prepare PFNs for saving. + * @bm: Memory bitmap. + * @buf: Memory buffer to store the PFNs in. + * + * PFNs corresponding to set bits in @bm are stored in the area of memory + * pointed to by @buf (1 page at a time). */ - -static inline void -pack_pfns(unsigned long *buf, struct memory_bitmap *bm) +static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm) { int j; @@ -1937,22 +2040,21 @@ pack_pfns(unsigned long *buf, struct memory_bitmap *bm) } /** - * snapshot_read_next - used for reading the system memory snapshot. + * snapshot_read_next - Get the address to read the next image page from. + * @handle: Snapshot handle to be used for the reading. * - * On the first call to it @handle should point to a zeroed - * snapshot_handle structure. The structure gets updated and a pointer - * to it should be passed to this function every next time. + * On the first call, @handle should point to a zeroed snapshot_handle + * structure. The structure gets populated then and a pointer to it should be + * passed to this function every next time. * - * On success the function returns a positive number. Then, the caller - * is allowed to read up to the returned number of bytes from the memory - * location computed by the data_of() macro. + * On success, the function returns a positive number. Then, the caller + * is allowed to read up to the returned number of bytes from the memory + * location computed by the data_of() macro. * - * The function returns 0 to indicate the end of data stream condition, - * and a negative number is returned on error. In such cases the - * structure pointed to by @handle is not updated and should not be used - * any more. + * The function returns 0 to indicate the end of the data stream condition, + * and negative numbers are returned on errors. If that happens, the structure + * pointed to by @handle is not updated and should not be used any more. */ - int snapshot_read_next(struct snapshot_handle *handle) { if (handle->cur > nr_meta_pages + nr_copy_pages) @@ -1981,7 +2083,8 @@ int snapshot_read_next(struct snapshot_handle *handle) page = pfn_to_page(memory_bm_next_pfn(©_bm)); if (PageHighMem(page)) { - /* Highmem pages are copied to the buffer, + /* + * Highmem pages are copied to the buffer, * because we can't return with a kmapped * highmem page (we may not be called again). */ @@ -1999,53 +2102,41 @@ int snapshot_read_next(struct snapshot_handle *handle) return PAGE_SIZE; } -/** - * mark_unsafe_pages - mark the pages that cannot be used for storing - * the image during resume, because they conflict with the pages that - * had been used before suspend - */ - -static int mark_unsafe_pages(struct memory_bitmap *bm) +static void duplicate_memory_bitmap(struct memory_bitmap *dst, + struct memory_bitmap *src) { - struct zone *zone; - unsigned long pfn, max_zone_pfn; + unsigned long pfn; - /* Clear page flags */ - for_each_populated_zone(zone) { - max_zone_pfn = zone_end_pfn(zone); - for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) - if (pfn_valid(pfn)) - swsusp_unset_page_free(pfn_to_page(pfn)); + memory_bm_position_reset(src); + pfn = memory_bm_next_pfn(src); + while (pfn != BM_END_OF_MAP) { + memory_bm_set_bit(dst, pfn); + pfn = memory_bm_next_pfn(src); } - - /* Mark pages that correspond to the "original" pfns as "unsafe" */ - memory_bm_position_reset(bm); - do { - pfn = memory_bm_next_pfn(bm); - if (likely(pfn != BM_END_OF_MAP)) { - if (likely(pfn_valid(pfn))) - swsusp_set_page_free(pfn_to_page(pfn)); - else - return -EFAULT; - } - } while (pfn != BM_END_OF_MAP); - - allocated_unsafe_pages = 0; - - return 0; } -static void -duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) +/** + * mark_unsafe_pages - Mark pages that were used before hibernation. + * + * Mark the pages that cannot be used for storing the image during restoration, + * because they conflict with the pages that had been used before hibernation. + */ +static void mark_unsafe_pages(struct memory_bitmap *bm) { unsigned long pfn; - memory_bm_position_reset(src); - pfn = memory_bm_next_pfn(src); + /* Clear the "free"/"unsafe" bit for all PFNs */ + memory_bm_position_reset(free_pages_map); + pfn = memory_bm_next_pfn(free_pages_map); while (pfn != BM_END_OF_MAP) { - memory_bm_set_bit(dst, pfn); - pfn = memory_bm_next_pfn(src); + memory_bm_clear_current(free_pages_map); + pfn = memory_bm_next_pfn(free_pages_map); } + + /* Mark pages that correspond to the "original" PFNs as "unsafe" */ + duplicate_memory_bitmap(free_pages_map, bm); + + allocated_unsafe_pages = 0; } static int check_header(struct swsusp_info *info) @@ -2063,11 +2154,9 @@ static int check_header(struct swsusp_info *info) } /** - * load header - check the image header and copy data from it + * load header - Check the image header and copy the data from it. */ - -static int -load_header(struct swsusp_info *info) +static int load_header(struct swsusp_info *info) { int error; @@ -2081,8 +2170,12 @@ load_header(struct swsusp_info *info) } /** - * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set - * the corresponding bit in the memory bitmap @bm + * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap. + * @bm: Memory bitmap. + * @buf: Area of memory containing the PFNs. + * + * For each element of the array pointed to by @buf (1 page at a time), set the + * corresponding bit in @bm. */ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) { @@ -2095,7 +2188,7 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) /* Extract and buffer page key for data page (s390 only). */ page_key_memorize(buf + j); - if (memory_bm_pfn_present(bm, buf[j])) + if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) memory_bm_set_bit(bm, buf[j]); else return -EFAULT; @@ -2104,13 +2197,9 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) return 0; } -/* List of "safe" pages that may be used to store data loaded from the suspend - * image - */ -static struct linked_page *safe_pages_list; - #ifdef CONFIG_HIGHMEM -/* struct highmem_pbe is used for creating the list of highmem pages that +/* + * struct highmem_pbe is used for creating the list of highmem pages that * should be restored atomically during the resume from disk, because the page * frames they have occupied before the suspend are in use. */ @@ -2120,7 +2209,8 @@ struct highmem_pbe { struct highmem_pbe *next; }; -/* List of highmem PBEs needed for restoring the highmem pages that were +/* + * List of highmem PBEs needed for restoring the highmem pages that were * allocated before the suspend and included in the suspend image, but have * also been allocated by the "resume" kernel, so their contents cannot be * written directly to their "original" page frames. @@ -2128,11 +2218,11 @@ struct highmem_pbe { static struct highmem_pbe *highmem_pblist; /** - * count_highmem_image_pages - compute the number of highmem pages in the - * suspend image. The bits in the memory bitmap @bm that correspond to the - * image pages are assumed to be set. + * count_highmem_image_pages - Compute the number of highmem pages in the image. + * @bm: Memory bitmap. + * + * The bits in @bm that correspond to image pages are assumed to be set. */ - static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { unsigned long pfn; @@ -2149,24 +2239,25 @@ static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) return cnt; } -/** - * prepare_highmem_image - try to allocate as many highmem pages as - * there are highmem image pages (@nr_highmem_p points to the variable - * containing the number of highmem image pages). The pages that are - * "safe" (ie. will not be overwritten when the suspend image is - * restored) have the corresponding bits set in @bm (it must be - * unitialized). - * - * NOTE: This function should not be called if there are no highmem - * image pages. - */ - static unsigned int safe_highmem_pages; static struct memory_bitmap *safe_highmem_bm; -static int -prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) +/** + * prepare_highmem_image - Allocate memory for loading highmem data from image. + * @bm: Pointer to an uninitialized memory bitmap structure. + * @nr_highmem_p: Pointer to the number of highmem image pages. + * + * Try to allocate as many highmem pages as there are highmem image pages + * (@nr_highmem_p points to the variable containing the number of highmem image + * pages). The pages that are "safe" (ie. will not be overwritten when the + * hibernation image is restored entirely) have the corresponding bits set in + * @bm (it must be unitialized). + * + * NOTE: This function should not be called if there are no highmem image pages. + */ +static int prepare_highmem_image(struct memory_bitmap *bm, + unsigned int *nr_highmem_p) { unsigned int to_alloc; @@ -2201,39 +2292,42 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) return 0; } +static struct page *last_highmem_page; + /** - * get_highmem_page_buffer - for given highmem image page find the buffer - * that suspend_write_next() should set for its caller to write to. + * get_highmem_page_buffer - Prepare a buffer to store a highmem image page. * - * If the page is to be saved to its "original" page frame or a copy of - * the page is to be made in the highmem, @buffer is returned. Otherwise, - * the copy of the page is to be made in normal memory, so the address of - * the copy is returned. + * For a given highmem image page get a buffer that suspend_write_next() should + * return to its caller to write to. * - * If @buffer is returned, the caller of suspend_write_next() will write - * the page's contents to @buffer, so they will have to be copied to the - * right location on the next call to suspend_write_next() and it is done - * with the help of copy_last_highmem_page(). For this purpose, if - * @buffer is returned, @last_highmem page is set to the page to which - * the data will have to be copied from @buffer. + * If the page is to be saved to its "original" page frame or a copy of + * the page is to be made in the highmem, @buffer is returned. Otherwise, + * the copy of the page is to be made in normal memory, so the address of + * the copy is returned. + * + * If @buffer is returned, the caller of suspend_write_next() will write + * the page's contents to @buffer, so they will have to be copied to the + * right location on the next call to suspend_write_next() and it is done + * with the help of copy_last_highmem_page(). For this purpose, if + * @buffer is returned, @last_highmem_page is set to the page to which + * the data will have to be copied from @buffer. */ - -static struct page *last_highmem_page; - -static void * -get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) +static void *get_highmem_page_buffer(struct page *page, + struct chain_allocator *ca) { struct highmem_pbe *pbe; void *kaddr; if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { - /* We have allocated the "original" page frame and we can + /* + * We have allocated the "original" page frame and we can * use it directly to store the loaded page. */ last_highmem_page = page; return buffer; } - /* The "original" page frame has not been allocated and we have to + /* + * The "original" page frame has not been allocated and we have to * use a "safe" page frame to store the loaded page. */ pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); @@ -2263,11 +2357,12 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) } /** - * copy_last_highmem_page - copy the contents of a highmem image from - * @buffer, where the caller of snapshot_write_next() has place them, - * to the right location represented by @last_highmem_page . + * copy_last_highmem_page - Copy most the most recent highmem image page. + * + * Copy the contents of a highmem image from @buffer, where the caller of + * snapshot_write_next() has stored them, to the right location represented by + * @last_highmem_page . */ - static void copy_last_highmem_page(void) { if (last_highmem_page) { @@ -2294,17 +2389,13 @@ static inline void free_highmem_data(void) free_image_page(buffer, PG_UNSAFE_CLEAR); } #else -static unsigned int -count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } +static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } -static inline int -prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) -{ - return 0; -} +static inline int prepare_highmem_image(struct memory_bitmap *bm, + unsigned int *nr_highmem_p) { return 0; } -static inline void * -get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) +static inline void *get_highmem_page_buffer(struct page *page, + struct chain_allocator *ca) { return ERR_PTR(-EINVAL); } @@ -2314,27 +2405,27 @@ static inline int last_highmem_page_copied(void) { return 1; } static inline void free_highmem_data(void) {} #endif /* CONFIG_HIGHMEM */ +#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) + /** - * prepare_image - use the memory bitmap @bm to mark the pages that will - * be overwritten in the process of restoring the system memory state - * from the suspend image ("unsafe" pages) and allocate memory for the - * image. + * prepare_image - Make room for loading hibernation image. + * @new_bm: Unitialized memory bitmap structure. + * @bm: Memory bitmap with unsafe pages marked. + * + * Use @bm to mark the pages that will be overwritten in the process of + * restoring the system memory state from the suspend image ("unsafe" pages) + * and allocate memory for the image. * - * The idea is to allocate a new memory bitmap first and then allocate - * as many pages as needed for the image data, but not to assign these - * pages to specific tasks initially. Instead, we just mark them as - * allocated and create a lists of "safe" pages that will be used - * later. On systems with high memory a list of "safe" highmem pages is - * also created. + * The idea is to allocate a new memory bitmap first and then allocate + * as many pages as needed for image data, but without specifying what those + * pages will be used for just yet. Instead, we mark them all as allocated and + * create a lists of "safe" pages to be used later. On systems with high + * memory a list of "safe" highmem pages is created too. */ - -#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) - -static int -prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) +static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) { unsigned int nr_pages, nr_highmem; - struct linked_page *sp_list, *lp; + struct linked_page *lp; int error; /* If there is no highmem, the buffer will not be necessary */ @@ -2342,9 +2433,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) buffer = NULL; nr_highmem = count_highmem_image_pages(bm); - error = mark_unsafe_pages(bm); - if (error) - goto Free; + mark_unsafe_pages(bm); error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); if (error) @@ -2357,14 +2446,15 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) if (error) goto Free; } - /* Reserve some safe pages for potential later use. + /* + * Reserve some safe pages for potential later use. * * NOTE: This way we make sure there will be enough safe pages for the * chain_alloc() in get_buffer(). It is a bit wasteful, but * nr_copy_pages cannot be greater than 50% of the memory anyway. + * + * nr_copy_pages cannot be less than allocated_unsafe_pages too. */ - sp_list = NULL; - /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); while (nr_pages > 0) { @@ -2373,12 +2463,11 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) error = -ENOMEM; goto Free; } - lp->next = sp_list; - sp_list = lp; + lp->next = safe_pages_list; + safe_pages_list = lp; nr_pages--; } /* Preallocate memory for the image */ - safe_pages_list = NULL; nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; while (nr_pages > 0) { lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); @@ -2396,12 +2485,6 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) swsusp_set_page_free(virt_to_page(lp)); nr_pages--; } - /* Free the reserved safe pages so that chain_alloc() can use them */ - while (sp_list) { - lp = sp_list->next; - free_image_page(sp_list, PG_UNSAFE_CLEAR); - sp_list = lp; - } return 0; Free: @@ -2410,10 +2493,11 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) } /** - * get_buffer - compute the address that snapshot_write_next() should - * set for its caller to write to. + * get_buffer - Get the address to store the next image data page. + * + * Get the address that snapshot_write_next() should return to its caller to + * write to. */ - static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) { struct pbe *pbe; @@ -2428,12 +2512,14 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) return get_highmem_page_buffer(page, ca); if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) - /* We have allocated the "original" page frame and we can + /* + * We have allocated the "original" page frame and we can * use it directly to store the loaded page. */ return page_address(page); - /* The "original" page frame has not been allocated and we have to + /* + * The "original" page frame has not been allocated and we have to * use a "safe" page frame to store the loaded page. */ pbe = chain_alloc(ca, sizeof(struct pbe)); @@ -2450,22 +2536,21 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) } /** - * snapshot_write_next - used for writing the system memory snapshot. + * snapshot_write_next - Get the address to store the next image page. + * @handle: Snapshot handle structure to guide the writing. * - * On the first call to it @handle should point to a zeroed - * snapshot_handle structure. The structure gets updated and a pointer - * to it should be passed to this function every next time. + * On the first call, @handle should point to a zeroed snapshot_handle + * structure. The structure gets populated then and a pointer to it should be + * passed to this function every next time. * - * On success the function returns a positive number. Then, the caller - * is allowed to write up to the returned number of bytes to the memory - * location computed by the data_of() macro. + * On success, the function returns a positive number. Then, the caller + * is allowed to write up to the returned number of bytes to the memory + * location computed by the data_of() macro. * - * The function returns 0 to indicate the "end of file" condition, - * and a negative number is returned on error. In such cases the - * structure pointed to by @handle is not updated and should not be used - * any more. + * The function returns 0 to indicate the "end of file" condition. Negative + * numbers are returned on errors, in which cases the structure pointed to by + * @handle is not updated and should not be used any more. */ - int snapshot_write_next(struct snapshot_handle *handle) { static struct chain_allocator ca; @@ -2491,6 +2576,8 @@ int snapshot_write_next(struct snapshot_handle *handle) if (error) return error; + safe_pages_list = NULL; + error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); if (error) return error; @@ -2500,6 +2587,7 @@ int snapshot_write_next(struct snapshot_handle *handle) if (error) return error; + hibernate_restore_protection_begin(); } else if (handle->cur <= nr_meta_pages + 1) { error = unpack_orig_pfns(buffer, ©_bm); if (error) @@ -2522,6 +2610,7 @@ int snapshot_write_next(struct snapshot_handle *handle) copy_last_highmem_page(); /* Restore page key for data page (s390 only). */ page_key_write(handle->buffer); + hibernate_restore_protect_page(handle->buffer); handle->buffer = get_buffer(&orig_bm, &ca); if (IS_ERR(handle->buffer)) return PTR_ERR(handle->buffer); @@ -2533,22 +2622,23 @@ int snapshot_write_next(struct snapshot_handle *handle) } /** - * snapshot_write_finalize - must be called after the last call to - * snapshot_write_next() in case the last page in the image happens - * to be a highmem page and its contents should be stored in the - * highmem. Additionally, it releases the memory that will not be - * used any more. + * snapshot_write_finalize - Complete the loading of a hibernation image. + * + * Must be called after the last call to snapshot_write_next() in case the last + * page in the image happens to be a highmem page and its contents should be + * stored in highmem. Additionally, it recycles bitmap memory that's not + * necessary any more. */ - void snapshot_write_finalize(struct snapshot_handle *handle) { copy_last_highmem_page(); /* Restore page key for data page (s390 only). */ page_key_write(handle->buffer); page_key_free(); - /* Free only if we have loaded the image entirely */ + hibernate_restore_protect_page(handle->buffer); + /* Do that only if we have loaded the image entirely */ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { - memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); + memory_bm_recycle(&orig_bm); free_highmem_data(); } } @@ -2561,8 +2651,8 @@ int snapshot_image_loaded(struct snapshot_handle *handle) #ifdef CONFIG_HIGHMEM /* Assumes that @buf is ready and points to a "safe" page */ -static inline void -swap_two_pages_data(struct page *p1, struct page *p2, void *buf) +static inline void swap_two_pages_data(struct page *p1, struct page *p2, + void *buf) { void *kaddr1, *kaddr2; @@ -2576,15 +2666,15 @@ swap_two_pages_data(struct page *p1, struct page *p2, void *buf) } /** - * restore_highmem - for each highmem page that was allocated before - * the suspend and included in the suspend image, and also has been - * allocated by the "resume" kernel swap its current (ie. "before - * resume") contents with the previous (ie. "before suspend") one. + * restore_highmem - Put highmem image pages into their original locations. + * + * For each highmem page that was in use before hibernation and is included in + * the image, and also has been allocated by the "restore" kernel, swap its + * current contents with the previous (ie. "before hibernation") ones. * - * If the resume eventually fails, we can call this function once - * again and restore the "before resume" highmem state. + * If the restore eventually fails, we can call this function once again and + * restore the highmem state as seen by the restore kernel. */ - int restore_highmem(void) { struct highmem_pbe *pbe = highmem_pblist; diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 5b70d64b871e..0acab9d7f96f 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -266,16 +266,18 @@ static int suspend_test(int level) */ static int suspend_prepare(suspend_state_t state) { - int error; + int error, nr_calls = 0; if (!sleep_state_supported(state)) return -EPERM; pm_prepare_console(); - error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); - if (error) + error = __pm_notifier_call_chain(PM_SUSPEND_PREPARE, -1, &nr_calls); + if (error) { + nr_calls--; goto Finish; + } trace_suspend_resume(TPS("freeze_processes"), 0, true); error = suspend_freeze_processes(); @@ -286,7 +288,7 @@ static int suspend_prepare(suspend_state_t state) suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); Finish: - pm_notifier_call_chain(PM_POST_SUSPEND); + __pm_notifier_call_chain(PM_POST_SUSPEND, nr_calls, NULL); pm_restore_console(); return error; } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index c1aaac431055..a3b1e617bcdc 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -350,6 +350,12 @@ static int swsusp_swap_check(void) if (res < 0) blkdev_put(hib_resume_bdev, FMODE_WRITE); + /* + * Update the resume device to the one actually used, + * so the test_resume mode can use it in case it is + * invoked from hibernate() to test the snapshot. + */ + swsusp_resume_device = hib_resume_bdev->bd_dev; return res; } diff --git a/kernel/power/user.c b/kernel/power/user.c index 526e8911460a..35310b627388 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -47,7 +47,7 @@ atomic_t snapshot_device_available = ATOMIC_INIT(1); static int snapshot_open(struct inode *inode, struct file *filp) { struct snapshot_data *data; - int error; + int error, nr_calls = 0; if (!hibernation_available()) return -EPERM; @@ -74,9 +74,9 @@ static int snapshot_open(struct inode *inode, struct file *filp) swap_type_of(swsusp_resume_device, 0, NULL) : -1; data->mode = O_RDONLY; data->free_bitmaps = false; - error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); if (error) - pm_notifier_call_chain(PM_POST_HIBERNATION); + __pm_notifier_call_chain(PM_POST_HIBERNATION, --nr_calls, NULL); } else { /* * Resuming. We may need to wait for the image device to @@ -86,13 +86,15 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = -1; data->mode = O_WRONLY; - error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); if (!error) { error = create_basic_memory_bitmaps(); data->free_bitmaps = !error; - } + } else + nr_calls--; + if (error) - pm_notifier_call_chain(PM_POST_RESTORE); + __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); } if (error) atomic_inc(&snapshot_device_available); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 14c4aa25cc45..a84641b222c1 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -47,6 +47,8 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; + unsigned int cached_raw_freq; + /* The fields below are only needed when sharing a policy. */ unsigned long util; unsigned long max; @@ -106,7 +108,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, /** * get_next_freq - Compute a new frequency for a given cpufreq policy. - * @policy: cpufreq policy object to compute the new frequency for. + * @sg_cpu: schedutil cpu object to compute the new frequency for. * @util: Current CPU utilization. * @max: CPU capacity. * @@ -121,14 +123,25 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, * next_freq = C * curr_freq * util_raw / max * * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8. + * + * The lowest driver-supported frequency which is equal or greater than the raw + * next_freq (as calculated above) is returned, subject to policy min/max and + * cpufreq driver limitations. */ -static unsigned int get_next_freq(struct cpufreq_policy *policy, - unsigned long util, unsigned long max) +static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, + unsigned long max) { + struct sugov_policy *sg_policy = sg_cpu->sg_policy; + struct cpufreq_policy *policy = sg_policy->policy; unsigned int freq = arch_scale_freq_invariant() ? policy->cpuinfo.max_freq : policy->cur; - return (freq + (freq >> 2)) * util / max; + freq = (freq + (freq >> 2)) * util / max; + + if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) + return sg_policy->next_freq; + sg_cpu->cached_raw_freq = freq; + return cpufreq_driver_resolve_freq(policy, freq); } static void sugov_update_single(struct update_util_data *hook, u64 time, @@ -143,13 +156,14 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, return; next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq : - get_next_freq(policy, util, max); + get_next_freq(sg_cpu, util, max); sugov_update_commit(sg_policy, time, next_f); } -static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy, +static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, unsigned long util, unsigned long max) { + struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; unsigned int max_f = policy->cpuinfo.max_freq; u64 last_freq_update_time = sg_policy->last_freq_update_time; @@ -189,7 +203,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_policy *sg_policy, } } - return get_next_freq(policy, util, max); + return get_next_freq(sg_cpu, util, max); } static void sugov_update_shared(struct update_util_data *hook, u64 time, @@ -206,7 +220,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, sg_cpu->last_update = time; if (sugov_should_update_freq(sg_policy, time)) { - next_f = sugov_next_freq_shared(sg_policy, util, max); + next_f = sugov_next_freq_shared(sg_cpu, util, max); sugov_update_commit(sg_policy, time, next_f); } @@ -394,7 +408,7 @@ static int sugov_init(struct cpufreq_policy *policy) return ret; } -static int sugov_exit(struct cpufreq_policy *policy) +static void sugov_exit(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; struct sugov_tunables *tunables = sg_policy->tunables; @@ -412,7 +426,6 @@ static int sugov_exit(struct cpufreq_policy *policy) mutex_unlock(&global_tunables_lock); sugov_policy_free(sg_policy); - return 0; } static int sugov_start(struct cpufreq_policy *policy) @@ -434,6 +447,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->util = ULONG_MAX; sg_cpu->max = 0; sg_cpu->last_update = 0; + sg_cpu->cached_raw_freq = 0; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, sugov_update_shared); } else { @@ -444,7 +458,7 @@ static int sugov_start(struct cpufreq_policy *policy) return 0; } -static int sugov_stop(struct cpufreq_policy *policy) +static void sugov_stop(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; unsigned int cpu; @@ -456,53 +470,29 @@ static int sugov_stop(struct cpufreq_policy *policy) irq_work_sync(&sg_policy->irq_work); cancel_work_sync(&sg_policy->work); - return 0; } -static int sugov_limits(struct cpufreq_policy *policy) +static void sugov_limits(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; if (!policy->fast_switch_enabled) { mutex_lock(&sg_policy->work_lock); - - if (policy->max < policy->cur) - __cpufreq_driver_target(policy, policy->max, - CPUFREQ_RELATION_H); - else if (policy->min > policy->cur) - __cpufreq_driver_target(policy, policy->min, - CPUFREQ_RELATION_L); - + cpufreq_policy_apply_limits(policy); mutex_unlock(&sg_policy->work_lock); } sg_policy->need_freq_update = true; - return 0; -} - -int sugov_governor(struct cpufreq_policy *policy, unsigned int event) -{ - if (event == CPUFREQ_GOV_POLICY_INIT) { - return sugov_init(policy); - } else if (policy->governor_data) { - switch (event) { - case CPUFREQ_GOV_POLICY_EXIT: - return sugov_exit(policy); - case CPUFREQ_GOV_START: - return sugov_start(policy); - case CPUFREQ_GOV_STOP: - return sugov_stop(policy); - case CPUFREQ_GOV_LIMITS: - return sugov_limits(policy); - } - } - return -EINVAL; } static struct cpufreq_governor schedutil_gov = { .name = "schedutil", - .governor = sugov_governor, .owner = THIS_MODULE, + .init = sugov_init, + .exit = sugov_exit, + .start = sugov_start, + .stop = sugov_stop, + .limits = sugov_limits, }; static int __init sugov_module_init(void) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 97e7b793df35..d12bd958077e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4369,8 +4369,8 @@ static void show_pwq(struct pool_workqueue *pwq) /** * show_workqueue_state - dump workqueue state * - * Called from a sysrq handler and prints out all busy workqueues and - * pools. + * Called from a sysrq handler or try_to_freeze_tasks() and prints out + * all busy workqueues and pools. */ void show_workqueue_state(void) { diff --git a/scripts/analyze_suspend.py b/scripts/analyze_suspend.py index 93e1fd40f430..a0ba48fa2c5e 100755 --- a/scripts/analyze_suspend.py +++ b/scripts/analyze_suspend.py @@ -19,6 +19,17 @@ # Authors: # Todd Brandt <todd.e.brandt@linux.intel.com> # +# Links: +# Home Page +# https://01.org/suspendresume +# Source repo +# https://github.com/01org/suspendresume +# Documentation +# Getting Started +# https://01.org/suspendresume/documentation/getting-started +# Command List: +# https://01.org/suspendresume/documentation/command-list +# # Description: # This tool is designed to assist kernel and OS developers in optimizing # their linux stack's suspend/resume time. Using a kernel image built @@ -35,6 +46,8 @@ # CONFIG_FTRACE=y # CONFIG_FUNCTION_TRACER=y # CONFIG_FUNCTION_GRAPH_TRACER=y +# CONFIG_KPROBES=y +# CONFIG_KPROBES_ON_FTRACE=y # # For kernel versions older than 3.15: # The following additional kernel parameters are required: @@ -52,6 +65,7 @@ import re import platform from datetime import datetime import struct +import ConfigParser # ----------------- CLASSES -------------------- @@ -60,8 +74,15 @@ import struct # A global, single-instance container used to # store system values and test parameters class SystemValues: - version = 3.0 + ansi = False + version = '4.2' verbose = False + addlogs = False + mindevlen = 0.001 + mincglen = 1.0 + srgap = 0 + cgexp = False + outdir = '' testdir = '.' tpath = '/sys/kernel/debug/tracing/' fpdtpath = '/sys/firmware/acpi/tables/FPDT' @@ -71,26 +92,21 @@ class SystemValues: 'device_pm_callback_end', 'device_pm_callback_start' ] - modename = { - 'freeze': 'Suspend-To-Idle (S0)', - 'standby': 'Power-On Suspend (S1)', - 'mem': 'Suspend-to-RAM (S3)', - 'disk': 'Suspend-to-disk (S4)' - } + testcommand = '' mempath = '/dev/mem' powerfile = '/sys/power/state' suspendmode = 'mem' hostname = 'localhost' prefix = 'test' teststamp = '' + dmesgstart = 0.0 dmesgfile = '' ftracefile = '' htmlfile = '' + embedded = False rtcwake = False rtcwaketime = 10 rtcpath = '' - android = False - adb = 'adb' devicefilter = [] stamp = 0 execcount = 1 @@ -98,16 +114,90 @@ class SystemValues: usecallgraph = False usetraceevents = False usetraceeventsonly = False + usetracemarkers = True + usekprobes = True + usedevsrc = False notestrun = False - altdevname = dict() + devprops = dict() postresumetime = 0 + devpropfmt = '# Device Properties: .*' tracertypefmt = '# tracer: (?P<t>.*)' firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$' postresumefmt = '# post resume time (?P<t>[0-9]*)$' stampfmt = '# suspend-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\ '(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\ ' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$' + kprobecolor = 'rgba(204,204,204,0.5)' + synccolor = 'rgba(204,204,204,0.5)' + debugfuncs = [] + tracefuncs = { + 'sys_sync': dict(), + 'pm_prepare_console': dict(), + 'pm_notifier_call_chain': dict(), + 'freeze_processes': dict(), + 'freeze_kernel_threads': dict(), + 'pm_restrict_gfp_mask': dict(), + 'acpi_suspend_begin': dict(), + 'suspend_console': dict(), + 'acpi_pm_prepare': dict(), + 'syscore_suspend': dict(), + 'arch_enable_nonboot_cpus_end': dict(), + 'syscore_resume': dict(), + 'acpi_pm_finish': dict(), + 'resume_console': dict(), + 'acpi_pm_end': dict(), + 'pm_restore_gfp_mask': dict(), + 'thaw_processes': dict(), + 'pm_restore_console': dict(), + 'CPU_OFF': { + 'func':'_cpu_down', + 'args_x86_64': {'cpu':'%di:s32'}, + 'format': 'CPU_OFF[{cpu}]', + 'mask': 'CPU_.*_DOWN' + }, + 'CPU_ON': { + 'func':'_cpu_up', + 'args_x86_64': {'cpu':'%di:s32'}, + 'format': 'CPU_ON[{cpu}]', + 'mask': 'CPU_.*_UP' + }, + } + dev_tracefuncs = { + # general wait/delay/sleep + 'msleep': { 'args_x86_64': {'time':'%di:s32'} }, + 'udelay': { 'func':'__const_udelay', 'args_x86_64': {'loops':'%di:s32'} }, + 'acpi_os_stall': dict(), + # ACPI + 'acpi_resume_power_resources': dict(), + 'acpi_ps_parse_aml': dict(), + # filesystem + 'ext4_sync_fs': dict(), + # ATA + 'ata_eh_recover': { 'args_x86_64': {'port':'+36(%di):s32'} }, + # i915 + 'i915_gem_restore_gtt_mappings': dict(), + 'intel_opregion_setup': dict(), + 'intel_dp_detect': dict(), + 'intel_hdmi_detect': dict(), + 'intel_opregion_init': dict(), + } + kprobes_postresume = [ + { + 'name': 'ataportrst', + 'func': 'ata_eh_recover', + 'args': {'port':'+36(%di):s32'}, + 'format': 'ata{port}_port_reset', + 'mask': 'ata.*_port_reset' + } + ] + kprobes = dict() + timeformat = '%.3f' def __init__(self): + # if this is a phoronix test run, set some default options + if('LOG_FILE' in os.environ and 'TEST_RESULTS_IDENTIFIER' in os.environ): + self.embedded = True + self.addlogs = True + self.htmlfile = os.environ['LOG_FILE'] self.hostname = platform.node() if(self.hostname == ''): self.hostname = 'localhost' @@ -118,6 +208,12 @@ class SystemValues: if os.path.exists(rtc) and os.path.exists(rtc+'/date') and \ os.path.exists(rtc+'/time') and os.path.exists(rtc+'/wakealarm'): self.rtcpath = rtc + if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()): + self.ansi = True + def setPrecision(self, num): + if num < 0 or num > 6: + return + self.timeformat = '%.{0}f'.format(num) def setOutputFile(self): if((self.htmlfile == '') and (self.dmesgfile != '')): m = re.match('(?P<name>.*)_dmesg\.txt$', self.dmesgfile) @@ -129,32 +225,37 @@ class SystemValues: self.htmlfile = m.group('name')+'.html' if(self.htmlfile == ''): self.htmlfile = 'output.html' - def initTestOutput(self, subdir): - if(not self.android): - self.prefix = self.hostname - v = open('/proc/version', 'r').read().strip() - kver = string.split(v)[2] - else: - self.prefix = 'android' - v = os.popen(self.adb+' shell cat /proc/version').read().strip() - kver = string.split(v)[2] - testtime = datetime.now().strftime('suspend-%m%d%y-%H%M%S') + def initTestOutput(self, subdir, testpath=''): + self.prefix = self.hostname + v = open('/proc/version', 'r').read().strip() + kver = string.split(v)[2] + n = datetime.now() + testtime = n.strftime('suspend-%m%d%y-%H%M%S') + if not testpath: + testpath = n.strftime('suspend-%y%m%d-%H%M%S') if(subdir != "."): - self.testdir = subdir+"/"+testtime + self.testdir = subdir+"/"+testpath else: - self.testdir = testtime + self.testdir = testpath self.teststamp = \ '# '+testtime+' '+self.prefix+' '+self.suspendmode+' '+kver + if(self.embedded): + self.dmesgfile = \ + '/tmp/'+testtime+'_'+self.suspendmode+'_dmesg.txt' + self.ftracefile = \ + '/tmp/'+testtime+'_'+self.suspendmode+'_ftrace.txt' + return self.dmesgfile = \ self.testdir+'/'+self.prefix+'_'+self.suspendmode+'_dmesg.txt' self.ftracefile = \ self.testdir+'/'+self.prefix+'_'+self.suspendmode+'_ftrace.txt' self.htmlfile = \ self.testdir+'/'+self.prefix+'_'+self.suspendmode+'.html' - os.mkdir(self.testdir) + if not os.path.isdir(self.testdir): + os.mkdir(self.testdir) def setDeviceFilter(self, devnames): self.devicefilter = string.split(devnames) - def rtcWakeAlarm(self): + def rtcWakeAlarmOn(self): os.system('echo 0 > '+self.rtcpath+'/wakealarm') outD = open(self.rtcpath+'/date', 'r').read().strip() outT = open(self.rtcpath+'/time', 'r').read().strip() @@ -172,9 +273,361 @@ class SystemValues: nowtime = int(datetime.now().strftime('%s')) alarm = nowtime + self.rtcwaketime os.system('echo %d > %s/wakealarm' % (alarm, self.rtcpath)) + def rtcWakeAlarmOff(self): + os.system('echo 0 > %s/wakealarm' % self.rtcpath) + def initdmesg(self): + # get the latest time stamp from the dmesg log + fp = os.popen('dmesg') + ktime = '0' + for line in fp: + line = line.replace('\r\n', '') + idx = line.find('[') + if idx > 1: + line = line[idx:] + m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + if(m): + ktime = m.group('ktime') + fp.close() + self.dmesgstart = float(ktime) + def getdmesg(self): + # store all new dmesg lines since initdmesg was called + fp = os.popen('dmesg') + op = open(self.dmesgfile, 'a') + for line in fp: + line = line.replace('\r\n', '') + idx = line.find('[') + if idx > 1: + line = line[idx:] + m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + if(not m): + continue + ktime = float(m.group('ktime')) + if ktime > self.dmesgstart: + op.write(line) + fp.close() + op.close() + def addFtraceFilterFunctions(self, file): + fp = open(file) + list = fp.read().split('\n') + fp.close() + for i in list: + if len(i) < 2: + continue + self.tracefuncs[i] = dict() + def getFtraceFilterFunctions(self, current): + rootCheck(True) + if not current: + os.system('cat '+self.tpath+'available_filter_functions') + return + fp = open(self.tpath+'available_filter_functions') + master = fp.read().split('\n') + fp.close() + if len(self.debugfuncs) > 0: + for i in self.debugfuncs: + if i in master: + print i + else: + print self.colorText(i) + else: + for i in self.tracefuncs: + if 'func' in self.tracefuncs[i]: + i = self.tracefuncs[i]['func'] + if i in master: + print i + else: + print self.colorText(i) + def setFtraceFilterFunctions(self, list): + fp = open(self.tpath+'available_filter_functions') + master = fp.read().split('\n') + fp.close() + flist = '' + for i in list: + if i not in master: + continue + if ' [' in i: + flist += i.split(' ')[0]+'\n' + else: + flist += i+'\n' + fp = open(self.tpath+'set_graph_function', 'w') + fp.write(flist) + fp.close() + def kprobeMatch(self, name, target): + if name not in self.kprobes: + return False + if re.match(self.kprobes[name]['mask'], target): + return True + return False + def basicKprobe(self, name): + self.kprobes[name] = {'name': name,'func': name,'args': dict(),'format': name,'mask': name} + def defaultKprobe(self, name, kdata): + k = kdata + for field in ['name', 'format', 'mask', 'func']: + if field not in k: + k[field] = name + archargs = 'args_'+platform.machine() + if archargs in k: + k['args'] = k[archargs] + else: + k['args'] = dict() + k['format'] = name + self.kprobes[name] = k + def kprobeColor(self, name): + if name not in self.kprobes or 'color' not in self.kprobes[name]: + return '' + return self.kprobes[name]['color'] + def kprobeDisplayName(self, name, dataraw): + if name not in self.kprobes: + self.basicKprobe(name) + data = '' + quote=0 + # first remvoe any spaces inside quotes, and the quotes + for c in dataraw: + if c == '"': + quote = (quote + 1) % 2 + if quote and c == ' ': + data += '_' + elif c != '"': + data += c + fmt, args = self.kprobes[name]['format'], self.kprobes[name]['args'] + arglist = dict() + # now process the args + for arg in sorted(args): + arglist[arg] = '' + m = re.match('.* '+arg+'=(?P<arg>.*) ', data); + if m: + arglist[arg] = m.group('arg') + else: + m = re.match('.* '+arg+'=(?P<arg>.*)', data); + if m: + arglist[arg] = m.group('arg') + out = fmt.format(**arglist) + out = out.replace(' ', '_').replace('"', '') + return out + def kprobeText(self, kprobe): + name, fmt, func, args = kprobe['name'], kprobe['format'], kprobe['func'], kprobe['args'] + if re.findall('{(?P<n>[a-z,A-Z,0-9]*)}', func): + doError('Kprobe "%s" has format info in the function name "%s"' % (name, func), False) + for arg in re.findall('{(?P<n>[a-z,A-Z,0-9]*)}', fmt): + if arg not in args: + doError('Kprobe "%s" is missing argument "%s"' % (name, arg), False) + val = 'p:%s_cal %s' % (name, func) + for i in sorted(args): + val += ' %s=%s' % (i, args[i]) + val += '\nr:%s_ret %s $retval\n' % (name, func) + return val + def addKprobes(self): + # first test each kprobe + print('INITIALIZING KPROBES...') + rejects = [] + for name in sorted(self.kprobes): + if not self.testKprobe(self.kprobes[name]): + rejects.append(name) + # remove all failed ones from the list + for name in rejects: + vprint('Skipping KPROBE: %s' % name) + self.kprobes.pop(name) + self.fsetVal('', 'kprobe_events') + kprobeevents = '' + # set the kprobes all at once + for kp in self.kprobes: + val = self.kprobeText(self.kprobes[kp]) + vprint('Adding KPROBE: %s\n%s' % (kp, val.strip())) + kprobeevents += self.kprobeText(self.kprobes[kp]) + self.fsetVal(kprobeevents, 'kprobe_events') + # verify that the kprobes were set as ordered + check = self.fgetVal('kprobe_events') + linesout = len(kprobeevents.split('\n')) + linesack = len(check.split('\n')) + if linesack < linesout: + # if not, try appending the kprobes 1 by 1 + for kp in self.kprobes: + kprobeevents = self.kprobeText(self.kprobes[kp]) + self.fsetVal(kprobeevents, 'kprobe_events', 'a') + self.fsetVal('1', 'events/kprobes/enable') + def testKprobe(self, kprobe): + kprobeevents = self.kprobeText(kprobe) + if not kprobeevents: + return False + try: + self.fsetVal(kprobeevents, 'kprobe_events') + check = self.fgetVal('kprobe_events') + except: + return False + linesout = len(kprobeevents.split('\n')) + linesack = len(check.split('\n')) + if linesack < linesout: + return False + return True + def fsetVal(self, val, path, mode='w'): + file = self.tpath+path + if not os.path.exists(file): + return False + try: + fp = open(file, mode) + fp.write(val) + fp.close() + except: + pass + return True + def fgetVal(self, path): + file = self.tpath+path + res = '' + if not os.path.exists(file): + return res + try: + fp = open(file, 'r') + res = fp.read() + fp.close() + except: + pass + return res + def cleanupFtrace(self): + if(self.usecallgraph or self.usetraceevents): + self.fsetVal('0', 'events/kprobes/enable') + self.fsetVal('', 'kprobe_events') + def setupAllKprobes(self): + for name in self.tracefuncs: + self.defaultKprobe(name, self.tracefuncs[name]) + for name in self.dev_tracefuncs: + self.defaultKprobe(name, self.dev_tracefuncs[name]) + def isCallgraphFunc(self, name): + if len(self.debugfuncs) < 1 and self.suspendmode == 'command': + return True + if name in self.debugfuncs: + return True + funclist = [] + for i in self.tracefuncs: + if 'func' in self.tracefuncs[i]: + funclist.append(self.tracefuncs[i]['func']) + else: + funclist.append(i) + if name in funclist: + return True + return False + def initFtrace(self, testing=False): + tp = self.tpath + print('INITIALIZING FTRACE...') + # turn trace off + self.fsetVal('0', 'tracing_on') + self.cleanupFtrace() + # set the trace clock to global + self.fsetVal('global', 'trace_clock') + # set trace buffer to a huge value + self.fsetVal('nop', 'current_tracer') + self.fsetVal('100000', 'buffer_size_kb') + # go no further if this is just a status check + if testing: + return + if self.usekprobes: + # add tracefunc kprobes so long as were not using full callgraph + if(not self.usecallgraph or len(self.debugfuncs) > 0): + for name in self.tracefuncs: + self.defaultKprobe(name, self.tracefuncs[name]) + if self.usedevsrc: + for name in self.dev_tracefuncs: + self.defaultKprobe(name, self.dev_tracefuncs[name]) + else: + self.usedevsrc = False + self.addKprobes() + # initialize the callgraph trace, unless this is an x2 run + if(self.usecallgraph): + # set trace type + self.fsetVal('function_graph', 'current_tracer') + self.fsetVal('', 'set_ftrace_filter') + # set trace format options + self.fsetVal('print-parent', 'trace_options') + self.fsetVal('funcgraph-abstime', 'trace_options') + self.fsetVal('funcgraph-cpu', 'trace_options') + self.fsetVal('funcgraph-duration', 'trace_options') + self.fsetVal('funcgraph-proc', 'trace_options') + self.fsetVal('funcgraph-tail', 'trace_options') + self.fsetVal('nofuncgraph-overhead', 'trace_options') + self.fsetVal('context-info', 'trace_options') + self.fsetVal('graph-time', 'trace_options') + self.fsetVal('0', 'max_graph_depth') + if len(self.debugfuncs) > 0: + self.setFtraceFilterFunctions(self.debugfuncs) + elif self.suspendmode == 'command': + self.fsetVal('', 'set_graph_function') + else: + cf = ['dpm_run_callback'] + if(self.usetraceeventsonly): + cf += ['dpm_prepare', 'dpm_complete'] + for fn in self.tracefuncs: + if 'func' in self.tracefuncs[fn]: + cf.append(self.tracefuncs[fn]['func']) + else: + cf.append(fn) + self.setFtraceFilterFunctions(cf) + if(self.usetraceevents): + # turn trace events on + events = iter(self.traceevents) + for e in events: + self.fsetVal('1', 'events/power/'+e+'/enable') + # clear the trace buffer + self.fsetVal('', 'trace') + def verifyFtrace(self): + # files needed for any trace data + files = ['buffer_size_kb', 'current_tracer', 'trace', 'trace_clock', + 'trace_marker', 'trace_options', 'tracing_on'] + # files needed for callgraph trace data + tp = self.tpath + if(self.usecallgraph): + files += [ + 'available_filter_functions', + 'set_ftrace_filter', + 'set_graph_function' + ] + for f in files: + if(os.path.exists(tp+f) == False): + return False + return True + def verifyKprobes(self): + # files needed for kprobes to work + files = ['kprobe_events', 'events'] + tp = self.tpath + for f in files: + if(os.path.exists(tp+f) == False): + return False + return True + def colorText(self, str): + if not self.ansi: + return str + return '\x1B[31;40m'+str+'\x1B[m' sysvals = SystemValues() +# Class: DevProps +# Description: +# Simple class which holds property values collected +# for all the devices used in the timeline. +class DevProps: + syspath = '' + altname = '' + async = True + xtraclass = '' + xtrainfo = '' + def out(self, dev): + return '%s,%s,%d;' % (dev, self.altname, self.async) + def debug(self, dev): + print '%s:\n\taltname = %s\n\t async = %s' % (dev, self.altname, self.async) + def altName(self, dev): + if not self.altname or self.altname == dev: + return dev + return '%s [%s]' % (self.altname, dev) + def xtraClass(self): + if self.xtraclass: + return ' '+self.xtraclass + if not self.async: + return ' sync' + return '' + def xtraInfo(self): + if self.xtraclass: + return ' '+self.xtraclass + if self.async: + return ' async' + return ' sync' + # Class: DeviceNode # Description: # A container used to create a device hierachy, with a single root node @@ -228,6 +681,7 @@ class Data: html_device_id = 0 stamp = 0 outfile = '' + dev_ubiquitous = ['msleep', 'udelay'] def __init__(self, num): idchar = 'abcdefghijklmnopqrstuvwxyz' self.testnumber = num @@ -257,6 +711,9 @@ class Data: 'row': 0, 'color': '#FFFFCC', 'order': 9} } self.phases = self.sortedPhases() + self.devicegroups = [] + for phase in self.phases: + self.devicegroups.append([phase]) def getStart(self): return self.dmesg[self.phases[0]]['start'] def setStart(self, time): @@ -273,51 +730,61 @@ class Data: for dev in list: d = list[dev] if(d['pid'] == pid and time >= d['start'] and - time <= d['end']): + time < d['end']): return False return True - def addIntraDevTraceEvent(self, action, name, pid, time): - if(action == 'mutex_lock_try'): - color = 'red' - elif(action == 'mutex_lock_pass'): - color = 'green' - elif(action == 'mutex_unlock'): - color = 'blue' - else: - # create separate colors based on the name - v1 = len(name)*10 % 256 - v2 = string.count(name, 'e')*100 % 256 - v3 = ord(name[0])*20 % 256 - color = '#%06X' % ((v1*0x10000) + (v2*0x100) + v3) - for phase in self.phases: - list = self.dmesg[phase]['list'] - for dev in list: - d = list[dev] - if(d['pid'] == pid and time >= d['start'] and - time <= d['end']): - e = TraceEvent(action, name, color, time) - if('traceevents' not in d): - d['traceevents'] = [] - d['traceevents'].append(e) - return d - break - return 0 - def capIntraDevTraceEvent(self, action, name, pid, time): - for phase in self.phases: + def targetDevice(self, phaselist, start, end, pid=-1): + tgtdev = '' + for phase in phaselist: list = self.dmesg[phase]['list'] - for dev in list: - d = list[dev] - if(d['pid'] == pid and time >= d['start'] and - time <= d['end']): - if('traceevents' not in d): - return - for e in d['traceevents']: - if(e.action == action and - e.name == name and not e.ready): - e.length = time - e.time - e.ready = True - break - return + for devname in list: + dev = list[devname] + if(pid >= 0 and dev['pid'] != pid): + continue + devS = dev['start'] + devE = dev['end'] + if(start < devS or start >= devE or end <= devS or end > devE): + continue + tgtdev = dev + break + return tgtdev + def addDeviceFunctionCall(self, displayname, kprobename, proc, pid, start, end, cdata, rdata): + machstart = self.dmesg['suspend_machine']['start'] + machend = self.dmesg['resume_machine']['end'] + tgtdev = self.targetDevice(self.phases, start, end, pid) + if not tgtdev and start >= machstart and end < machend: + # device calls in machine phases should be serial + tgtdev = self.targetDevice(['suspend_machine', 'resume_machine'], start, end) + if not tgtdev: + if 'scsi_eh' in proc: + self.newActionGlobal(proc, start, end, pid) + self.addDeviceFunctionCall(displayname, kprobename, proc, pid, start, end, cdata, rdata) + else: + vprint('IGNORE: %s[%s](%d) [%f - %f] | %s | %s | %s' % (displayname, kprobename, + pid, start, end, cdata, rdata, proc)) + return False + # detail block fits within tgtdev + if('src' not in tgtdev): + tgtdev['src'] = [] + title = cdata+' '+rdata + mstr = '\(.*\) *(?P<args>.*) *\((?P<caller>.*)\+.* arg1=(?P<ret>.*)' + m = re.match(mstr, title) + if m: + c = m.group('caller') + a = m.group('args').strip() + r = m.group('ret') + if len(r) > 6: + r = '' + else: + r = 'ret=%s ' % r + l = '%0.3fms' % ((end - start) * 1000) + if kprobename in self.dev_ubiquitous: + title = '%s(%s) <- %s, %s(%s)' % (displayname, a, c, r, l) + else: + title = '%s(%s) %s(%s)' % (displayname, a, r, l) + e = TraceEvent(title, kprobename, start, end - start) + tgtdev['src'].append(e) + return True def trimTimeVal(self, t, t0, dT, left): if left: if(t > t0): @@ -353,11 +820,11 @@ class Data: cg.end = self.trimTimeVal(cg.end, t0, dT, left) for line in cg.list: line.time = self.trimTimeVal(line.time, t0, dT, left) - if('traceevents' in d): - for e in d['traceevents']: + if('src' in d): + for e in d['src']: e.time = self.trimTimeVal(e.time, t0, dT, left) def normalizeTime(self, tZero): - # first trim out any standby or freeze clock time + # trim out any standby or freeze clock time if(self.tSuspended != self.tResumed): if(self.tResumed > tZero): self.trimTime(self.tSuspended, \ @@ -365,29 +832,6 @@ class Data: else: self.trimTime(self.tSuspended, \ self.tResumed-self.tSuspended, False) - # shift the timeline so that tZero is the new 0 - self.tSuspended -= tZero - self.tResumed -= tZero - self.start -= tZero - self.end -= tZero - for phase in self.phases: - p = self.dmesg[phase] - p['start'] -= tZero - p['end'] -= tZero - list = p['list'] - for name in list: - d = list[name] - d['start'] -= tZero - d['end'] -= tZero - if('ftrace' in d): - cg = d['ftrace'] - cg.start -= tZero - cg.end -= tZero - for line in cg.list: - line.time -= tZero - if('traceevents' in d): - for e in d['traceevents']: - e.time -= tZero def newPhaseWithSingleAction(self, phasename, devname, start, end, color): for phase in self.phases: self.dmesg[phase]['order'] += 1 @@ -417,6 +861,7 @@ class Data: {'list': list, 'start': start, 'end': end, 'row': 0, 'color': color, 'order': order} self.phases = self.sortedPhases() + self.devicegroups.append([phasename]) def setPhase(self, phase, ktime, isbegin): if(isbegin): self.dmesg[phase]['start'] = ktime @@ -442,7 +887,10 @@ class Data: for devname in phaselist: dev = phaselist[devname] if(dev['end'] < 0): - dev['end'] = end + for p in self.phases: + if self.dmesg[p]['end'] > dev['start']: + dev['end'] = self.dmesg[p]['end'] + break vprint('%s (%s): callback didnt return' % (devname, phase)) def deviceFilter(self, devicefilter): # remove all by the relatives of the filter devnames @@ -472,22 +920,58 @@ class Data: # if any calls never returned, clip them at system resume end for phase in self.phases: self.fixupInitcalls(phase, self.getEnd()) - def newActionGlobal(self, name, start, end): + def isInsideTimeline(self, start, end): + if(self.start <= start and self.end > start): + return True + return False + def phaseOverlap(self, phases): + rmgroups = [] + newgroup = [] + for group in self.devicegroups: + for phase in phases: + if phase not in group: + continue + for p in group: + if p not in newgroup: + newgroup.append(p) + if group not in rmgroups: + rmgroups.append(group) + for group in rmgroups: + self.devicegroups.remove(group) + self.devicegroups.append(newgroup) + def newActionGlobal(self, name, start, end, pid=-1, color=''): + # if event starts before timeline start, expand timeline + if(start < self.start): + self.setStart(start) + # if event ends after timeline end, expand the timeline + if(end > self.end): + self.setEnd(end) # which phase is this device callback or action "in" targetphase = "none" + htmlclass = '' overlap = 0.0 + phases = [] for phase in self.phases: pstart = self.dmesg[phase]['start'] pend = self.dmesg[phase]['end'] o = max(0, min(end, pend) - max(start, pstart)) - if(o > overlap): + if o > 0: + phases.append(phase) + if o > overlap: + if overlap > 0 and phase == 'post_resume': + continue targetphase = phase overlap = o + if pid == -2: + htmlclass = ' bg' + if len(phases) > 1: + htmlclass = ' bg' + self.phaseOverlap(phases) if targetphase in self.phases: - self.newAction(targetphase, name, -1, '', start, end, '') - return True + newname = self.newAction(targetphase, name, pid, '', start, end, '', htmlclass, color) + return (targetphase, newname) return False - def newAction(self, phase, name, pid, parent, start, end, drv): + def newAction(self, phase, name, pid, parent, start, end, drv, htmlclass='', color=''): # new device callback for a specific phase self.html_device_id += 1 devid = '%s%d' % (self.idstr, self.html_device_id) @@ -495,8 +979,19 @@ class Data: length = -1.0 if(start >= 0 and end >= 0): length = end - start + if pid == -2: + i = 2 + origname = name + while(name in list): + name = '%s[%d]' % (origname, i) + i += 1 list[name] = {'start': start, 'end': end, 'pid': pid, 'par': parent, 'length': length, 'row': 0, 'id': devid, 'drv': drv } + if htmlclass: + list[name]['htmlclass'] = htmlclass + if color: + list[name]['color'] = color + return name def deviceIDs(self, devlist, phase): idlist = [] list = self.dmesg[phase]['list'] @@ -536,10 +1031,21 @@ class Data: vprint(' %16s: %f - %f (%d devices)' % (phase, \ self.dmesg[phase]['start'], self.dmesg[phase]['end'], dc)) vprint(' test end: %f' % self.end) + def deviceChildrenAllPhases(self, devname): + devlist = [] + for phase in self.phases: + list = self.deviceChildren(devname, phase) + for dev in list: + if dev not in devlist: + devlist.append(dev) + return devlist def masterTopology(self, name, list, depth): node = DeviceNode(name, depth) for cname in list: - clist = self.deviceChildren(cname, 'resume') + # avoid recursions + if name == cname: + continue + clist = self.deviceChildrenAllPhases(cname) cnode = self.masterTopology(cname, clist, depth+1) node.children.append(cnode) return node @@ -580,7 +1086,8 @@ class Data: list = self.dmesg[phase]['list'] for dev in list: pdev = list[dev]['par'] - if(re.match('[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)): + pid = list[dev]['pid'] + if(pid < 0 or re.match('[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)): continue if pdev and pdev not in real and pdev not in rootlist: rootlist.append(pdev) @@ -589,22 +1096,33 @@ class Data: rootlist = self.rootDeviceList() master = self.masterTopology('', rootlist, 0) return self.printTopology(master) + def selectTimelineDevices(self, widfmt, tTotal, mindevlen): + # only select devices that will actually show up in html + self.tdevlist = dict() + for phase in self.dmesg: + devlist = [] + list = self.dmesg[phase]['list'] + for dev in list: + length = (list[dev]['end'] - list[dev]['start']) * 1000 + width = widfmt % (((list[dev]['end']-list[dev]['start'])*100)/tTotal) + if width != '0.000000' and length >= mindevlen: + devlist.append(dev) + self.tdevlist[phase] = devlist # Class: TraceEvent # Description: # A container for trace event data found in the ftrace file class TraceEvent: - ready = False - name = '' + text = '' time = 0.0 - color = '#FFFFFF' length = 0.0 - action = '' - def __init__(self, a, n, c, t): - self.action = a - self.name = n - self.color = c + title = '' + row = 0 + def __init__(self, a, n, t, l): + self.title = a + self.text = n self.time = t + self.length = l # Class: FTraceLine # Description: @@ -623,11 +1141,14 @@ class FTraceLine: fcall = False freturn = False fevent = False + fkprobe = False depth = 0 name = '' type = '' - def __init__(self, t, m, d): + def __init__(self, t, m='', d=''): self.time = float(t) + if not m and not d: + return # is this a trace event if(d == 'traceevent' or re.match('^ *\/\* *(?P<msg>.*) \*\/ *$', m)): if(d == 'traceevent'): @@ -644,6 +1165,18 @@ class FTraceLine: self.type = emm.group('call') else: self.name = msg + km = re.match('^(?P<n>.*)_cal$', self.type) + if km: + self.fcall = True + self.fkprobe = True + self.type = km.group('n') + return + km = re.match('^(?P<n>.*)_ret$', self.type) + if km: + self.freturn = True + self.fkprobe = True + self.type = km.group('n') + return self.fevent = True return # convert the duration to seconds @@ -662,7 +1195,7 @@ class FTraceLine: # includes comment with function name match = re.match('^} *\/\* *(?P<n>.*) *\*\/$', m) if(match): - self.name = match.group('n') + self.name = match.group('n').strip() # function call else: self.fcall = True @@ -670,19 +1203,19 @@ class FTraceLine: if(m[-1] == '{'): match = re.match('^(?P<n>.*) *\(.*', m) if(match): - self.name = match.group('n') + self.name = match.group('n').strip() # function call with no children (leaf) elif(m[-1] == ';'): self.freturn = True match = re.match('^(?P<n>.*) *\(.*', m) if(match): - self.name = match.group('n') + self.name = match.group('n').strip() # something else (possibly a trace marker) else: self.name = m def getDepth(self, str): return len(str)/2 - def debugPrint(self, dev): + def debugPrint(self, dev=''): if(self.freturn and self.fcall): print('%s -- %f (%02d): %s(); (%.3f us)' % (dev, self.time, \ self.depth, self.name, self.length*1000000)) @@ -692,6 +1225,33 @@ class FTraceLine: else: print('%s -- %f (%02d): %s() { (%.3f us)' % (dev, self.time, \ self.depth, self.name, self.length*1000000)) + def startMarker(self): + global sysvals + # Is this the starting line of a suspend? + if not self.fevent: + return False + if sysvals.usetracemarkers: + if(self.name == 'SUSPEND START'): + return True + return False + else: + if(self.type == 'suspend_resume' and + re.match('suspend_enter\[.*\] begin', self.name)): + return True + return False + def endMarker(self): + # Is this the ending line of a resume? + if not self.fevent: + return False + if sysvals.usetracemarkers: + if(self.name == 'RESUME COMPLETE'): + return True + return False + else: + if(self.type == 'suspend_resume' and + re.match('thaw_processes\[.*\] end', self.name)): + return True + return False # Class: FTraceCallGraph # Description: @@ -705,54 +1265,124 @@ class FTraceCallGraph: list = [] invalid = False depth = 0 - def __init__(self): + pid = 0 + def __init__(self, pid): self.start = -1.0 self.end = -1.0 self.list = [] self.depth = 0 - def setDepth(self, line): + self.pid = pid + def addLine(self, line, debug=False): + # if this is already invalid, just leave + if(self.invalid): + return False + # invalidate on too much data or bad depth + if(len(self.list) >= 1000000 or self.depth < 0): + self.invalidate(line) + return False + # compare current depth with this lines pre-call depth + prelinedep = line.depth + if(line.freturn and not line.fcall): + prelinedep += 1 + last = 0 + lasttime = line.time + virtualfname = 'execution_misalignment' + if len(self.list) > 0: + last = self.list[-1] + lasttime = last.time + # handle low misalignments by inserting returns + if prelinedep < self.depth: + if debug and last: + print '-------- task %d --------' % self.pid + last.debugPrint() + idx = 0 + # add return calls to get the depth down + while prelinedep < self.depth: + if debug: + print 'MISALIGN LOW (add returns): C%d - eC%d' % (self.depth, prelinedep) + self.depth -= 1 + if idx == 0 and last and last.fcall and not last.freturn: + # special case, turn last call into a leaf + last.depth = self.depth + last.freturn = True + last.length = line.time - last.time + if debug: + last.debugPrint() + else: + vline = FTraceLine(lasttime) + vline.depth = self.depth + vline.name = virtualfname + vline.freturn = True + self.list.append(vline) + if debug: + vline.debugPrint() + idx += 1 + if debug: + line.debugPrint() + print '' + # handle high misalignments by inserting calls + elif prelinedep > self.depth: + if debug and last: + print '-------- task %d --------' % self.pid + last.debugPrint() + idx = 0 + # add calls to get the depth up + while prelinedep > self.depth: + if debug: + print 'MISALIGN HIGH (add calls): C%d - eC%d' % (self.depth, prelinedep) + if idx == 0 and line.freturn and not line.fcall: + # special case, turn this return into a leaf + line.fcall = True + prelinedep -= 1 + else: + vline = FTraceLine(lasttime) + vline.depth = self.depth + vline.name = virtualfname + vline.fcall = True + if debug: + vline.debugPrint() + self.list.append(vline) + self.depth += 1 + if not last: + self.start = vline.time + idx += 1 + if debug: + line.debugPrint() + print '' + # process the call and set the new depth if(line.fcall and not line.freturn): - line.depth = self.depth self.depth += 1 elif(line.freturn and not line.fcall): self.depth -= 1 - line.depth = self.depth - else: - line.depth = self.depth - def addLine(self, line, match): - if(not self.invalid): - self.setDepth(line) + if len(self.list) < 1: + self.start = line.time + self.list.append(line) if(line.depth == 0 and line.freturn): if(self.start < 0): self.start = line.time self.end = line.time - self.list.append(line) + if line.fcall: + self.end += line.length + if self.list[0].name == virtualfname: + self.invalid = True return True - if(self.invalid): - return False - if(len(self.list) >= 1000000 or self.depth < 0): - if(len(self.list) > 0): - first = self.list[0] - self.list = [] - self.list.append(first) - self.invalid = True - if(not match): - return False - id = 'task %s cpu %s' % (match.group('pid'), match.group('cpu')) - window = '(%f - %f)' % (self.start, line.time) - if(self.depth < 0): - print('Too much data for '+id+\ - ' (buffer overflow), ignoring this callback') - else: - print('Too much data for '+id+\ - ' '+window+', ignoring this callback') - return False - self.list.append(line) - if(self.start < 0): - self.start = line.time return False + def invalidate(self, line): + if(len(self.list) > 0): + first = self.list[0] + self.list = [] + self.list.append(first) + self.invalid = True + id = 'task %s' % (self.pid) + window = '(%f - %f)' % (self.start, line.time) + if(self.depth < 0): + vprint('Too much data for '+id+\ + ' (buffer overflow), ignoring this callback') + else: + vprint('Too much data for '+id+\ + ' '+window+', ignoring this callback') def slice(self, t0, tN): - minicg = FTraceCallGraph() + minicg = FTraceCallGraph(0) count = -1 firstdepth = 0 for l in self.list: @@ -764,13 +1394,26 @@ class FTraceCallGraph: firstdepth = l.depth count = 0 l.depth -= firstdepth - minicg.addLine(l, 0) + minicg.addLine(l) if((count == 0 and l.freturn and l.fcall) or (count > 0 and l.depth <= 0)): break count += 1 return minicg - def sanityCheck(self): + def repair(self, enddepth): + # bring the depth back to 0 with additional returns + fixed = False + last = self.list[-1] + for i in reversed(range(enddepth)): + t = FTraceLine(last.time) + t.depth = i + t.freturn = True + fixed = self.addLine(t) + if fixed: + self.end = last.time + return True + return False + def postProcess(self, debug=False): stack = dict() cnt = 0 for l in self.list: @@ -779,98 +1422,317 @@ class FTraceCallGraph: cnt += 1 elif(l.freturn and not l.fcall): if(l.depth not in stack): + if debug: + print 'Post Process Error: Depth missing' + l.debugPrint() return False + # transfer total time from return line to call line stack[l.depth].length = l.length - stack[l.depth] = 0 + stack.pop(l.depth) l.length = 0 cnt -= 1 if(cnt == 0): + # trace caught the whole call tree return True - return False - def debugPrint(self, filename): - if(filename == 'stdout'): - print('[%f - %f]') % (self.start, self.end) - for l in self.list: - if(l.freturn and l.fcall): - print('%f (%02d): %s(); (%.3f us)' % (l.time, \ - l.depth, l.name, l.length*1000000)) - elif(l.freturn): - print('%f (%02d): %s} (%.3f us)' % (l.time, \ - l.depth, l.name, l.length*1000000)) - else: - print('%f (%02d): %s() { (%.3f us)' % (l.time, \ - l.depth, l.name, l.length*1000000)) - print(' ') - else: - fp = open(filename, 'w') - print(filename) - for l in self.list: - if(l.freturn and l.fcall): - fp.write('%f (%02d): %s(); (%.3f us)\n' % (l.time, \ - l.depth, l.name, l.length*1000000)) - elif(l.freturn): - fp.write('%f (%02d): %s} (%.3f us)\n' % (l.time, \ - l.depth, l.name, l.length*1000000)) - else: - fp.write('%f (%02d): %s() { (%.3f us)\n' % (l.time, \ - l.depth, l.name, l.length*1000000)) - fp.close() + elif(cnt < 0): + if debug: + print 'Post Process Error: Depth is less than 0' + return False + # trace ended before call tree finished + return self.repair(cnt) + def deviceMatch(self, pid, data): + found = False + # add the callgraph data to the device hierarchy + borderphase = { + 'dpm_prepare': 'suspend_prepare', + 'dpm_complete': 'resume_complete' + } + if(self.list[0].name in borderphase): + p = borderphase[self.list[0].name] + list = data.dmesg[p]['list'] + for devname in list: + dev = list[devname] + if(pid == dev['pid'] and + self.start <= dev['start'] and + self.end >= dev['end']): + dev['ftrace'] = self.slice(dev['start'], dev['end']) + found = True + return found + for p in data.phases: + if(data.dmesg[p]['start'] <= self.start and + self.start <= data.dmesg[p]['end']): + list = data.dmesg[p]['list'] + for devname in list: + dev = list[devname] + if(pid == dev['pid'] and + self.start <= dev['start'] and + self.end >= dev['end']): + dev['ftrace'] = self + found = True + break + break + return found + def newActionFromFunction(self, data): + name = self.list[0].name + if name in ['dpm_run_callback', 'dpm_prepare', 'dpm_complete']: + return + fs = self.start + fe = self.end + if fs < data.start or fe > data.end: + return + phase = '' + for p in data.phases: + if(data.dmesg[p]['start'] <= self.start and + self.start < data.dmesg[p]['end']): + phase = p + break + if not phase: + return + out = data.newActionGlobal(name, fs, fe, -2) + if out: + phase, myname = out + data.dmesg[phase]['list'][myname]['ftrace'] = self + def debugPrint(self): + print('[%f - %f] %s (%d)') % (self.start, self.end, self.list[0].name, self.pid) + for l in self.list: + if(l.freturn and l.fcall): + print('%f (%02d): %s(); (%.3f us)' % (l.time, \ + l.depth, l.name, l.length*1000000)) + elif(l.freturn): + print('%f (%02d): %s} (%.3f us)' % (l.time, \ + l.depth, l.name, l.length*1000000)) + else: + print('%f (%02d): %s() { (%.3f us)' % (l.time, \ + l.depth, l.name, l.length*1000000)) + print(' ') # Class: Timeline # Description: -# A container for a suspend/resume html timeline. In older versions -# of the script there were multiple timelines, but in the latest -# there is only one. +# A container for a device timeline which calculates +# all the html properties to display it correctly class Timeline: html = {} - scaleH = 0.0 # height of the row as a percent of the timeline height - rowH = 0.0 # height of each row in percent of the timeline height - row_height_pixels = 30 - maxrows = 0 - height = 0 - def __init__(self): + height = 0 # total timeline height + scaleH = 20 # timescale (top) row height + rowH = 30 # device row height + bodyH = 0 # body height + rows = 0 # total timeline rows + phases = [] + rowmaxlines = dict() + rowcount = dict() + rowheight = dict() + def __init__(self, rowheight): + self.rowH = rowheight self.html = { + 'header': '', 'timeline': '', 'legend': '', - 'scale': '' } - def setRows(self, rows): - self.maxrows = int(rows) - self.scaleH = 100.0/float(self.maxrows) - self.height = self.maxrows*self.row_height_pixels - r = float(self.maxrows - 1) - if(r < 1.0): - r = 1.0 - self.rowH = (100.0 - self.scaleH)/r + # Function: getDeviceRows + # Description: + # determine how may rows the device funcs will take + # Arguments: + # rawlist: the list of devices/actions for a single phase + # Output: + # The total number of rows needed to display this phase of the timeline + def getDeviceRows(self, rawlist): + # clear all rows and set them to undefined + lendict = dict() + for item in rawlist: + item.row = -1 + lendict[item] = item.length + list = [] + for i in sorted(lendict, key=lendict.get, reverse=True): + list.append(i) + remaining = len(list) + rowdata = dict() + row = 1 + # try to pack each row with as many ranges as possible + while(remaining > 0): + if(row not in rowdata): + rowdata[row] = [] + for i in list: + if(i.row >= 0): + continue + s = i.time + e = i.time + i.length + valid = True + for ritem in rowdata[row]: + rs = ritem.time + re = ritem.time + ritem.length + if(not (((s <= rs) and (e <= rs)) or + ((s >= re) and (e >= re)))): + valid = False + break + if(valid): + rowdata[row].append(i) + i.row = row + remaining -= 1 + row += 1 + return row + # Function: getPhaseRows + # Description: + # Organize the timeline entries into the smallest + # number of rows possible, with no entry overlapping + # Arguments: + # list: the list of devices/actions for a single phase + # devlist: string list of device names to use + # Output: + # The total number of rows needed to display this phase of the timeline + def getPhaseRows(self, dmesg, devlist): + # clear all rows and set them to undefined + remaining = len(devlist) + rowdata = dict() + row = 0 + lendict = dict() + myphases = [] + for item in devlist: + if item[0] not in self.phases: + self.phases.append(item[0]) + if item[0] not in myphases: + myphases.append(item[0]) + self.rowmaxlines[item[0]] = dict() + self.rowheight[item[0]] = dict() + dev = dmesg[item[0]]['list'][item[1]] + dev['row'] = -1 + lendict[item] = float(dev['end']) - float(dev['start']) + if 'src' in dev: + dev['devrows'] = self.getDeviceRows(dev['src']) + lenlist = [] + for i in sorted(lendict, key=lendict.get, reverse=True): + lenlist.append(i) + orderedlist = [] + for item in lenlist: + dev = dmesg[item[0]]['list'][item[1]] + if dev['pid'] == -2: + orderedlist.append(item) + for item in lenlist: + if item not in orderedlist: + orderedlist.append(item) + # try to pack each row with as many ranges as possible + while(remaining > 0): + rowheight = 1 + if(row not in rowdata): + rowdata[row] = [] + for item in orderedlist: + dev = dmesg[item[0]]['list'][item[1]] + if(dev['row'] < 0): + s = dev['start'] + e = dev['end'] + valid = True + for ritem in rowdata[row]: + rs = ritem['start'] + re = ritem['end'] + if(not (((s <= rs) and (e <= rs)) or + ((s >= re) and (e >= re)))): + valid = False + break + if(valid): + rowdata[row].append(dev) + dev['row'] = row + remaining -= 1 + if 'devrows' in dev and dev['devrows'] > rowheight: + rowheight = dev['devrows'] + for phase in myphases: + self.rowmaxlines[phase][row] = rowheight + self.rowheight[phase][row] = rowheight * self.rowH + row += 1 + if(row > self.rows): + self.rows = int(row) + for phase in myphases: + self.rowcount[phase] = row + return row + def phaseRowHeight(self, phase, row): + return self.rowheight[phase][row] + def phaseRowTop(self, phase, row): + top = 0 + for i in sorted(self.rowheight[phase]): + if i >= row: + break + top += self.rowheight[phase][i] + return top + # Function: calcTotalRows + # Description: + # Calculate the heights and offsets for the header and rows + def calcTotalRows(self): + maxrows = 0 + standardphases = [] + for phase in self.phases: + total = 0 + for i in sorted(self.rowmaxlines[phase]): + total += self.rowmaxlines[phase][i] + if total > maxrows: + maxrows = total + if total == self.rowcount[phase]: + standardphases.append(phase) + self.height = self.scaleH + (maxrows*self.rowH) + self.bodyH = self.height - self.scaleH + for phase in standardphases: + for i in sorted(self.rowheight[phase]): + self.rowheight[phase][i] = self.bodyH/self.rowcount[phase] + # Function: createTimeScale + # Description: + # Create the timescale for a timeline block + # Arguments: + # m0: start time (mode begin) + # mMax: end time (mode end) + # tTotal: total timeline time + # mode: suspend or resume + # Output: + # The html code needed to display the time scale + def createTimeScale(self, m0, mMax, tTotal, mode): + timescale = '<div class="t" style="right:{0}%">{1}</div>\n' + rline = '<div class="t" style="left:0;border-left:1px solid black;border-right:0;">Resume</div>\n' + output = '<div class="timescale">\n' + # set scale for timeline + mTotal = mMax - m0 + tS = 0.1 + if(tTotal <= 0): + return output+'</div>\n' + if(tTotal > 4): + tS = 1 + divTotal = int(mTotal/tS) + 1 + divEdge = (mTotal - tS*(divTotal-1))*100/mTotal + for i in range(divTotal): + htmlline = '' + if(mode == 'resume'): + pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal)) + val = '%0.fms' % (float(i)*tS*1000) + htmlline = timescale.format(pos, val) + if(i == 0): + htmlline = rline + else: + pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal) - divEdge) + val = '%0.fms' % (float(i-divTotal+1)*tS*1000) + if(i == divTotal - 1): + val = 'Suspend' + htmlline = timescale.format(pos, val) + output += htmlline + output += '</div>\n' + return output -# Class: TestRun +# Class: TestProps # Description: -# A container for a suspend/resume test run. This is necessary as -# there could be more than one, and they need to be separate. -class TestRun: +# A list of values describing the properties of these test runs +class TestProps: + stamp = '' + tracertype = '' + S0i3 = False + fwdata = [] ftrace_line_fmt_fg = \ '^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\ ' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\ - '[ +!]*(?P<dur>[0-9\.]*) .*\| (?P<msg>.*)' + '[ +!#\*@$]*(?P<dur>[0-9\.]*) .*\| (?P<msg>.*)' ftrace_line_fmt_nop = \ ' *(?P<proc>.*)-(?P<pid>[0-9]*) *\[(?P<cpu>[0-9]*)\] *'+\ '(?P<flags>.{4}) *(?P<time>[0-9\.]*): *'+\ '(?P<msg>.*)' ftrace_line_fmt = ftrace_line_fmt_nop cgformat = False - ftemp = dict() - ttemp = dict() - inthepipe = False - tracertype = '' data = 0 - def __init__(self, dataobj): - self.data = dataobj - self.ftemp = dict() - self.ttemp = dict() - def isReady(self): - if(tracertype == '' or not data): - return False - return True + ktemp = dict() + def __init__(self): + self.ktemp = dict() def setTracerType(self, tracer): self.tracertype = tracer if(tracer == 'function_graph'): @@ -881,6 +1743,19 @@ class TestRun: else: doError('Invalid tracer format: [%s]' % tracer, False) +# Class: TestRun +# Description: +# A container for a suspend/resume test run. This is necessary as +# there could be more than one, and they need to be separate. +class TestRun: + ftemp = dict() + ttemp = dict() + data = 0 + def __init__(self, dataobj): + self.data = dataobj + self.ftemp = dict() + self.ttemp = dict() + # ----------------- FUNCTIONS -------------------- # Function: vprint @@ -893,104 +1768,16 @@ def vprint(msg): if(sysvals.verbose): print(msg) -# Function: initFtrace -# Description: -# Configure ftrace to use trace events and/or a callgraph -def initFtrace(): - global sysvals - - tp = sysvals.tpath - cf = 'dpm_run_callback' - if(sysvals.usetraceeventsonly): - cf = '-e dpm_prepare -e dpm_complete -e dpm_run_callback' - if(sysvals.usecallgraph or sysvals.usetraceevents): - print('INITIALIZING FTRACE...') - # turn trace off - os.system('echo 0 > '+tp+'tracing_on') - # set the trace clock to global - os.system('echo global > '+tp+'trace_clock') - # set trace buffer to a huge value - os.system('echo nop > '+tp+'current_tracer') - os.system('echo 100000 > '+tp+'buffer_size_kb') - # initialize the callgraph trace, unless this is an x2 run - if(sysvals.usecallgraph and sysvals.execcount == 1): - # set trace type - os.system('echo function_graph > '+tp+'current_tracer') - os.system('echo "" > '+tp+'set_ftrace_filter') - # set trace format options - os.system('echo funcgraph-abstime > '+tp+'trace_options') - os.system('echo funcgraph-proc > '+tp+'trace_options') - # focus only on device suspend and resume - os.system('cat '+tp+'available_filter_functions | grep '+\ - cf+' > '+tp+'set_graph_function') - if(sysvals.usetraceevents): - # turn trace events on - events = iter(sysvals.traceevents) - for e in events: - os.system('echo 1 > '+sysvals.epath+e+'/enable') - # clear the trace buffer - os.system('echo "" > '+tp+'trace') - -# Function: initFtraceAndroid -# Description: -# Configure ftrace to capture trace events -def initFtraceAndroid(): - global sysvals - - tp = sysvals.tpath - if(sysvals.usetraceevents): - print('INITIALIZING FTRACE...') - # turn trace off - os.system(sysvals.adb+" shell 'echo 0 > "+tp+"tracing_on'") - # set the trace clock to global - os.system(sysvals.adb+" shell 'echo global > "+tp+"trace_clock'") - # set trace buffer to a huge value - os.system(sysvals.adb+" shell 'echo nop > "+tp+"current_tracer'") - os.system(sysvals.adb+" shell 'echo 10000 > "+tp+"buffer_size_kb'") - # turn trace events on - events = iter(sysvals.traceevents) - for e in events: - os.system(sysvals.adb+" shell 'echo 1 > "+\ - sysvals.epath+e+"/enable'") - # clear the trace buffer - os.system(sysvals.adb+" shell 'echo \"\" > "+tp+"trace'") - -# Function: verifyFtrace -# Description: -# Check that ftrace is working on the system -# Output: -# True or False -def verifyFtrace(): - global sysvals - # files needed for any trace data - files = ['buffer_size_kb', 'current_tracer', 'trace', 'trace_clock', - 'trace_marker', 'trace_options', 'tracing_on'] - # files needed for callgraph trace data - tp = sysvals.tpath - if(sysvals.usecallgraph): - files += [ - 'available_filter_functions', - 'set_ftrace_filter', - 'set_graph_function' - ] - for f in files: - if(sysvals.android): - out = os.popen(sysvals.adb+' shell ls '+tp+f).read().strip() - if(out != tp+f): - return False - else: - if(os.path.exists(tp+f) == False): - return False - return True - # Function: parseStamp # Description: # Pull in the stamp comment line from the data file(s), # create the stamp, and add it to the global sysvals object # Arguments: # m: the valid re.match output for the stamp line -def parseStamp(m, data): +def parseStamp(line, data): global sysvals + + m = re.match(sysvals.stampfmt, line) data.stamp = {'time': '', 'host': '', 'mode': ''} dt = datetime(int(m.group('y'))+2000, int(m.group('m')), int(m.group('d')), int(m.group('H')), int(m.group('M')), @@ -999,6 +1786,7 @@ def parseStamp(m, data): data.stamp['host'] = m.group('host') data.stamp['mode'] = m.group('mode') data.stamp['kernel'] = m.group('kernel') + sysvals.hostname = data.stamp['host'] sysvals.suspendmode = data.stamp['mode'] if not sysvals.stamp: sysvals.stamp = data.stamp @@ -1031,14 +1819,35 @@ def diffStamp(stamp1, stamp2): def doesTraceLogHaveTraceEvents(): global sysvals + # check for kprobes + sysvals.usekprobes = False + out = os.system('grep -q "_cal: (" '+sysvals.ftracefile) + if(out == 0): + sysvals.usekprobes = True + # check for callgraph data on trace event blocks + out = os.system('grep -q "_cpu_down()" '+sysvals.ftracefile) + if(out == 0): + sysvals.usekprobes = True + out = os.popen('head -1 '+sysvals.ftracefile).read().replace('\n', '') + m = re.match(sysvals.stampfmt, out) + if m and m.group('mode') == 'command': + sysvals.usetraceeventsonly = True + sysvals.usetraceevents = True + return + # figure out what level of trace events are supported sysvals.usetraceeventsonly = True sysvals.usetraceevents = False for e in sysvals.traceevents: - out = os.popen('cat '+sysvals.ftracefile+' | grep "'+e+': "').read() - if(not out): + out = os.system('grep -q "'+e+': " '+sysvals.ftracefile) + if(out != 0): sysvals.usetraceeventsonly = False - if(e == 'suspend_resume' and out): + if(e == 'suspend_resume' and out == 0): sysvals.usetraceevents = True + # determine is this log is properly formatted + for e in ['SUSPEND START', 'RESUME COMPLETE']: + out = os.system('grep -q "'+e+'" '+sysvals.ftracefile) + if(out != 0): + sysvals.usetracemarkers = False # Function: appendIncompleteTraceLog # Description: @@ -1055,44 +1864,42 @@ def appendIncompleteTraceLog(testruns): # create TestRun vessels for ftrace parsing testcnt = len(testruns) - testidx = -1 + testidx = 0 testrun = [] for data in testruns: testrun.append(TestRun(data)) # extract the callgraph and traceevent data vprint('Analyzing the ftrace data...') + tp = TestProps() tf = open(sysvals.ftracefile, 'r') + data = 0 for line in tf: # remove any latent carriage returns line = line.replace('\r\n', '') - # grab the time stamp first (signifies the start of the test run) + # grab the time stamp m = re.match(sysvals.stampfmt, line) if(m): - testidx += 1 - parseStamp(m, testrun[testidx].data) - continue - # pull out any firmware data - if(re.match(sysvals.firmwarefmt, line)): - continue - # if we havent found a test time stamp yet keep spinning til we do - if(testidx < 0): + tp.stamp = line continue # determine the trace data type (required for further parsing) m = re.match(sysvals.tracertypefmt, line) if(m): - tracer = m.group('t') - testrun[testidx].setTracerType(tracer) + tp.setTracerType(m.group('t')) + continue + # device properties line + if(re.match(sysvals.devpropfmt, line)): + devProps(line) continue - # parse only valid lines, if this isnt one move on - m = re.match(testrun[testidx].ftrace_line_fmt, line) + # parse only valid lines, if this is not one move on + m = re.match(tp.ftrace_line_fmt, line) if(not m): continue # gather the basic message data from the line m_time = m.group('time') m_pid = m.group('pid') m_msg = m.group('msg') - if(testrun[testidx].cgformat): + if(tp.cgformat): m_param3 = m.group('dur') else: m_param3 = 'traceevent' @@ -1104,119 +1911,114 @@ def appendIncompleteTraceLog(testruns): # the line should be a call, return, or event if(not t.fcall and not t.freturn and not t.fevent): continue - # only parse the ftrace data during suspend/resume - data = testrun[testidx].data - if(not testrun[testidx].inthepipe): - # look for the suspend start marker - if(t.fevent): - if(t.name == 'SUSPEND START'): - testrun[testidx].inthepipe = True - data.setStart(t.time) + # look for the suspend start marker + if(t.startMarker()): + data = testrun[testidx].data + parseStamp(tp.stamp, data) + data.setStart(t.time) + continue + if(not data): + continue + # find the end of resume + if(t.endMarker()): + data.setEnd(t.time) + testidx += 1 + if(testidx >= testcnt): + break + continue + # trace event processing + if(t.fevent): + # general trace events have two types, begin and end + if(re.match('(?P<name>.*) begin$', t.name)): + isbegin = True + elif(re.match('(?P<name>.*) end$', t.name)): + isbegin = False + else: continue - else: - # trace event processing - if(t.fevent): - if(t.name == 'RESUME COMPLETE'): - testrun[testidx].inthepipe = False - data.setEnd(t.time) - if(testidx == testcnt - 1): - break - continue - # general trace events have two types, begin and end - if(re.match('(?P<name>.*) begin$', t.name)): - isbegin = True - elif(re.match('(?P<name>.*) end$', t.name)): - isbegin = False + m = re.match('(?P<name>.*)\[(?P<val>[0-9]*)\] .*', t.name) + if(m): + val = m.group('val') + if val == '0': + name = m.group('name') else: - continue - m = re.match('(?P<name>.*)\[(?P<val>[0-9]*)\] .*', t.name) - if(m): - val = m.group('val') - if val == '0': - name = m.group('name') - else: - name = m.group('name')+'['+val+']' + name = m.group('name')+'['+val+']' + else: + m = re.match('(?P<name>.*) .*', t.name) + name = m.group('name') + # special processing for trace events + if re.match('dpm_prepare\[.*', name): + continue + elif re.match('machine_suspend.*', name): + continue + elif re.match('suspend_enter\[.*', name): + if(not isbegin): + data.dmesg['suspend_prepare']['end'] = t.time + continue + elif re.match('dpm_suspend\[.*', name): + if(not isbegin): + data.dmesg['suspend']['end'] = t.time + continue + elif re.match('dpm_suspend_late\[.*', name): + if(isbegin): + data.dmesg['suspend_late']['start'] = t.time else: - m = re.match('(?P<name>.*) .*', t.name) - name = m.group('name') - # special processing for trace events - if re.match('dpm_prepare\[.*', name): - continue - elif re.match('machine_suspend.*', name): - continue - elif re.match('suspend_enter\[.*', name): - if(not isbegin): - data.dmesg['suspend_prepare']['end'] = t.time - continue - elif re.match('dpm_suspend\[.*', name): - if(not isbegin): - data.dmesg['suspend']['end'] = t.time - continue - elif re.match('dpm_suspend_late\[.*', name): - if(isbegin): - data.dmesg['suspend_late']['start'] = t.time - else: - data.dmesg['suspend_late']['end'] = t.time - continue - elif re.match('dpm_suspend_noirq\[.*', name): - if(isbegin): - data.dmesg['suspend_noirq']['start'] = t.time - else: - data.dmesg['suspend_noirq']['end'] = t.time - continue - elif re.match('dpm_resume_noirq\[.*', name): - if(isbegin): - data.dmesg['resume_machine']['end'] = t.time - data.dmesg['resume_noirq']['start'] = t.time - else: - data.dmesg['resume_noirq']['end'] = t.time - continue - elif re.match('dpm_resume_early\[.*', name): - if(isbegin): - data.dmesg['resume_early']['start'] = t.time - else: - data.dmesg['resume_early']['end'] = t.time - continue - elif re.match('dpm_resume\[.*', name): - if(isbegin): - data.dmesg['resume']['start'] = t.time - else: - data.dmesg['resume']['end'] = t.time - continue - elif re.match('dpm_complete\[.*', name): - if(isbegin): - data.dmesg['resume_complete']['start'] = t.time - else: - data.dmesg['resume_complete']['end'] = t.time - continue - # is this trace event outside of the devices calls - if(data.isTraceEventOutsideDeviceCalls(pid, t.time)): - # global events (outside device calls) are simply graphed - if(isbegin): - # store each trace event in ttemp - if(name not in testrun[testidx].ttemp): - testrun[testidx].ttemp[name] = [] - testrun[testidx].ttemp[name].append(\ - {'begin': t.time, 'end': t.time}) - else: - # finish off matching trace event in ttemp - if(name in testrun[testidx].ttemp): - testrun[testidx].ttemp[name][-1]['end'] = t.time + data.dmesg['suspend_late']['end'] = t.time + continue + elif re.match('dpm_suspend_noirq\[.*', name): + if(isbegin): + data.dmesg['suspend_noirq']['start'] = t.time else: - if(isbegin): - data.addIntraDevTraceEvent('', name, pid, t.time) - else: - data.capIntraDevTraceEvent('', name, pid, t.time) - # call/return processing - elif sysvals.usecallgraph: - # create a callgraph object for the data - if(pid not in testrun[testidx].ftemp): - testrun[testidx].ftemp[pid] = [] - testrun[testidx].ftemp[pid].append(FTraceCallGraph()) - # when the call is finished, see which device matches it - cg = testrun[testidx].ftemp[pid][-1] - if(cg.addLine(t, m)): - testrun[testidx].ftemp[pid].append(FTraceCallGraph()) + data.dmesg['suspend_noirq']['end'] = t.time + continue + elif re.match('dpm_resume_noirq\[.*', name): + if(isbegin): + data.dmesg['resume_machine']['end'] = t.time + data.dmesg['resume_noirq']['start'] = t.time + else: + data.dmesg['resume_noirq']['end'] = t.time + continue + elif re.match('dpm_resume_early\[.*', name): + if(isbegin): + data.dmesg['resume_early']['start'] = t.time + else: + data.dmesg['resume_early']['end'] = t.time + continue + elif re.match('dpm_resume\[.*', name): + if(isbegin): + data.dmesg['resume']['start'] = t.time + else: + data.dmesg['resume']['end'] = t.time + continue + elif re.match('dpm_complete\[.*', name): + if(isbegin): + data.dmesg['resume_complete']['start'] = t.time + else: + data.dmesg['resume_complete']['end'] = t.time + continue + # skip trace events inside devices calls + if(not data.isTraceEventOutsideDeviceCalls(pid, t.time)): + continue + # global events (outside device calls) are simply graphed + if(isbegin): + # store each trace event in ttemp + if(name not in testrun[testidx].ttemp): + testrun[testidx].ttemp[name] = [] + testrun[testidx].ttemp[name].append(\ + {'begin': t.time, 'end': t.time}) + else: + # finish off matching trace event in ttemp + if(name in testrun[testidx].ttemp): + testrun[testidx].ttemp[name][-1]['end'] = t.time + # call/return processing + elif sysvals.usecallgraph: + # create a callgraph object for the data + if(pid not in testrun[testidx].ftemp): + testrun[testidx].ftemp[pid] = [] + testrun[testidx].ftemp[pid].append(FTraceCallGraph(pid)) + # when the call is finished, see which device matches it + cg = testrun[testidx].ftemp[pid][-1] + if(cg.addLine(t)): + testrun[testidx].ftemp[pid].append(FTraceCallGraph(pid)) tf.close() for test in testrun: @@ -1224,20 +2026,14 @@ def appendIncompleteTraceLog(testruns): if(sysvals.usetraceevents): for name in test.ttemp: for event in test.ttemp[name]: - begin = event['begin'] - end = event['end'] - # if event starts before timeline start, expand timeline - if(begin < test.data.start): - test.data.setStart(begin) - # if event ends after timeline end, expand the timeline - if(end > test.data.end): - test.data.setEnd(end) - test.data.newActionGlobal(name, begin, end) + test.data.newActionGlobal(name, event['begin'], event['end']) # add the callgraph data to the device hierarchy for pid in test.ftemp: for cg in test.ftemp[pid]: - if(not cg.sanityCheck()): + if len(cg.list) < 1 or cg.invalid: + continue + if(not cg.postProcess()): id = 'task %s cpu %s' % (pid, m.group('cpu')) vprint('Sanity check failed for '+\ id+', ignoring this callback') @@ -1259,14 +2055,6 @@ def appendIncompleteTraceLog(testruns): if(sysvals.verbose): test.data.printDetails() - - # add the time in between the tests as a new phase so we can see it - if(len(testruns) > 1): - t1e = testruns[0].getEnd() - t2s = testruns[-1].getStart() - testruns[-1].newPhaseWithSingleAction('user mode', \ - 'user mode', t1e, t2s, '#FF9966') - # Function: parseTraceLog # Description: # Analyze an ftrace log output file generated from this app during @@ -1280,9 +2068,16 @@ def parseTraceLog(): vprint('Analyzing the ftrace data...') if(os.path.exists(sysvals.ftracefile) == False): - doError('%s doesnt exist' % sysvals.ftracefile, False) + doError('%s does not exist' % sysvals.ftracefile, False) + + sysvals.setupAllKprobes() + tracewatch = ['suspend_enter'] + if sysvals.usekprobes: + tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend', + 'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON', 'CPU_OFF'] # extract the callgraph and traceevent data + tp = TestProps() testruns = [] testdata = [] testrun = 0 @@ -1295,27 +2090,17 @@ def parseTraceLog(): # stamp line: each stamp means a new test run m = re.match(sysvals.stampfmt, line) if(m): - data = Data(len(testdata)) - testdata.append(data) - testrun = TestRun(data) - testruns.append(testrun) - parseStamp(m, data) - continue - if(not data): + tp.stamp = line continue # firmware line: pull out any firmware data m = re.match(sysvals.firmwarefmt, line) if(m): - data.fwSuspend = int(m.group('s')) - data.fwResume = int(m.group('r')) - if(data.fwSuspend > 0 or data.fwResume > 0): - data.fwValid = True + tp.fwdata.append((int(m.group('s')), int(m.group('r')))) continue # tracer type line: determine the trace data type m = re.match(sysvals.tracertypefmt, line) if(m): - tracer = m.group('t') - testrun.setTracerType(tracer) + tp.setTracerType(m.group('t')) continue # post resume time line: did this test run include post-resume data m = re.match(sysvals.postresumefmt, line) @@ -1324,15 +2109,20 @@ def parseTraceLog(): if(t > 0): sysvals.postresumetime = t continue + # device properties line + if(re.match(sysvals.devpropfmt, line)): + devProps(line) + continue # ftrace line: parse only valid lines - m = re.match(testrun.ftrace_line_fmt, line) + m = re.match(tp.ftrace_line_fmt, line) if(not m): continue # gather the basic message data from the line m_time = m.group('time') + m_proc = m.group('proc') m_pid = m.group('pid') m_msg = m.group('msg') - if(testrun.cgformat): + if(tp.cgformat): m_param3 = m.group('dur') else: m_param3 = 'traceevent' @@ -1344,24 +2134,38 @@ def parseTraceLog(): # the line should be a call, return, or event if(not t.fcall and not t.freturn and not t.fevent): continue - # only parse the ftrace data during suspend/resume - if(not testrun.inthepipe): - # look for the suspend start marker - if(t.fevent): - if(t.name == 'SUSPEND START'): - testrun.inthepipe = True - data.setStart(t.time) + # find the start of suspend + if(t.startMarker()): + phase = 'suspend_prepare' + data = Data(len(testdata)) + testdata.append(data) + testrun = TestRun(data) + testruns.append(testrun) + parseStamp(tp.stamp, data) + if len(tp.fwdata) > data.testnumber: + data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber] + if(data.fwSuspend > 0 or data.fwResume > 0): + data.fwValid = True + data.setStart(t.time) + continue + if(not data): + continue + # find the end of resume + if(t.endMarker()): + if(sysvals.usetracemarkers and sysvals.postresumetime > 0): + phase = 'post_resume' + data.newPhase(phase, t.time, t.time, '#F0F0F0', -1) + data.setEnd(t.time) + if(not sysvals.usetracemarkers): + # no trace markers? then quit and be sure to finish recording + # the event we used to trigger resume end + if(len(testrun.ttemp['thaw_processes']) > 0): + # if an entry exists, assume this is its end + testrun.ttemp['thaw_processes'][-1]['end'] = t.time + break continue # trace event processing if(t.fevent): - if(t.name == 'RESUME COMPLETE'): - if(sysvals.postresumetime > 0): - phase = 'post_resume' - data.newPhase(phase, t.time, t.time, '#FF9966', -1) - else: - testrun.inthepipe = False - data.setEnd(t.time) - continue if(phase == 'post_resume'): data.setEnd(t.time) if(t.type == 'suspend_resume'): @@ -1383,8 +2187,7 @@ def parseTraceLog(): m = re.match('(?P<name>.*) .*', t.name) name = m.group('name') # ignore these events - if(re.match('acpi_suspend\[.*', t.name) or - re.match('suspend_enter\[.*', name)): + if(name.split('[')[0] in tracewatch): continue # -- phase changes -- # suspend_prepare start @@ -1418,7 +2221,7 @@ def parseTraceLog(): data.dmesg[phase]['end'] = t.time data.tSuspended = t.time else: - if(sysvals.suspendmode in ['mem', 'disk']): + if(sysvals.suspendmode in ['mem', 'disk'] and not tp.S0i3): data.dmesg['suspend_machine']['end'] = t.time data.tSuspended = t.time phase = 'resume_machine' @@ -1426,6 +2229,15 @@ def parseTraceLog(): data.tResumed = t.time data.tLow = data.tResumed - data.tSuspended continue + # acpi_suspend + elif(re.match('acpi_suspend\[.*', t.name)): + # acpi_suspend[0] S0i3 + if(re.match('acpi_suspend\[0\] begin', t.name)): + if(sysvals.suspendmode == 'mem'): + tp.S0i3 = True + data.dmesg['suspend_machine']['end'] = t.time + data.tSuspended = t.time + continue # resume_noirq start elif(re.match('dpm_resume_noirq\[.*', t.name)): phase = 'resume_noirq' @@ -1449,30 +2261,25 @@ def parseTraceLog(): if(isbegin): data.dmesg[phase]['start'] = t.time continue - - # is this trace event outside of the devices calls - if(data.isTraceEventOutsideDeviceCalls(pid, t.time)): - # global events (outside device calls) are simply graphed - if(name not in testrun.ttemp): - testrun.ttemp[name] = [] - if(isbegin): - # create a new list entry - testrun.ttemp[name].append(\ - {'begin': t.time, 'end': t.time}) - else: - if(len(testrun.ttemp[name]) > 0): - # if an antry exists, assume this is its end - testrun.ttemp[name][-1]['end'] = t.time - elif(phase == 'post_resume'): - # post resume events can just have ends - testrun.ttemp[name].append({ - 'begin': data.dmesg[phase]['start'], - 'end': t.time}) + # skip trace events inside devices calls + if(not data.isTraceEventOutsideDeviceCalls(pid, t.time)): + continue + # global events (outside device calls) are graphed + if(name not in testrun.ttemp): + testrun.ttemp[name] = [] + if(isbegin): + # create a new list entry + testrun.ttemp[name].append(\ + {'begin': t.time, 'end': t.time, 'pid': pid}) else: - if(isbegin): - data.addIntraDevTraceEvent('', name, pid, t.time) - else: - data.capIntraDevTraceEvent('', name, pid, t.time) + if(len(testrun.ttemp[name]) > 0): + # if an entry exists, assume this is its end + testrun.ttemp[name][-1]['end'] = t.time + elif(phase == 'post_resume'): + # post resume events can just have ends + testrun.ttemp[name].append({ + 'begin': data.dmesg[phase]['start'], + 'end': t.time}) # device callback start elif(t.type == 'device_pm_callback_start'): m = re.match('(?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*',\ @@ -1495,75 +2302,127 @@ def parseTraceLog(): dev = list[n] dev['length'] = t.time - dev['start'] dev['end'] = t.time + # kprobe event processing + elif(t.fkprobe): + kprobename = t.type + kprobedata = t.name + key = (kprobename, pid) + # displayname is generated from kprobe data + displayname = '' + if(t.fcall): + displayname = sysvals.kprobeDisplayName(kprobename, kprobedata) + if not displayname: + continue + if(key not in tp.ktemp): + tp.ktemp[key] = [] + tp.ktemp[key].append({ + 'pid': pid, + 'begin': t.time, + 'end': t.time, + 'name': displayname, + 'cdata': kprobedata, + 'proc': m_proc, + }) + elif(t.freturn): + if(key not in tp.ktemp) or len(tp.ktemp[key]) < 1: + continue + e = tp.ktemp[key][-1] + if e['begin'] < 0.0 or t.time - e['begin'] < 0.000001: + tp.ktemp[key].pop() + else: + e['end'] = t.time + e['rdata'] = kprobedata # callgraph processing elif sysvals.usecallgraph: - # this shouldn't happen, but JIC, ignore callgraph data post-res - if(phase == 'post_resume'): - continue # create a callgraph object for the data - if(pid not in testrun.ftemp): - testrun.ftemp[pid] = [] - testrun.ftemp[pid].append(FTraceCallGraph()) + key = (m_proc, pid) + if(key not in testrun.ftemp): + testrun.ftemp[key] = [] + testrun.ftemp[key].append(FTraceCallGraph(pid)) # when the call is finished, see which device matches it - cg = testrun.ftemp[pid][-1] - if(cg.addLine(t, m)): - testrun.ftemp[pid].append(FTraceCallGraph()) + cg = testrun.ftemp[key][-1] + if(cg.addLine(t)): + testrun.ftemp[key].append(FTraceCallGraph(pid)) tf.close() + if sysvals.suspendmode == 'command': + for test in testruns: + for p in test.data.phases: + if p == 'resume_complete': + test.data.dmesg[p]['start'] = test.data.start + test.data.dmesg[p]['end'] = test.data.end + else: + test.data.dmesg[p]['start'] = test.data.start + test.data.dmesg[p]['end'] = test.data.start + test.data.tSuspended = test.data.start + test.data.tResumed = test.data.start + test.data.tLow = 0 + test.data.fwValid = False + for test in testruns: # add the traceevent data to the device hierarchy if(sysvals.usetraceevents): + # add actual trace funcs for name in test.ttemp: for event in test.ttemp[name]: - begin = event['begin'] - end = event['end'] - # if event starts before timeline start, expand timeline - if(begin < test.data.start): - test.data.setStart(begin) - # if event ends after timeline end, expand the timeline - if(end > test.data.end): - test.data.setEnd(end) - test.data.newActionGlobal(name, begin, end) - - # add the callgraph data to the device hierarchy - borderphase = { - 'dpm_prepare': 'suspend_prepare', - 'dpm_complete': 'resume_complete' - } - for pid in test.ftemp: - for cg in test.ftemp[pid]: - if len(cg.list) < 2: + test.data.newActionGlobal(name, event['begin'], event['end'], event['pid']) + # add the kprobe based virtual tracefuncs as actual devices + for key in tp.ktemp: + name, pid = key + if name not in sysvals.tracefuncs: continue - if(not cg.sanityCheck()): - id = 'task %s cpu %s' % (pid, m.group('cpu')) - vprint('Sanity check failed for '+\ - id+', ignoring this callback') + for e in tp.ktemp[key]: + kb, ke = e['begin'], e['end'] + if kb == ke or not test.data.isInsideTimeline(kb, ke): + continue + test.data.newActionGlobal(e['name'], kb, ke, pid) + # add config base kprobes and dev kprobes + for key in tp.ktemp: + name, pid = key + if name in sysvals.tracefuncs: continue - callstart = cg.start - callend = cg.end - if(cg.list[0].name in borderphase): - p = borderphase[cg.list[0].name] - list = test.data.dmesg[p]['list'] - for devname in list: - dev = list[devname] - if(pid == dev['pid'] and - callstart <= dev['start'] and - callend >= dev['end']): - dev['ftrace'] = cg.slice(dev['start'], dev['end']) - continue - if(cg.list[0].name != 'dpm_run_callback'): - continue - for p in test.data.phases: - if(test.data.dmesg[p]['start'] <= callstart and - callstart <= test.data.dmesg[p]['end']): - list = test.data.dmesg[p]['list'] - for devname in list: - dev = list[devname] - if(pid == dev['pid'] and - callstart <= dev['start'] and - callend >= dev['end']): - dev['ftrace'] = cg - break + for e in tp.ktemp[key]: + kb, ke = e['begin'], e['end'] + if kb == ke or not test.data.isInsideTimeline(kb, ke): + continue + color = sysvals.kprobeColor(e['name']) + if name not in sysvals.dev_tracefuncs: + # config base kprobe + test.data.newActionGlobal(e['name'], kb, ke, -2, color) + elif sysvals.usedevsrc: + # dev kprobe + data.addDeviceFunctionCall(e['name'], name, e['proc'], pid, kb, + ke, e['cdata'], e['rdata']) + if sysvals.usecallgraph: + # add the callgraph data to the device hierarchy + sortlist = dict() + for key in test.ftemp: + proc, pid = key + for cg in test.ftemp[key]: + if len(cg.list) < 1 or cg.invalid: + continue + if(not cg.postProcess()): + id = 'task %s' % (pid) + vprint('Sanity check failed for '+\ + id+', ignoring this callback') + continue + # match cg data to devices + if sysvals.suspendmode == 'command' or not cg.deviceMatch(pid, test.data): + sortkey = '%f%f%d' % (cg.start, cg.end, pid) + sortlist[sortkey] = cg + # create blocks for orphan cg data + for sortkey in sorted(sortlist): + cg = sortlist[sortkey] + name = cg.list[0].name + if sysvals.isCallgraphFunc(name): + vprint('Callgraph found for task %d: %.3fms, %s' % (cg.pid, (cg.end - cg.start)*1000, name)) + cg.newActionFromFunction(test.data) + + if sysvals.suspendmode == 'command': + if(sysvals.verbose): + for data in testdata: + data.printDetails() + return testdata # fill in any missing phases for data in testdata: @@ -1587,14 +2446,52 @@ def parseTraceLog(): if(sysvals.verbose): data.printDetails() - # add the time in between the tests as a new phase so we can see it - if(len(testdata) > 1): - t1e = testdata[0].getEnd() - t2s = testdata[-1].getStart() - testdata[-1].newPhaseWithSingleAction('user mode', \ - 'user mode', t1e, t2s, '#FF9966') return testdata +# Function: loadRawKernelLog +# Description: +# Load a raw kernel log that wasn't created by this tool, it might be +# possible to extract a valid suspend/resume log +def loadRawKernelLog(dmesgfile): + global sysvals + + stamp = {'time': '', 'host': '', 'mode': 'mem', 'kernel': ''} + stamp['time'] = datetime.now().strftime('%B %d %Y, %I:%M:%S %p') + stamp['host'] = sysvals.hostname + + testruns = [] + data = 0 + lf = open(dmesgfile, 'r') + for line in lf: + line = line.replace('\r\n', '') + idx = line.find('[') + if idx > 1: + line = line[idx:] + m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) + if(not m): + continue + msg = m.group("msg") + m = re.match('PM: Syncing filesystems.*', msg) + if(m): + if(data): + testruns.append(data) + data = Data(len(testruns)) + data.stamp = stamp + if(data): + m = re.match('.* *(?P<k>[0-9]\.[0-9]{2}\.[0-9]-.*) .*', msg) + if(m): + stamp['kernel'] = m.group('k') + m = re.match('PM: Preparing system for (?P<m>.*) sleep', msg) + if(m): + stamp['mode'] = m.group('m') + data.dmesgtext.append(line) + if(data): + testruns.append(data) + sysvals.stamp = stamp + sysvals.suspendmode = stamp['mode'] + lf.close() + return testruns + # Function: loadKernelLog # Description: # [deprecated for kernel 3.15.0 or newer] @@ -1607,9 +2504,10 @@ def loadKernelLog(): vprint('Analyzing the dmesg data...') if(os.path.exists(sysvals.dmesgfile) == False): - doError('%s doesnt exist' % sysvals.dmesgfile, False) + doError('%s does not exist' % sysvals.dmesgfile, False) - # there can be multiple test runs in a single file delineated by stamps + # there can be multiple test runs in a single file + tp = TestProps() testruns = [] data = 0 lf = open(sysvals.dmesgfile, 'r') @@ -1620,35 +2518,43 @@ def loadKernelLog(): line = line[idx:] m = re.match(sysvals.stampfmt, line) if(m): - if(data): - testruns.append(data) - data = Data(len(testruns)) - parseStamp(m, data) - continue - if(not data): + tp.stamp = line continue m = re.match(sysvals.firmwarefmt, line) if(m): - data.fwSuspend = int(m.group('s')) - data.fwResume = int(m.group('r')) - if(data.fwSuspend > 0 or data.fwResume > 0): - data.fwValid = True + tp.fwdata.append((int(m.group('s')), int(m.group('r')))) continue m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line) - if(m): - data.dmesgtext.append(line) - if(re.match('ACPI: resume from mwait', m.group('msg'))): - print('NOTE: This suspend appears to be freeze rather than'+\ - ' %s, it will be treated as such' % sysvals.suspendmode) - sysvals.suspendmode = 'freeze' - else: - vprint('ignoring dmesg line: %s' % line.replace('\n', '')) - testruns.append(data) + if(not m): + continue + msg = m.group("msg") + if(re.match('PM: Syncing filesystems.*', msg)): + if(data): + testruns.append(data) + data = Data(len(testruns)) + parseStamp(tp.stamp, data) + if len(tp.fwdata) > data.testnumber: + data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber] + if(data.fwSuspend > 0 or data.fwResume > 0): + data.fwValid = True + if(re.match('ACPI: resume from mwait', msg)): + print('NOTE: This suspend appears to be freeze rather than'+\ + ' %s, it will be treated as such' % sysvals.suspendmode) + sysvals.suspendmode = 'freeze' + if(not data): + continue + data.dmesgtext.append(line) + if(data): + testruns.append(data) lf.close() - if(not data): - print('ERROR: analyze_suspend header missing from dmesg log') - sys.exit() + if(len(testruns) < 1): + # bad log, but see if you can extract something meaningful anyway + testruns = loadRawKernelLog(sysvals.dmesgfile) + + if(len(testruns) < 1): + doError(' dmesg log is completely unreadable: %s' \ + % sysvals.dmesgfile, False) # fix lines with same timestamp/function with the call and return swapped for data in testruns: @@ -1865,7 +2771,8 @@ def parseKernelLog(data): actions[a] = [] actions[a].append({'begin': ktime, 'end': ktime}) if(re.match(at[a]['emsg'], msg)): - actions[a][-1]['end'] = ktime + if(a in actions): + actions[a][-1]['end'] = ktime # now look for CPU on/off events if(re.match('Disabling non-boot CPUs .*', msg)): # start of first cpu suspend @@ -1912,15 +2819,7 @@ def parseKernelLog(data): # fill in any actions we've found for name in actions: for event in actions[name]: - begin = event['begin'] - end = event['end'] - # if event starts before timeline start, expand timeline - if(begin < data.start): - data.setStart(begin) - # if event ends after timeline end, expand the timeline - if(end > data.end): - data.setEnd(end) - data.newActionGlobal(name, begin, end) + data.newActionGlobal(name, event['begin'], event['end']) if(sysvals.verbose): data.printDetails() @@ -1929,92 +2828,6 @@ def parseKernelLog(data): data.fixupInitcallsThatDidntReturn() return True -# Function: setTimelineRows -# Description: -# Organize the timeline entries into the smallest -# number of rows possible, with no entry overlapping -# Arguments: -# list: the list of devices/actions for a single phase -# sortedkeys: cronologically sorted key list to use -# Output: -# The total number of rows needed to display this phase of the timeline -def setTimelineRows(list, sortedkeys): - - # clear all rows and set them to undefined - remaining = len(list) - rowdata = dict() - row = 0 - for item in list: - list[item]['row'] = -1 - - # try to pack each row with as many ranges as possible - while(remaining > 0): - if(row not in rowdata): - rowdata[row] = [] - for item in sortedkeys: - if(list[item]['row'] < 0): - s = list[item]['start'] - e = list[item]['end'] - valid = True - for ritem in rowdata[row]: - rs = ritem['start'] - re = ritem['end'] - if(not (((s <= rs) and (e <= rs)) or - ((s >= re) and (e >= re)))): - valid = False - break - if(valid): - rowdata[row].append(list[item]) - list[item]['row'] = row - remaining -= 1 - row += 1 - return row - -# Function: createTimeScale -# Description: -# Create the timescale header for the html timeline -# Arguments: -# t0: start time (suspend begin) -# tMax: end time (resume end) -# tSuspend: time when suspend occurs, i.e. the zero time -# Output: -# The html code needed to display the time scale -def createTimeScale(t0, tMax, tSuspended): - timescale = '<div class="t" style="right:{0}%">{1}</div>\n' - output = '<div id="timescale">\n' - - # set scale for timeline - tTotal = tMax - t0 - tS = 0.1 - if(tTotal <= 0): - return output - if(tTotal > 4): - tS = 1 - if(tSuspended < 0): - for i in range(int(tTotal/tS)+1): - pos = '%0.3f' % (100 - ((float(i)*tS*100)/tTotal)) - if(i > 0): - val = '%0.fms' % (float(i)*tS*1000) - else: - val = '' - output += timescale.format(pos, val) - else: - tSuspend = tSuspended - t0 - divTotal = int(tTotal/tS) + 1 - divSuspend = int(tSuspend/tS) - s0 = (tSuspend - tS*divSuspend)*100/tTotal - for i in range(divTotal): - pos = '%0.3f' % (100 - ((float(i)*tS*100)/tTotal) - s0) - if((i == 0) and (s0 < 3)): - val = '' - elif(i == divSuspend): - val = 'S/R' - else: - val = '%0.fms' % (float(i-divSuspend)*tS*1000) - output += timescale.format(pos, val) - output += '</div>\n' - return output - # Function: createHTMLSummarySimple # Description: # Create summary html file for a series of tests @@ -2146,6 +2959,32 @@ def createHTMLSummarySimple(testruns, htmlfile): hf.write('</body>\n</html>\n') hf.close() +def htmlTitle(): + global sysvals + modename = { + 'freeze': 'Freeze (S0)', + 'standby': 'Standby (S1)', + 'mem': 'Suspend (S3)', + 'disk': 'Hibernate (S4)' + } + kernel = sysvals.stamp['kernel'] + host = sysvals.hostname[0].upper()+sysvals.hostname[1:] + mode = sysvals.suspendmode + if sysvals.suspendmode in modename: + mode = modename[sysvals.suspendmode] + return host+' '+mode+' '+kernel + +def ordinal(value): + suffix = 'th' + if value < 10 or value > 19: + if value % 10 == 1: + suffix = 'st' + elif value % 10 == 2: + suffix = 'nd' + elif value % 10 == 3: + suffix = 'rd' + return '%d%s' % (value, suffix) + # Function: createHTML # Description: # Create the output html file from the resident test data @@ -2156,6 +2995,10 @@ def createHTMLSummarySimple(testruns, htmlfile): def createHTML(testruns): global sysvals + if len(testruns) < 1: + print('ERROR: Not enough test data to build a timeline') + return + for data in testruns: data.normalizeTime(testruns[-1].tSuspended) @@ -2163,16 +3006,18 @@ def createHTML(testruns): if len(testruns) > 1: x2changes = ['1', 'relative'] # html function templates + headline_version = '<div class="version"><a href="https://01.org/suspendresume">AnalyzeSuspend v%s</a></div>' % sysvals.version headline_stamp = '<div class="stamp">{0} {1} {2} {3}</div>\n' html_devlist1 = '<button id="devlist1" class="devlist" style="float:left;">Device Detail%s</button>' % x2changes[0] html_zoombox = '<center><button id="zoomin">ZOOM IN</button><button id="zoomout">ZOOM OUT</button><button id="zoomdef">ZOOM 1:1</button></center>\n' html_devlist2 = '<button id="devlist2" class="devlist" style="float:right;">Device Detail2</button>\n' html_timeline = '<div id="dmesgzoombox" class="zoombox">\n<div id="{0}" class="timeline" style="height:{1}px">\n' - html_device = '<div id="{0}" title="{1}" class="thread" style="left:{2}%;top:{3}%;height:{4}%;width:{5}%;">{6}</div>\n' - html_traceevent = '<div title="{0}" class="traceevent" style="left:{1}%;top:{2}%;height:{3}%;width:{4}%;border:1px solid {5};background-color:{5}">{6}</div>\n' - html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}%;height:{3}%;background-color:{4}">{5}</div>\n' + html_tblock = '<div id="block{0}" class="tblock" style="left:{1}%;width:{2}%;">\n' + html_device = '<div id="{0}" title="{1}" class="thread{7}" style="left:{2}%;top:{3}px;height:{4}px;width:{5}%;{8}">{6}</div>\n' + html_traceevent = '<div title="{0}" class="traceevent" style="left:{1}%;top:{2}px;height:{3}px;width:{4}%;line-height:{3}px;">{5}</div>\n' + html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}px;height:{3}px;background-color:{4}">{5}</div>\n' html_phaselet = '<div id="{0}" class="phaselet" style="left:{1}%;width:{2}%;background-color:{3}"></div>\n' - html_legend = '<div class="square" style="left:{0}%;background-color:{1}"> {2}</div>\n' + html_legend = '<div id="p{3}" class="square" style="left:{0}%;background-color:{1}"> {2}</div>\n' html_timetotal = '<table class="time1">\n<tr>'\ '<td class="green">{2} Suspend Time: <b>{0} ms</b></td>'\ '<td class="yellow">{2} Resume Time: <b>{1} ms</b></td>'\ @@ -2182,6 +3027,10 @@ def createHTML(testruns): '<td class="gray">'+sysvals.suspendmode+' time: <b>{1} ms</b></td>'\ '<td class="yellow">{3} Resume Time: <b>{2} ms</b></td>'\ '</tr>\n</table>\n' + html_timetotal3 = '<table class="time1">\n<tr>'\ + '<td class="green">Execution Time: <b>{0} ms</b></td>'\ + '<td class="yellow">Command: <b>{1}</b></td>'\ + '</tr>\n</table>\n' html_timegroups = '<table class="time2">\n<tr>'\ '<td class="green">{4}Kernel Suspend: {0} ms</td>'\ '<td class="purple">{4}Firmware Suspend: {1} ms</td>'\ @@ -2189,12 +3038,21 @@ def createHTML(testruns): '<td class="yellow">{4}Kernel Resume: {3} ms</td>'\ '</tr>\n</table>\n' + # html format variables + rowheight = 30 + devtextS = '14px' + devtextH = '30px' + hoverZ = 'z-index:10;' + + if sysvals.usedevsrc: + hoverZ = '' + # device timeline vprint('Creating Device Timeline...') - devtl = Timeline() + + devtl = Timeline(rowheight) # Generate the header for this timeline - textnum = ['First', 'Second'] for data in testruns: tTotal = data.end - data.start tEnd = data.dmesg['resume_complete']['end'] @@ -2203,7 +3061,17 @@ def createHTML(testruns): sys.exit() if(data.tLow > 0): low_time = '%.0f'%(data.tLow*1000) - if data.fwValid: + if sysvals.suspendmode == 'command': + run_time = '%.0f'%((data.end-data.start)*1000) + if sysvals.testcommand: + testdesc = sysvals.testcommand + else: + testdesc = 'unknown' + if(len(testruns) > 1): + testdesc = ordinal(data.testnumber+1)+' '+testdesc + thtml = html_timetotal3.format(run_time, testdesc) + devtl.html['header'] += thtml + elif data.fwValid: suspend_time = '%.0f'%((data.tSuspended-data.start)*1000 + \ (data.fwSuspend/1000000.0)) resume_time = '%.0f'%((tEnd-data.tSuspended)*1000 + \ @@ -2211,7 +3079,7 @@ def createHTML(testruns): testdesc1 = 'Total' testdesc2 = '' if(len(testruns) > 1): - testdesc1 = testdesc2 = textnum[data.testnumber] + testdesc1 = testdesc2 = ordinal(data.testnumber+1) testdesc2 += ' ' if(data.tLow == 0): thtml = html_timetotal.format(suspend_time, \ @@ -2219,28 +3087,28 @@ def createHTML(testruns): else: thtml = html_timetotal2.format(suspend_time, low_time, \ resume_time, testdesc1) - devtl.html['timeline'] += thtml + devtl.html['header'] += thtml sktime = '%.3f'%((data.dmesg['suspend_machine']['end'] - \ data.getStart())*1000) sftime = '%.3f'%(data.fwSuspend / 1000000.0) rftime = '%.3f'%(data.fwResume / 1000000.0) - rktime = '%.3f'%((data.getEnd() - \ + rktime = '%.3f'%((data.dmesg['resume_complete']['end'] - \ data.dmesg['resume_machine']['start'])*1000) - devtl.html['timeline'] += html_timegroups.format(sktime, \ + devtl.html['header'] += html_timegroups.format(sktime, \ sftime, rftime, rktime, testdesc2) else: suspend_time = '%.0f'%((data.tSuspended-data.start)*1000) resume_time = '%.0f'%((tEnd-data.tSuspended)*1000) testdesc = 'Kernel' if(len(testruns) > 1): - testdesc = textnum[data.testnumber]+' '+testdesc + testdesc = ordinal(data.testnumber+1)+' '+testdesc if(data.tLow == 0): thtml = html_timetotal.format(suspend_time, \ resume_time, testdesc) else: thtml = html_timetotal2.format(suspend_time, low_time, \ resume_time, testdesc) - devtl.html['timeline'] += thtml + devtl.html['header'] += thtml # time scale for potentially multiple datasets t0 = testruns[0].start @@ -2249,153 +3117,241 @@ def createHTML(testruns): tTotal = tMax - t0 # determine the maximum number of rows we need to draw - timelinerows = 0 for data in testruns: - for phase in data.dmesg: - list = data.dmesg[phase]['list'] - rows = setTimelineRows(list, list) - data.dmesg[phase]['row'] = rows - if(rows > timelinerows): - timelinerows = rows - - # calculate the timeline height and create bounding box, add buttons - devtl.setRows(timelinerows + 1) - devtl.html['timeline'] += html_devlist1 - if len(testruns) > 1: - devtl.html['timeline'] += html_devlist2 + data.selectTimelineDevices('%f', tTotal, sysvals.mindevlen) + for group in data.devicegroups: + devlist = [] + for phase in group: + for devname in data.tdevlist[phase]: + devlist.append((phase,devname)) + devtl.getPhaseRows(data.dmesg, devlist) + devtl.calcTotalRows() + + # create bounding box, add buttons + if sysvals.suspendmode != 'command': + devtl.html['timeline'] += html_devlist1 + if len(testruns) > 1: + devtl.html['timeline'] += html_devlist2 devtl.html['timeline'] += html_zoombox devtl.html['timeline'] += html_timeline.format('dmesg', devtl.height) - # draw the colored boxes for each of the phases - for data in testruns: - for b in data.dmesg: - phase = data.dmesg[b] - length = phase['end']-phase['start'] - left = '%.3f' % (((phase['start']-t0)*100.0)/tTotal) - width = '%.3f' % ((length*100.0)/tTotal) - devtl.html['timeline'] += html_phase.format(left, width, \ - '%.3f'%devtl.scaleH, '%.3f'%(100-devtl.scaleH), \ - data.dmesg[b]['color'], '') + # draw the full timeline + phases = {'suspend':[],'resume':[]} + for phase in data.dmesg: + if 'resume' in phase: + phases['resume'].append(phase) + else: + phases['suspend'].append(phase) - # draw the time scale, try to make the number of labels readable - devtl.html['scale'] = createTimeScale(t0, tMax, tSuspended) - devtl.html['timeline'] += devtl.html['scale'] + # draw each test run chronologically for data in testruns: - for b in data.dmesg: - phaselist = data.dmesg[b]['list'] - for d in phaselist: - name = d - drv = '' - dev = phaselist[d] - if(d in sysvals.altdevname): - name = sysvals.altdevname[d] - if('drv' in dev and dev['drv']): - drv = ' {%s}' % dev['drv'] - height = (100.0 - devtl.scaleH)/data.dmesg[b]['row'] - top = '%.3f' % ((dev['row']*height) + devtl.scaleH) - left = '%.3f' % (((dev['start']-t0)*100)/tTotal) - width = '%.3f' % (((dev['end']-dev['start'])*100)/tTotal) - length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000) - color = 'rgba(204,204,204,0.5)' - devtl.html['timeline'] += html_device.format(dev['id'], \ - d+drv+length+b, left, top, '%.3f'%height, width, name+drv) - - # draw any trace events found - for data in testruns: - for b in data.dmesg: - phaselist = data.dmesg[b]['list'] - for name in phaselist: - dev = phaselist[name] - if('traceevents' in dev): - vprint('Debug trace events found for device %s' % name) - vprint('%20s %20s %10s %8s' % ('action', \ + # if nore than one test, draw a block to represent user mode + if(data.testnumber > 0): + m0 = testruns[data.testnumber-1].end + mMax = testruns[data.testnumber].start + mTotal = mMax - m0 + name = 'usermode%d' % data.testnumber + top = '%d' % devtl.scaleH + left = '%f' % (((m0-t0)*100.0)/tTotal) + width = '%f' % ((mTotal*100.0)/tTotal) + title = 'user mode (%0.3f ms) ' % (mTotal*1000) + devtl.html['timeline'] += html_device.format(name, \ + title, left, top, '%d'%devtl.bodyH, width, '', '', '') + # now draw the actual timeline blocks + for dir in phases: + # draw suspend and resume blocks separately + bname = '%s%d' % (dir[0], data.testnumber) + if dir == 'suspend': + m0 = testruns[data.testnumber].start + mMax = testruns[data.testnumber].tSuspended + mTotal = mMax - m0 + left = '%f' % (((m0-t0)*100.0)/tTotal) + else: + m0 = testruns[data.testnumber].tSuspended + mMax = testruns[data.testnumber].end + mTotal = mMax - m0 + left = '%f' % ((((m0-t0)*100.0)+sysvals.srgap/2)/tTotal) + # if a timeline block is 0 length, skip altogether + if mTotal == 0: + continue + width = '%f' % (((mTotal*100.0)-sysvals.srgap/2)/tTotal) + devtl.html['timeline'] += html_tblock.format(bname, left, width) + for b in sorted(phases[dir]): + # draw the phase color background + phase = data.dmesg[b] + length = phase['end']-phase['start'] + left = '%f' % (((phase['start']-m0)*100.0)/mTotal) + width = '%f' % ((length*100.0)/mTotal) + devtl.html['timeline'] += html_phase.format(left, width, \ + '%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \ + data.dmesg[b]['color'], '') + # draw the devices for this phase + phaselist = data.dmesg[b]['list'] + for d in data.tdevlist[b]: + name = d + drv = '' + dev = phaselist[d] + xtraclass = '' + xtrainfo = '' + xtrastyle = '' + if 'htmlclass' in dev: + xtraclass = dev['htmlclass'] + xtrainfo = dev['htmlclass'] + if 'color' in dev: + xtrastyle = 'background-color:%s;' % dev['color'] + if(d in sysvals.devprops): + name = sysvals.devprops[d].altName(d) + xtraclass = sysvals.devprops[d].xtraClass() + xtrainfo = sysvals.devprops[d].xtraInfo() + if('drv' in dev and dev['drv']): + drv = ' {%s}' % dev['drv'] + rowheight = devtl.phaseRowHeight(b, dev['row']) + rowtop = devtl.phaseRowTop(b, dev['row']) + top = '%.3f' % (rowtop + devtl.scaleH) + left = '%f' % (((dev['start']-m0)*100)/mTotal) + width = '%f' % (((dev['end']-dev['start'])*100)/mTotal) + length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000) + if sysvals.suspendmode == 'command': + title = name+drv+xtrainfo+length+'cmdexec' + else: + title = name+drv+xtrainfo+length+b + devtl.html['timeline'] += html_device.format(dev['id'], \ + title, left, top, '%.3f'%rowheight, width, \ + d+drv, xtraclass, xtrastyle) + if('src' not in dev): + continue + # draw any trace events for this device + vprint('Debug trace events found for device %s' % d) + vprint('%20s %20s %10s %8s' % ('title', \ 'name', 'time(ms)', 'length(ms)')) - for e in dev['traceevents']: - vprint('%20s %20s %10.3f %8.3f' % (e.action, \ - e.name, e.time*1000, e.length*1000)) - height = (100.0 - devtl.scaleH)/data.dmesg[b]['row'] - top = '%.3f' % ((dev['row']*height) + devtl.scaleH) - left = '%.3f' % (((e.time-t0)*100)/tTotal) - width = '%.3f' % (e.length*100/tTotal) + for e in dev['src']: + vprint('%20s %20s %10.3f %8.3f' % (e.title, \ + e.text, e.time*1000, e.length*1000)) + height = devtl.rowH + top = '%.3f' % (rowtop + devtl.scaleH + (e.row*devtl.rowH)) + left = '%f' % (((e.time-m0)*100)/mTotal) + width = '%f' % (e.length*100/mTotal) color = 'rgba(204,204,204,0.5)' devtl.html['timeline'] += \ - html_traceevent.format(e.action+' '+e.name, \ + html_traceevent.format(e.title, \ left, top, '%.3f'%height, \ - width, e.color, '') + width, e.text) + # draw the time scale, try to make the number of labels readable + devtl.html['timeline'] += devtl.createTimeScale(m0, mMax, tTotal, dir) + devtl.html['timeline'] += '</div>\n' # timeline is finished devtl.html['timeline'] += '</div>\n</div>\n' # draw a legend which describes the phases by color - data = testruns[-1] - devtl.html['legend'] = '<div class="legend">\n' - pdelta = 100.0/len(data.phases) - pmargin = pdelta / 4.0 - for phase in data.phases: - order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin) - name = string.replace(phase, '_', ' ') - devtl.html['legend'] += html_legend.format(order, \ - data.dmesg[phase]['color'], name) - devtl.html['legend'] += '</div>\n' + if sysvals.suspendmode != 'command': + data = testruns[-1] + devtl.html['legend'] = '<div class="legend">\n' + pdelta = 100.0/len(data.phases) + pmargin = pdelta / 4.0 + for phase in data.phases: + tmp = phase.split('_') + id = tmp[0][0] + if(len(tmp) > 1): + id += tmp[1][0] + order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin) + name = string.replace(phase, '_', ' ') + devtl.html['legend'] += html_legend.format(order, \ + data.dmesg[phase]['color'], name, id) + devtl.html['legend'] += '</div>\n' hf = open(sysvals.htmlfile, 'w') - thread_height = 0 + + if not sysvals.cgexp: + cgchk = 'checked' + cgnchk = 'not(:checked)' + else: + cgchk = 'not(:checked)' + cgnchk = 'checked' # write the html header first (html head, css code, up to body start) html_header = '<!DOCTYPE html>\n<html>\n<head>\n\ <meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\ - <title>AnalyzeSuspend</title>\n\ + <title>'+htmlTitle()+'</title>\n\ <style type=\'text/css\'>\n\ - body {overflow-y: scroll;}\n\ - .stamp {width: 100%;text-align:center;background-color:gray;line-height:30px;color:white;font: 25px Arial;}\n\ - .callgraph {margin-top: 30px;box-shadow: 5px 5px 20px black;}\n\ - .callgraph article * {padding-left: 28px;}\n\ - h1 {color:black;font: bold 30px Times;}\n\ - t0 {color:black;font: bold 30px Times;}\n\ - t1 {color:black;font: 30px Times;}\n\ - t2 {color:black;font: 25px Times;}\n\ - t3 {color:black;font: 20px Times;white-space:nowrap;}\n\ - t4 {color:black;font: bold 30px Times;line-height:60px;white-space:nowrap;}\n\ + body {overflow-y:scroll;}\n\ + .stamp {width:100%;text-align:center;background-color:gray;line-height:30px;color:white;font:25px Arial;}\n\ + .callgraph {margin-top:30px;box-shadow:5px 5px 20px black;}\n\ + .callgraph article * {padding-left:28px;}\n\ + h1 {color:black;font:bold 30px Times;}\n\ + t0 {color:black;font:bold 30px Times;}\n\ + t1 {color:black;font:30px Times;}\n\ + t2 {color:black;font:25px Times;}\n\ + t3 {color:black;font:20px Times;white-space:nowrap;}\n\ + t4 {color:black;font:bold 30px Times;line-height:60px;white-space:nowrap;}\n\ + cS {color:blue;font:bold 11px Times;}\n\ + cR {color:red;font:bold 11px Times;}\n\ table {width:100%;}\n\ .gray {background-color:rgba(80,80,80,0.1);}\n\ .green {background-color:rgba(204,255,204,0.4);}\n\ .purple {background-color:rgba(128,0,128,0.2);}\n\ .yellow {background-color:rgba(255,255,204,0.4);}\n\ - .time1 {font: 22px Arial;border:1px solid;}\n\ - .time2 {font: 15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\ - td {text-align: center;}\n\ + .time1 {font:22px Arial;border:1px solid;}\n\ + .time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\ + td {text-align:center;}\n\ r {color:#500000;font:15px Tahoma;}\n\ n {color:#505050;font:15px Tahoma;}\n\ - .tdhl {color: red;}\n\ - .hide {display: none;}\n\ - .pf {display: none;}\n\ - .pf:checked + label {background: url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/><rect x="8" y="4" width="2" height="10" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\ - .pf:not(:checked) ~ label {background: url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\ - .pf:checked ~ *:not(:nth-child(2)) {display: none;}\n\ - .zoombox {position: relative; width: 100%; overflow-x: scroll;}\n\ - .timeline {position: relative; font-size: 14px;cursor: pointer;width: 100%; overflow: hidden; background-color:#dddddd;}\n\ - .thread {position: absolute; height: '+'%.3f'%thread_height+'%; overflow: hidden; line-height: 30px; border:1px solid;text-align:center;white-space:nowrap;background-color:rgba(204,204,204,0.5);}\n\ - .thread:hover {background-color:white;border:1px solid red;z-index:10;}\n\ - .hover {background-color:white;border:1px solid red;z-index:10;}\n\ - .traceevent {position: absolute;opacity: 0.3;height: '+'%.3f'%thread_height+'%;width:0;overflow:hidden;line-height:30px;text-align:center;white-space:nowrap;}\n\ - .phase {position: absolute;overflow: hidden;border:0px;text-align:center;}\n\ + .tdhl {color:red;}\n\ + .hide {display:none;}\n\ + .pf {display:none;}\n\ + .pf:'+cgchk+' + label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/><rect x="8" y="4" width="2" height="10" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\ + .pf:'+cgnchk+' ~ label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\ + .pf:'+cgchk+' ~ *:not(:nth-child(2)) {display:none;}\n\ + .zoombox {position:relative;width:100%;overflow-x:scroll;}\n\ + .timeline {position:relative;font-size:14px;cursor:pointer;width:100%; overflow:hidden;background:linear-gradient(#cccccc, white);}\n\ + .thread {position:absolute;height:0%;overflow:hidden;line-height:'+devtextH+';font-size:'+devtextS+';border:1px solid;text-align:center;white-space:nowrap;background-color:rgba(204,204,204,0.5);}\n\ + .thread.sync {background-color:'+sysvals.synccolor+';}\n\ + .thread.bg {background-color:'+sysvals.kprobecolor+';}\n\ + .thread:hover {background-color:white;border:1px solid red;'+hoverZ+'}\n\ + .hover {background-color:white;border:1px solid red;'+hoverZ+'}\n\ + .hover.sync {background-color:white;}\n\ + .hover.bg {background-color:white;}\n\ + .traceevent {position:absolute;font-size:10px;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,rgba(204,204,204,1),rgba(150,150,150,1));}\n\ + .traceevent:hover {background:white;}\n\ + .phase {position:absolute;overflow:hidden;border:0px;text-align:center;}\n\ .phaselet {position:absolute;overflow:hidden;border:0px;text-align:center;height:100px;font-size:24px;}\n\ - .t {position:absolute;top:0%;height:100%;border-right:1px solid black;}\n\ - .legend {position: relative; width: 100%; height: 40px; text-align: center;margin-bottom:20px}\n\ - .legend .square {position:absolute;top:10px; width: 0px;height: 20px;border:1px solid;padding-left:20px;}\n\ + .t {z-index:2;position:absolute;pointer-events:none;top:0%;height:100%;border-right:1px solid black;}\n\ + .legend {position:relative; width:100%; height:40px; text-align:center;margin-bottom:20px}\n\ + .legend .square {position:absolute;cursor:pointer;top:10px; width:0px;height:20px;border:1px solid;padding-left:20px;}\n\ button {height:40px;width:200px;margin-bottom:20px;margin-top:20px;font-size:24px;}\n\ + .logbtn {position:relative;float:right;height:25px;width:50px;margin-top:3px;margin-bottom:0;font-size:10px;text-align:center;}\n\ .devlist {position:'+x2changes[1]+';width:190px;}\n\ - #devicedetail {height:100px;box-shadow: 5px 5px 20px black;}\n\ + a:link {color:white;text-decoration:none;}\n\ + a:visited {color:white;}\n\ + a:hover {color:white;}\n\ + a:active {color:white;}\n\ + .version {position:relative;float:left;color:white;font-size:10px;line-height:30px;margin-left:10px;}\n\ + #devicedetail {height:100px;box-shadow:5px 5px 20px black;}\n\ + .tblock {position:absolute;height:100%;}\n\ + .bg {z-index:1;}\n\ </style>\n</head>\n<body>\n' - hf.write(html_header) + + # no header or css if its embedded + if(sysvals.embedded): + hf.write('pass True tSus %.3f tRes %.3f tLow %.3f fwvalid %s tSus %.3f tRes %.3f\n' % + (data.tSuspended-data.start, data.end-data.tSuspended, data.tLow, data.fwValid, \ + data.fwSuspend/1000000, data.fwResume/1000000)) + else: + hf.write(html_header) # write the test title and general info header if(sysvals.stamp['time'] != ""): + hf.write(headline_version) + if sysvals.addlogs and sysvals.dmesgfile: + hf.write('<button id="showdmesg" class="logbtn">dmesg</button>') + if sysvals.addlogs and sysvals.ftracefile: + hf.write('<button id="showftrace" class="logbtn">ftrace</button>') hf.write(headline_stamp.format(sysvals.stamp['host'], sysvals.stamp['kernel'], sysvals.stamp['mode'], \ sysvals.stamp['time'])) # write the device timeline + hf.write(devtl.html['header']) hf.write(devtl.html['timeline']) hf.write(devtl.html['legend']) hf.write('<div id="devicedetailtitle"></div>\n') @@ -2410,12 +3366,15 @@ def createHTML(testruns): width = '%.3f' % ((length*100.0)/tTotal) hf.write(html_phaselet.format(b, left, width, \ data.dmesg[b]['color'])) + if sysvals.suspendmode == 'command': + hf.write(html_phaselet.format('cmdexec', '0', '0', \ + data.dmesg['resume_complete']['color'])) hf.write('</div>\n') hf.write('</div>\n') # write the ftrace data (callgraph) data = testruns[-1] - if(sysvals.usecallgraph): + if(sysvals.usecallgraph and not sysvals.embedded): hf.write('<section id="callgraphs" class="callgraph">\n') # write out the ftrace data converted to html html_func_top = '<article id="{0}" class="atop" style="background-color:{1}">\n<input type="checkbox" class="pf" id="f{2}" checked/><label for="f{2}">{3} {4}</label>\n' @@ -2428,22 +3387,29 @@ def createHTML(testruns): for devname in data.sortedDevices(p): if('ftrace' not in list[devname]): continue - name = devname - if(devname in sysvals.altdevname): - name = sysvals.altdevname[devname] devid = list[devname]['id'] cg = list[devname]['ftrace'] - flen = '<r>(%.3f ms @ %.3f to %.3f)</r>' % \ - ((cg.end - cg.start)*1000, cg.start*1000, cg.end*1000) + clen = (cg.end - cg.start) * 1000 + if clen < sysvals.mincglen: + continue + fmt = '<r>(%.3f ms @ '+sysvals.timeformat+' to '+sysvals.timeformat+')</r>' + flen = fmt % (clen, cg.start, cg.end) + name = devname + if(devname in sysvals.devprops): + name = sysvals.devprops[devname].altName(devname) + if sysvals.suspendmode == 'command': + ftitle = name + else: + ftitle = name+' '+p hf.write(html_func_top.format(devid, data.dmesg[p]['color'], \ - num, name+' '+p, flen)) + num, ftitle, flen)) num += 1 for line in cg.list: if(line.length < 0.000000001): flen = '' else: - flen = '<n>(%.3f ms @ %.3f)</n>' % (line.length*1000, \ - line.time*1000) + fmt = '<n>(%.3f ms @ '+sysvals.timeformat+')</n>' + flen = fmt % (line.length*1000, line.time) if(line.freturn and line.fcall): hf.write(html_func_leaf.format(line.name, flen)) elif(line.freturn): @@ -2453,9 +3419,40 @@ def createHTML(testruns): num += 1 hf.write(html_func_end) hf.write('\n\n </section>\n') - # write the footer and close - addScriptCode(hf, testruns) - hf.write('</body>\n</html>\n') + + # add the dmesg log as a hidden div + if sysvals.addlogs and sysvals.dmesgfile: + hf.write('<div id="dmesglog" style="display:none;">\n') + lf = open(sysvals.dmesgfile, 'r') + for line in lf: + hf.write(line) + lf.close() + hf.write('</div>\n') + # add the ftrace log as a hidden div + if sysvals.addlogs and sysvals.ftracefile: + hf.write('<div id="ftracelog" style="display:none;">\n') + lf = open(sysvals.ftracefile, 'r') + for line in lf: + hf.write(line) + lf.close() + hf.write('</div>\n') + + if(not sysvals.embedded): + # write the footer and close + addScriptCode(hf, testruns) + hf.write('</body>\n</html>\n') + else: + # embedded out will be loaded in a page, skip the js + t0 = (testruns[0].start - testruns[-1].tSuspended) * 1000 + tMax = (testruns[-1].end - testruns[-1].tSuspended) * 1000 + # add js code in a div entry for later evaluation + detail = 'var bounds = [%f,%f];\n' % (t0, tMax) + detail += 'var devtable = [\n' + for data in testruns: + topo = data.deviceTopology() + detail += '\t"%s",\n' % (topo) + detail += '];\n' + hf.write('<div id=customcode style=display:none>\n'+detail+'</div>\n') hf.close() return True @@ -2466,8 +3463,8 @@ def createHTML(testruns): # hf: the open html file pointer # testruns: array of Data objects from parseKernelLog or parseTraceLog def addScriptCode(hf, testruns): - t0 = (testruns[0].start - testruns[-1].tSuspended) * 1000 - tMax = (testruns[-1].end - testruns[-1].tSuspended) * 1000 + t0 = testruns[0].start * 1000 + tMax = testruns[-1].end * 1000 # create an array in javascript memory with the device details detail = ' var devtable = [];\n' for data in testruns: @@ -2477,8 +3474,43 @@ def addScriptCode(hf, testruns): # add the code which will manipulate the data in the browser script_code = \ '<script type="text/javascript">\n'+detail+\ + ' var resolution = -1;\n'\ + ' function redrawTimescale(t0, tMax, tS) {\n'\ + ' var rline = \'<div class="t" style="left:0;border-left:1px solid black;border-right:0;"><cR><-R</cR></div>\';\n'\ + ' var tTotal = tMax - t0;\n'\ + ' var list = document.getElementsByClassName("tblock");\n'\ + ' for (var i = 0; i < list.length; i++) {\n'\ + ' var timescale = list[i].getElementsByClassName("timescale")[0];\n'\ + ' var m0 = t0 + (tTotal*parseFloat(list[i].style.left)/100);\n'\ + ' var mTotal = tTotal*parseFloat(list[i].style.width)/100;\n'\ + ' var mMax = m0 + mTotal;\n'\ + ' var html = "";\n'\ + ' var divTotal = Math.floor(mTotal/tS) + 1;\n'\ + ' if(divTotal > 1000) continue;\n'\ + ' var divEdge = (mTotal - tS*(divTotal-1))*100/mTotal;\n'\ + ' var pos = 0.0, val = 0.0;\n'\ + ' for (var j = 0; j < divTotal; j++) {\n'\ + ' var htmlline = "";\n'\ + ' if(list[i].id[5] == "r") {\n'\ + ' pos = 100 - (((j)*tS*100)/mTotal);\n'\ + ' val = (j)*tS;\n'\ + ' htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\ + ' if(j == 0)\n'\ + ' htmlline = rline;\n'\ + ' } else {\n'\ + ' pos = 100 - (((j)*tS*100)/mTotal) - divEdge;\n'\ + ' val = (j-divTotal+1)*tS;\n'\ + ' if(j == divTotal - 1)\n'\ + ' htmlline = \'<div class="t" style="right:\'+pos+\'%"><cS>S-></cS></div>\';\n'\ + ' else\n'\ + ' htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\ + ' }\n'\ + ' html += htmlline;\n'\ + ' }\n'\ + ' timescale.innerHTML = html;\n'\ + ' }\n'\ + ' }\n'\ ' function zoomTimeline() {\n'\ - ' var timescale = document.getElementById("timescale");\n'\ ' var dmesg = document.getElementById("dmesg");\n'\ ' var zoombox = document.getElementById("dmesgzoombox");\n'\ ' var val = parseFloat(dmesg.style.width);\n'\ @@ -2486,7 +3518,7 @@ def addScriptCode(hf, testruns): ' var sh = window.outerWidth / 2;\n'\ ' if(this.id == "zoomin") {\n'\ ' newval = val * 1.2;\n'\ - ' if(newval > 40000) newval = 40000;\n'\ + ' if(newval > 910034) newval = 910034;\n'\ ' dmesg.style.width = newval+"%";\n'\ ' zoombox.scrollLeft = ((zoombox.scrollLeft + sh) * newval / val) - sh;\n'\ ' } else if (this.id == "zoomout") {\n'\ @@ -2498,19 +3530,17 @@ def addScriptCode(hf, testruns): ' zoombox.scrollLeft = 0;\n'\ ' dmesg.style.width = "100%";\n'\ ' }\n'\ - ' var html = "";\n'\ + ' var tS = [10000, 5000, 2000, 1000, 500, 200, 100, 50, 20, 10, 5, 2, 1];\n'\ ' var t0 = bounds[0];\n'\ ' var tMax = bounds[1];\n'\ ' var tTotal = tMax - t0;\n'\ ' var wTotal = tTotal * 100.0 / newval;\n'\ - ' for(var tS = 1000; (wTotal / tS) < 3; tS /= 10);\n'\ - ' if(tS < 1) tS = 1;\n'\ - ' for(var s = ((t0 / tS)|0) * tS; s < tMax; s += tS) {\n'\ - ' var pos = (tMax - s) * 100.0 / tTotal;\n'\ - ' var name = (s == 0)?"S/R":(s+"ms");\n'\ - ' html += "<div class=\\"t\\" style=\\"right:"+pos+"%\\">"+name+"</div>";\n'\ - ' }\n'\ - ' timescale.innerHTML = html;\n'\ + ' var idx = 7*window.innerWidth/1100;\n'\ + ' for(var i = 0; (i < tS.length)&&((wTotal / tS[i]) < idx); i++);\n'\ + ' if(i >= tS.length) i = tS.length - 1;\n'\ + ' if(tS[i] == resolution) return;\n'\ + ' resolution = tS[i];\n'\ + ' redrawTimescale(t0, tMax, tS[i]);\n'\ ' }\n'\ ' function deviceHover() {\n'\ ' var name = this.title.slice(0, this.title.indexOf(" ("));\n'\ @@ -2523,12 +3553,13 @@ def addScriptCode(hf, testruns): ' cpu = parseInt(name.slice(8));\n'\ ' for (var i = 0; i < dev.length; i++) {\n'\ ' dname = dev[i].title.slice(0, dev[i].title.indexOf(" ("));\n'\ + ' var cname = dev[i].className.slice(dev[i].className.indexOf("thread"));\n'\ ' if((cpu >= 0 && dname.match("CPU_O[NF]*\\\[*"+cpu+"\\\]")) ||\n'\ ' (name == dname))\n'\ ' {\n'\ - ' dev[i].className = "thread hover";\n'\ + ' dev[i].className = "hover "+cname;\n'\ ' } else {\n'\ - ' dev[i].className = "thread";\n'\ + ' dev[i].className = cname;\n'\ ' }\n'\ ' }\n'\ ' }\n'\ @@ -2536,7 +3567,7 @@ def addScriptCode(hf, testruns): ' var dmesg = document.getElementById("dmesg");\n'\ ' var dev = dmesg.getElementsByClassName("thread");\n'\ ' for (var i = 0; i < dev.length; i++) {\n'\ - ' dev[i].className = "thread";\n'\ + ' dev[i].className = dev[i].className.slice(dev[i].className.indexOf("thread"));\n'\ ' }\n'\ ' }\n'\ ' function deviceTitle(title, total, cpu) {\n'\ @@ -2547,7 +3578,7 @@ def addScriptCode(hf, testruns): ' total[2] = (total[2]+total[4])/2;\n'\ ' }\n'\ ' var devtitle = document.getElementById("devicedetailtitle");\n'\ - ' var name = title.slice(0, title.indexOf(" "));\n'\ + ' var name = title.slice(0, title.indexOf(" ("));\n'\ ' if(cpu >= 0) name = "CPU"+cpu;\n'\ ' var driver = "";\n'\ ' var tS = "<t2>(</t2>";\n'\ @@ -2579,6 +3610,8 @@ def addScriptCode(hf, testruns): ' var dev = dmesg.getElementsByClassName("thread");\n'\ ' var idlist = [];\n'\ ' var pdata = [[]];\n'\ + ' if(document.getElementById("devicedetail1"))\n'\ + ' pdata = [[], []];\n'\ ' var pd = pdata[0];\n'\ ' var total = [0.0, 0.0, 0.0];\n'\ ' for (var i = 0; i < dev.length; i++) {\n'\ @@ -2634,6 +3667,7 @@ def addScriptCode(hf, testruns): ' var cglist = document.getElementById("callgraphs");\n'\ ' if(!cglist) return;\n'\ ' var cg = cglist.getElementsByClassName("atop");\n'\ + ' if(cg.length < 10) return;\n'\ ' for (var i = 0; i < cg.length; i++) {\n'\ ' if(idlist.indexOf(cg[i].id) >= 0) {\n'\ ' cg[i].style.display = "block";\n'\ @@ -2658,15 +3692,32 @@ def addScriptCode(hf, testruns): ' dt = devtable[1];\n'\ ' win.document.write(html+dt);\n'\ ' }\n'\ + ' function logWindow(e) {\n'\ + ' var name = e.target.id.slice(4);\n'\ + ' var win = window.open();\n'\ + ' var log = document.getElementById(name+"log");\n'\ + ' var title = "<title>"+document.title.split(" ")[0]+" "+name+" log</title>";\n'\ + ' win.document.write(title+"<pre>"+log.innerHTML+"</pre>");\n'\ + ' win.document.close();\n'\ + ' }\n'\ + ' function onClickPhase(e) {\n'\ + ' }\n'\ + ' window.addEventListener("resize", function () {zoomTimeline();});\n'\ ' window.addEventListener("load", function () {\n'\ ' var dmesg = document.getElementById("dmesg");\n'\ ' dmesg.style.width = "100%"\n'\ ' document.getElementById("zoomin").onclick = zoomTimeline;\n'\ ' document.getElementById("zoomout").onclick = zoomTimeline;\n'\ ' document.getElementById("zoomdef").onclick = zoomTimeline;\n'\ - ' var devlist = document.getElementsByClassName("devlist");\n'\ - ' for (var i = 0; i < devlist.length; i++)\n'\ - ' devlist[i].onclick = devListWindow;\n'\ + ' var list = document.getElementsByClassName("square");\n'\ + ' for (var i = 0; i < list.length; i++)\n'\ + ' list[i].onclick = onClickPhase;\n'\ + ' var list = document.getElementsByClassName("logbtn");\n'\ + ' for (var i = 0; i < list.length; i++)\n'\ + ' list[i].onclick = logWindow;\n'\ + ' list = document.getElementsByClassName("devlist");\n'\ + ' for (var i = 0; i < list.length; i++)\n'\ + ' list[i].onclick = devListWindow;\n'\ ' var dev = dmesg.getElementsByClassName("thread");\n'\ ' for (var i = 0; i < dev.length; i++) {\n'\ ' dev[i].onclick = deviceDetail;\n'\ @@ -2685,141 +3736,87 @@ def addScriptCode(hf, testruns): def executeSuspend(): global sysvals - detectUSB(False) t0 = time.time()*1000 tp = sysvals.tpath + fwdata = [] + # mark the start point in the kernel ring buffer just as we start + sysvals.initdmesg() + # start ftrace + if(sysvals.usecallgraph or sysvals.usetraceevents): + print('START TRACING') + sysvals.fsetVal('1', 'tracing_on') # execute however many s/r runs requested for count in range(1,sysvals.execcount+1): - # clear the kernel ring buffer just as we start - os.system('dmesg -C') - # enable callgraph ftrace only for the second run - if(sysvals.usecallgraph and count == 2): - # set trace type - os.system('echo function_graph > '+tp+'current_tracer') - os.system('echo "" > '+tp+'set_ftrace_filter') - # set trace format options - os.system('echo funcgraph-abstime > '+tp+'trace_options') - os.system('echo funcgraph-proc > '+tp+'trace_options') - # focus only on device suspend and resume - os.system('cat '+tp+'available_filter_functions | '+\ - 'grep dpm_run_callback > '+tp+'set_graph_function') # if this is test2 and there's a delay, start here if(count > 1 and sysvals.x2delay > 0): tN = time.time()*1000 while (tN - t0) < sysvals.x2delay: tN = time.time()*1000 time.sleep(0.001) - # start ftrace - if(sysvals.usecallgraph or sysvals.usetraceevents): - print('START TRACING') - os.system('echo 1 > '+tp+'tracing_on') # initiate suspend if(sysvals.usecallgraph or sysvals.usetraceevents): - os.system('echo SUSPEND START > '+tp+'trace_marker') - if(sysvals.rtcwake): - print('SUSPEND START') - print('will autoresume in %d seconds' % sysvals.rtcwaketime) - sysvals.rtcWakeAlarm() + sysvals.fsetVal('SUSPEND START', 'trace_marker') + if sysvals.suspendmode == 'command': + print('COMMAND START') + if(sysvals.rtcwake): + print('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime) + sysvals.rtcWakeAlarmOn() + os.system(sysvals.testcommand) else: - print('SUSPEND START (press a key to resume)') - pf = open(sysvals.powerfile, 'w') - pf.write(sysvals.suspendmode) - # execution will pause here - pf.close() + if(sysvals.rtcwake): + print('SUSPEND START') + print('will autoresume in %d seconds' % sysvals.rtcwaketime) + sysvals.rtcWakeAlarmOn() + else: + print('SUSPEND START (press a key to resume)') + pf = open(sysvals.powerfile, 'w') + pf.write(sysvals.suspendmode) + # execution will pause here + try: + pf.close() + except: + pass t0 = time.time()*1000 + if(sysvals.rtcwake): + sysvals.rtcWakeAlarmOff() # return from suspend print('RESUME COMPLETE') if(sysvals.usecallgraph or sysvals.usetraceevents): - os.system('echo RESUME COMPLETE > '+tp+'trace_marker') - # see if there's firmware timing data to be had - t = sysvals.postresumetime - if(t > 0): - print('Waiting %d seconds for POST-RESUME trace events...' % t) - time.sleep(t) - # stop ftrace - if(sysvals.usecallgraph or sysvals.usetraceevents): - os.system('echo 0 > '+tp+'tracing_on') - print('CAPTURING TRACE') - writeDatafileHeader(sysvals.ftracefile) - os.system('cat '+tp+'trace >> '+sysvals.ftracefile) - os.system('echo "" > '+tp+'trace') - # grab a copy of the dmesg output - print('CAPTURING DMESG') - writeDatafileHeader(sysvals.dmesgfile) - os.system('dmesg -c >> '+sysvals.dmesgfile) - -def writeDatafileHeader(filename): + sysvals.fsetVal('RESUME COMPLETE', 'trace_marker') + if(sysvals.suspendmode == 'mem'): + fwdata.append(getFPDT(False)) + # look for post resume events after the last test run + t = sysvals.postresumetime + if(t > 0): + print('Waiting %d seconds for POST-RESUME trace events...' % t) + time.sleep(t) + # stop ftrace + if(sysvals.usecallgraph or sysvals.usetraceevents): + sysvals.fsetVal('0', 'tracing_on') + print('CAPTURING TRACE') + writeDatafileHeader(sysvals.ftracefile, fwdata) + os.system('cat '+tp+'trace >> '+sysvals.ftracefile) + sysvals.fsetVal('', 'trace') + devProps() + # grab a copy of the dmesg output + print('CAPTURING DMESG') + writeDatafileHeader(sysvals.dmesgfile, fwdata) + sysvals.getdmesg() + +def writeDatafileHeader(filename, fwdata): global sysvals - fw = getFPDT(False) prt = sysvals.postresumetime fp = open(filename, 'a') fp.write(sysvals.teststamp+'\n') - if(fw): - fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1])) + if(sysvals.suspendmode == 'mem'): + for fw in fwdata: + if(fw): + fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1])) if(prt > 0): fp.write('# post resume time %u\n' % prt) fp.close() -# Function: executeAndroidSuspend -# Description: -# Execute system suspend through the sysfs interface -# on a remote android device, then transfer the output -# dmesg and ftrace files to the local output directory. -def executeAndroidSuspend(): - global sysvals - - # check to see if the display is currently off - tp = sysvals.tpath - out = os.popen(sysvals.adb+\ - ' shell dumpsys power | grep mScreenOn').read().strip() - # if so we need to turn it on so we can issue a new suspend - if(out.endswith('false')): - print('Waking the device up for the test...') - # send the KEYPAD_POWER keyevent to wake it up - os.system(sysvals.adb+' shell input keyevent 26') - # wait a few seconds so the user can see the device wake up - time.sleep(3) - # execute however many s/r runs requested - for count in range(1,sysvals.execcount+1): - # clear the kernel ring buffer just as we start - os.system(sysvals.adb+' shell dmesg -c > /dev/null 2>&1') - # start ftrace - if(sysvals.usetraceevents): - print('START TRACING') - os.system(sysvals.adb+" shell 'echo 1 > "+tp+"tracing_on'") - # initiate suspend - for count in range(1,sysvals.execcount+1): - if(sysvals.usetraceevents): - os.system(sysvals.adb+\ - " shell 'echo SUSPEND START > "+tp+"trace_marker'") - print('SUSPEND START (press a key on the device to resume)') - os.system(sysvals.adb+" shell 'echo "+sysvals.suspendmode+\ - " > "+sysvals.powerfile+"'") - # execution will pause here, then adb will exit - while(True): - check = os.popen(sysvals.adb+\ - ' shell pwd 2>/dev/null').read().strip() - if(len(check) > 0): - break - time.sleep(1) - if(sysvals.usetraceevents): - os.system(sysvals.adb+" shell 'echo RESUME COMPLETE > "+tp+\ - "trace_marker'") - # return from suspend - print('RESUME COMPLETE') - # stop ftrace - if(sysvals.usetraceevents): - os.system(sysvals.adb+" shell 'echo 0 > "+tp+"tracing_on'") - print('CAPTURING TRACE') - os.system('echo "'+sysvals.teststamp+'" > '+sysvals.ftracefile) - os.system(sysvals.adb+' shell cat '+tp+\ - 'trace >> '+sysvals.ftracefile) - # grab a copy of the dmesg output - print('CAPTURING DMESG') - os.system('echo "'+sysvals.teststamp+'" > '+sysvals.dmesgfile) - os.system(sysvals.adb+' shell dmesg >> '+sysvals.dmesgfile) - # Function: setUSBDevicesAuto # Description: # Set the autosuspend control parameter of all USB devices to auto @@ -2829,7 +3826,7 @@ def executeAndroidSuspend(): def setUSBDevicesAuto(): global sysvals - rootCheck() + rootCheck(True) for dirname, dirnames, filenames in os.walk('/sys/devices'): if(re.match('.*/usb[0-9]*.*', dirname) and 'idVendor' in filenames and 'idProduct' in filenames): @@ -2874,9 +3871,7 @@ def ms2nice(val): # Description: # Detect all the USB hosts and devices currently connected and add # a list of USB device names to sysvals for better timeline readability -# Arguments: -# output: True to output the info to stdout, False otherwise -def detectUSB(output): +def detectUSB(): global sysvals field = {'idVendor':'', 'idProduct':'', 'product':'', 'speed':''} @@ -2887,18 +3882,18 @@ def detectUSB(output): 'runtime_suspended_time':'', 'active_duration':'', 'connected_duration':''} - if(output): - print('LEGEND') - print('---------------------------------------------------------------------------------------------') - print(' A = async/sync PM queue Y/N D = autosuspend delay (seconds)') - print(' S = autosuspend Y/N rACTIVE = runtime active (min/sec)') - print(' P = persist across suspend Y/N rSUSPEN = runtime suspend (min/sec)') - print(' E = runtime suspend enabled/forbidden Y/N ACTIVE = active duration (min/sec)') - print(' R = runtime status active/suspended Y/N CONNECT = connected duration (min/sec)') - print(' U = runtime usage count') - print('---------------------------------------------------------------------------------------------') - print(' NAME ID DESCRIPTION SPEED A S P E R U D rACTIVE rSUSPEN ACTIVE CONNECT') - print('---------------------------------------------------------------------------------------------') + + print('LEGEND') + print('---------------------------------------------------------------------------------------------') + print(' A = async/sync PM queue Y/N D = autosuspend delay (seconds)') + print(' S = autosuspend Y/N rACTIVE = runtime active (min/sec)') + print(' P = persist across suspend Y/N rSUSPEN = runtime suspend (min/sec)') + print(' E = runtime suspend enabled/forbidden Y/N ACTIVE = active duration (min/sec)') + print(' R = runtime status active/suspended Y/N CONNECT = connected duration (min/sec)') + print(' U = runtime usage count') + print('---------------------------------------------------------------------------------------------') + print(' NAME ID DESCRIPTION SPEED A S P E R U D rACTIVE rSUSPEN ACTIVE CONNECT') + print('---------------------------------------------------------------------------------------------') for dirname, dirnames, filenames in os.walk('/sys/devices'): if(re.match('.*/usb[0-9]*.*', dirname) and @@ -2907,35 +3902,149 @@ def detectUSB(output): field[i] = os.popen('cat %s/%s 2>/dev/null' % \ (dirname, i)).read().replace('\n', '') name = dirname.split('/')[-1] - if(len(field['product']) > 0): - sysvals.altdevname[name] = \ - '%s [%s]' % (field['product'], name) + for i in power: + power[i] = os.popen('cat %s/power/%s 2>/dev/null' % \ + (dirname, i)).read().replace('\n', '') + if(re.match('usb[0-9]*', name)): + first = '%-8s' % name else: - sysvals.altdevname[name] = \ - '%s:%s [%s]' % (field['idVendor'], \ - field['idProduct'], name) - if(output): - for i in power: - power[i] = os.popen('cat %s/power/%s 2>/dev/null' % \ - (dirname, i)).read().replace('\n', '') - if(re.match('usb[0-9]*', name)): - first = '%-8s' % name - else: - first = '%8s' % name - print('%s [%s:%s] %-20s %-4s %1s %1s %1s %1s %1s %1s %1s %s %s %s %s' % \ - (first, field['idVendor'], field['idProduct'], \ - field['product'][0:20], field['speed'], \ - yesno(power['async']), \ - yesno(power['control']), \ - yesno(power['persist']), \ - yesno(power['runtime_enabled']), \ - yesno(power['runtime_status']), \ - power['runtime_usage'], \ - power['autosuspend'], \ - ms2nice(power['runtime_active_time']), \ - ms2nice(power['runtime_suspended_time']), \ - ms2nice(power['active_duration']), \ - ms2nice(power['connected_duration']))) + first = '%8s' % name + print('%s [%s:%s] %-20s %-4s %1s %1s %1s %1s %1s %1s %1s %s %s %s %s' % \ + (first, field['idVendor'], field['idProduct'], \ + field['product'][0:20], field['speed'], \ + yesno(power['async']), \ + yesno(power['control']), \ + yesno(power['persist']), \ + yesno(power['runtime_enabled']), \ + yesno(power['runtime_status']), \ + power['runtime_usage'], \ + power['autosuspend'], \ + ms2nice(power['runtime_active_time']), \ + ms2nice(power['runtime_suspended_time']), \ + ms2nice(power['active_duration']), \ + ms2nice(power['connected_duration']))) + +# Function: devProps +# Description: +# Retrieve a list of properties for all devices in the trace log +def devProps(data=0): + global sysvals + props = dict() + + if data: + idx = data.index(': ') + 2 + if idx >= len(data): + return + devlist = data[idx:].split(';') + for dev in devlist: + f = dev.split(',') + if len(f) < 3: + continue + dev = f[0] + props[dev] = DevProps() + props[dev].altname = f[1] + if int(f[2]): + props[dev].async = True + else: + props[dev].async = False + sysvals.devprops = props + if sysvals.suspendmode == 'command' and 'testcommandstring' in props: + sysvals.testcommand = props['testcommandstring'].altname + return + + if(os.path.exists(sysvals.ftracefile) == False): + doError('%s does not exist' % sysvals.ftracefile, False) + + # first get the list of devices we need properties for + msghead = 'Additional data added by AnalyzeSuspend' + alreadystamped = False + tp = TestProps() + tf = open(sysvals.ftracefile, 'r') + for line in tf: + if msghead in line: + alreadystamped = True + continue + # determine the trace data type (required for further parsing) + m = re.match(sysvals.tracertypefmt, line) + if(m): + tp.setTracerType(m.group('t')) + continue + # parse only valid lines, if this is not one move on + m = re.match(tp.ftrace_line_fmt, line) + if(not m or 'device_pm_callback_start' not in line): + continue + m = re.match('.*: (?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*', m.group('msg')); + if(not m): + continue + drv, dev, par = m.group('drv'), m.group('d'), m.group('p') + if dev not in props: + props[dev] = DevProps() + tf.close() + + if not alreadystamped and sysvals.suspendmode == 'command': + out = '#\n# '+msghead+'\n# Device Properties: ' + out += 'testcommandstring,%s,0;' % (sysvals.testcommand) + with open(sysvals.ftracefile, 'a') as fp: + fp.write(out+'\n') + sysvals.devprops = props + return + + # now get the syspath for each of our target devices + for dirname, dirnames, filenames in os.walk('/sys/devices'): + if(re.match('.*/power', dirname) and 'async' in filenames): + dev = dirname.split('/')[-2] + if dev in props and (not props[dev].syspath or len(dirname) < len(props[dev].syspath)): + props[dev].syspath = dirname[:-6] + + # now fill in the properties for our target devices + for dev in props: + dirname = props[dev].syspath + if not dirname or not os.path.exists(dirname): + continue + with open(dirname+'/power/async') as fp: + text = fp.read() + props[dev].async = False + if 'enabled' in text: + props[dev].async = True + fields = os.listdir(dirname) + if 'product' in fields: + with open(dirname+'/product') as fp: + props[dev].altname = fp.read() + elif 'name' in fields: + with open(dirname+'/name') as fp: + props[dev].altname = fp.read() + elif 'model' in fields: + with open(dirname+'/model') as fp: + props[dev].altname = fp.read() + elif 'description' in fields: + with open(dirname+'/description') as fp: + props[dev].altname = fp.read() + elif 'id' in fields: + with open(dirname+'/id') as fp: + props[dev].altname = fp.read() + elif 'idVendor' in fields and 'idProduct' in fields: + idv, idp = '', '' + with open(dirname+'/idVendor') as fp: + idv = fp.read().strip() + with open(dirname+'/idProduct') as fp: + idp = fp.read().strip() + props[dev].altname = '%s:%s' % (idv, idp) + + if props[dev].altname: + out = props[dev].altname.strip().replace('\n', ' ') + out = out.replace(',', ' ') + out = out.replace(';', ' ') + props[dev].altname = out + + # and now write the data to the ftrace file + if not alreadystamped: + out = '#\n# '+msghead+'\n# Device Properties: ' + for dev in sorted(props): + out += props[dev].out(dev) + with open(sysvals.ftracefile, 'a') as fp: + fp.write(out+'\n') + + sysvals.devprops = props # Function: getModes # Description: @@ -2945,15 +4054,10 @@ def detectUSB(output): def getModes(): global sysvals modes = '' - if(not sysvals.android): - if(os.path.exists(sysvals.powerfile)): - fp = open(sysvals.powerfile, 'r') - modes = string.split(fp.read()) - fp.close() - else: - line = os.popen(sysvals.adb+' shell cat '+\ - sysvals.powerfile).read().strip() - modes = string.split(line) + if(os.path.exists(sysvals.powerfile)): + fp = open(sysvals.powerfile, 'r') + modes = string.split(fp.read()) + fp.close() return modes # Function: getFPDT @@ -2971,22 +4075,22 @@ def getFPDT(output): prectype[0] = 'Basic S3 Resume Performance Record' prectype[1] = 'Basic S3 Suspend Performance Record' - rootCheck() + rootCheck(True) if(not os.path.exists(sysvals.fpdtpath)): if(output): - doError('file doesnt exist: %s' % sysvals.fpdtpath, False) + doError('file does not exist: %s' % sysvals.fpdtpath, False) return False if(not os.access(sysvals.fpdtpath, os.R_OK)): if(output): - doError('file isnt readable: %s' % sysvals.fpdtpath, False) + doError('file is not readable: %s' % sysvals.fpdtpath, False) return False if(not os.path.exists(sysvals.mempath)): if(output): - doError('file doesnt exist: %s' % sysvals.mempath, False) + doError('file does not exist: %s' % sysvals.mempath, False) return False if(not os.access(sysvals.mempath, os.R_OK)): if(output): - doError('file isnt readable: %s' % sysvals.mempath, False) + doError('file is not readable: %s' % sysvals.mempath, False) return False fp = open(sysvals.fpdtpath, 'rb') @@ -3027,15 +4131,19 @@ def getFPDT(output): while(i < len(records)): header = struct.unpack('HBB', records[i:i+4]) if(header[0] not in rectype): + i += header[1] continue if(header[1] != 16): + i += header[1] continue addr = struct.unpack('Q', records[i+8:i+16])[0] try: fp.seek(addr) first = fp.read(8) except: - doError('Bad address 0x%x in %s' % (addr, sysvals.mempath), False) + if(output): + print('Bad address 0x%x in %s' % (addr, sysvals.mempath)) + return [0, 0] rechead = struct.unpack('4sI', first) recdata = fp.read(rechead[1]-8) if(rechead[0] == 'FBPT'): @@ -3090,89 +4198,60 @@ def getFPDT(output): # print the results to the terminal # Output: # True if the test will work, False if not -def statusCheck(): +def statusCheck(probecheck=False): global sysvals status = True - if(sysvals.android): - print('Checking the android system ...') - else: - print('Checking this system (%s)...' % platform.node()) - - # check if adb is connected to a device - if(sysvals.android): - res = 'NO' - out = os.popen(sysvals.adb+' get-state').read().strip() - if(out == 'device'): - res = 'YES' - print(' is android device connected: %s' % res) - if(res != 'YES'): - print(' Please connect the device before using this tool') - return False + print('Checking this system (%s)...' % platform.node()) # check we have root access - res = 'NO (No features of this tool will work!)' - if(sysvals.android): - out = os.popen(sysvals.adb+' shell id').read().strip() - if('root' in out): - res = 'YES' - else: - if(os.environ['USER'] == 'root'): - res = 'YES' + res = sysvals.colorText('NO (No features of this tool will work!)') + if(rootCheck(False)): + res = 'YES' print(' have root access: %s' % res) if(res != 'YES'): - if(sysvals.android): - print(' Try running "adb root" to restart the daemon as root') - else: - print(' Try running this script with sudo') + print(' Try running this script with sudo') return False # check sysfs is mounted - res = 'NO (No features of this tool will work!)' - if(sysvals.android): - out = os.popen(sysvals.adb+' shell ls '+\ - sysvals.powerfile).read().strip() - if(out == sysvals.powerfile): - res = 'YES' - else: - if(os.path.exists(sysvals.powerfile)): - res = 'YES' + res = sysvals.colorText('NO (No features of this tool will work!)') + if(os.path.exists(sysvals.powerfile)): + res = 'YES' print(' is sysfs mounted: %s' % res) if(res != 'YES'): return False # check target mode is a valid mode - res = 'NO' - modes = getModes() - if(sysvals.suspendmode in modes): - res = 'YES' - else: - status = False - print(' is "%s" a valid power mode: %s' % (sysvals.suspendmode, res)) - if(res == 'NO'): - print(' valid power modes are: %s' % modes) - print(' please choose one with -m') - - # check if the tool can unlock the device - if(sysvals.android): - res = 'YES' - out1 = os.popen(sysvals.adb+\ - ' shell dumpsys power | grep mScreenOn').read().strip() - out2 = os.popen(sysvals.adb+\ - ' shell input').read().strip() - if(not out1.startswith('mScreenOn') or not out2.startswith('usage')): - res = 'NO (wake the android device up before running the test)' - print(' can I unlock the screen: %s' % res) + if sysvals.suspendmode != 'command': + res = sysvals.colorText('NO') + modes = getModes() + if(sysvals.suspendmode in modes): + res = 'YES' + else: + status = False + print(' is "%s" a valid power mode: %s' % (sysvals.suspendmode, res)) + if(res == 'NO'): + print(' valid power modes are: %s' % modes) + print(' please choose one with -m') # check if ftrace is available - res = 'NO' - ftgood = verifyFtrace() + res = sysvals.colorText('NO') + ftgood = sysvals.verifyFtrace() if(ftgood): res = 'YES' elif(sysvals.usecallgraph): status = False print(' is ftrace supported: %s' % res) + # check if kprobes are available + res = sysvals.colorText('NO') + sysvals.usekprobes = sysvals.verifyKprobes() + if(sysvals.usekprobes): + res = 'YES' + else: + sysvals.usedevsrc = False + print(' are kprobes supported: %s' % res) + # what data source are we using res = 'DMESG' if(ftgood): @@ -3180,14 +4259,8 @@ def statusCheck(): sysvals.usetraceevents = False for e in sysvals.traceevents: check = False - if(sysvals.android): - out = os.popen(sysvals.adb+' shell ls -d '+\ - sysvals.epath+e).read().strip() - if(out == sysvals.epath+e): - check = True - else: - if(os.path.exists(sysvals.epath+e)): - check = True + if(os.path.exists(sysvals.epath+e)): + check = True if(not check): sysvals.usetraceeventsonly = False if(e == 'suspend_resume' and check): @@ -3199,13 +4272,48 @@ def statusCheck(): print(' timeline data source: %s' % res) # check if rtcwake - res = 'NO' + res = sysvals.colorText('NO') if(sysvals.rtcpath != ''): res = 'YES' elif(sysvals.rtcwake): status = False print(' is rtcwake supported: %s' % res) + if not probecheck: + return status + + if (sysvals.usecallgraph and len(sysvals.debugfuncs) > 0) or len(sysvals.kprobes) > 0: + sysvals.initFtrace(True) + + # verify callgraph debugfuncs + if sysvals.usecallgraph and len(sysvals.debugfuncs) > 0: + print(' verifying these ftrace callgraph functions work:') + sysvals.setFtraceFilterFunctions(sysvals.debugfuncs) + fp = open(sysvals.tpath+'set_graph_function', 'r') + flist = fp.read().split('\n') + fp.close() + for func in sysvals.debugfuncs: + res = sysvals.colorText('NO') + if func in flist: + res = 'YES' + else: + for i in flist: + if ' [' in i and func == i.split(' ')[0]: + res = 'YES' + break + print(' %s: %s' % (func, res)) + + # verify kprobes + if len(sysvals.kprobes) > 0: + print(' verifying these kprobes work:') + for name in sorted(sysvals.kprobes): + if name in sysvals.tracefuncs: + continue + res = sysvals.colorText('NO') + if sysvals.testKprobe(sysvals.kprobes[name]): + res = 'YES' + print(' %s: %s' % (name, res)) + return status # Function: doError @@ -3226,7 +4334,7 @@ def doError(msg, help): # Arguments: # msg: the warning message to print # file: If not empty, a filename to request be sent to the owner for debug -def doWarning(msg, file): +def doWarning(msg, file=''): print('/* %s */') % msg if(file): print('/* For a fix, please send this'+\ @@ -3235,18 +4343,25 @@ def doWarning(msg, file): # Function: rootCheck # Description: # quick check to see if we have root access -def rootCheck(): - if(os.environ['USER'] != 'root'): - doError('This script must be run as root', False) +def rootCheck(fatal): + global sysvals + if(os.access(sysvals.powerfile, os.W_OK)): + return True + if fatal: + doError('This command must be run as root', False) + return False # Function: getArgInt # Description: # pull out an integer argument from the command line with checks -def getArgInt(name, args, min, max): - try: - arg = args.next() - except: - doError(name+': no argument supplied', True) +def getArgInt(name, args, min, max, main=True): + if main: + try: + arg = args.next() + except: + doError(name+': no argument supplied', True) + else: + arg = args try: val = int(arg) except: @@ -3255,6 +4370,25 @@ def getArgInt(name, args, min, max): doError(name+': value should be between %d and %d' % (min, max), True) return val +# Function: getArgFloat +# Description: +# pull out a float argument from the command line with checks +def getArgFloat(name, args, min, max, main=True): + if main: + try: + arg = args.next() + except: + doError(name+': no argument supplied', True) + else: + arg = args + try: + val = float(arg) + except: + doError(name+': non-numerical value given', True) + if(val < min or val > max): + doError(name+': value should be between %f and %f' % (min, max), True) + return val + # Function: rerunTest # Description: # generate an output from an existing set of ftrace/dmesg logs @@ -3282,15 +4416,12 @@ def rerunTest(): # Function: runTest # Description: # execute a suspend/resume, gather the logs, and generate the output -def runTest(subdir): +def runTest(subdir, testpath=''): global sysvals # prepare for the test - if(not sysvals.android): - initFtrace() - else: - initFtraceAndroid() - sysvals.initTestOutput(subdir) + sysvals.initFtrace() + sysvals.initTestOutput(subdir, testpath) vprint('Output files:\n %s' % sysvals.dmesgfile) if(sysvals.usecallgraph or @@ -3300,10 +4431,8 @@ def runTest(subdir): vprint(' %s' % sysvals.htmlfile) # execute the test - if(not sysvals.android): - executeSuspend() - else: - executeAndroidSuspend() + executeSuspend() + sysvals.cleanupFtrace() # analyze the data and create the html output print('PROCESSING DATA') @@ -3367,6 +4496,153 @@ def runSummary(subdir, output): createHTMLSummarySimple(testruns, subdir+'/summary.html') +# Function: checkArgBool +# Description: +# check if a boolean string value is true or false +def checkArgBool(value): + yes = ['1', 'true', 'yes', 'on'] + if value.lower() in yes: + return True + return False + +# Function: configFromFile +# Description: +# Configure the script via the info in a config file +def configFromFile(file): + global sysvals + Config = ConfigParser.ConfigParser() + + ignorekprobes = False + Config.read(file) + sections = Config.sections() + if 'Settings' in sections: + for opt in Config.options('Settings'): + value = Config.get('Settings', opt).lower() + if(opt.lower() == 'verbose'): + sysvals.verbose = checkArgBool(value) + elif(opt.lower() == 'addlogs'): + sysvals.addlogs = checkArgBool(value) + elif(opt.lower() == 'dev'): + sysvals.usedevsrc = checkArgBool(value) + elif(opt.lower() == 'ignorekprobes'): + ignorekprobes = checkArgBool(value) + elif(opt.lower() == 'x2'): + if checkArgBool(value): + sysvals.execcount = 2 + elif(opt.lower() == 'callgraph'): + sysvals.usecallgraph = checkArgBool(value) + elif(opt.lower() == 'callgraphfunc'): + sysvals.debugfuncs = [] + if value: + value = value.split(',') + for i in value: + sysvals.debugfuncs.append(i.strip()) + elif(opt.lower() == 'expandcg'): + sysvals.cgexp = checkArgBool(value) + elif(opt.lower() == 'srgap'): + if checkArgBool(value): + sysvals.srgap = 5 + elif(opt.lower() == 'mode'): + sysvals.suspendmode = value + elif(opt.lower() == 'command'): + sysvals.testcommand = value + elif(opt.lower() == 'x2delay'): + sysvals.x2delay = getArgInt('-x2delay', value, 0, 60000, False) + elif(opt.lower() == 'postres'): + sysvals.postresumetime = getArgInt('-postres', value, 0, 3600, False) + elif(opt.lower() == 'rtcwake'): + sysvals.rtcwake = True + sysvals.rtcwaketime = getArgInt('-rtcwake', value, 0, 3600, False) + elif(opt.lower() == 'timeprec'): + sysvals.setPrecision(getArgInt('-timeprec', value, 0, 6, False)) + elif(opt.lower() == 'mindev'): + sysvals.mindevlen = getArgFloat('-mindev', value, 0.0, 10000.0, False) + elif(opt.lower() == 'mincg'): + sysvals.mincglen = getArgFloat('-mincg', value, 0.0, 10000.0, False) + elif(opt.lower() == 'kprobecolor'): + try: + val = int(value, 16) + sysvals.kprobecolor = '#'+value + except: + sysvals.kprobecolor = value + elif(opt.lower() == 'synccolor'): + try: + val = int(value, 16) + sysvals.synccolor = '#'+value + except: + sysvals.synccolor = value + elif(opt.lower() == 'output-dir'): + args = dict() + n = datetime.now() + args['date'] = n.strftime('%y%m%d') + args['time'] = n.strftime('%H%M%S') + args['hostname'] = sysvals.hostname + sysvals.outdir = value.format(**args) + + if sysvals.suspendmode == 'command' and not sysvals.testcommand: + doError('No command supplied for mode "command"', False) + if sysvals.usedevsrc and sysvals.usecallgraph: + doError('dev and callgraph cannot both be true', False) + if sysvals.usecallgraph and sysvals.execcount > 1: + doError('-x2 is not compatible with -f', False) + + if ignorekprobes: + return + + kprobes = dict() + archkprobe = 'Kprobe_'+platform.machine() + if archkprobe in sections: + for name in Config.options(archkprobe): + kprobes[name] = Config.get(archkprobe, name) + if 'Kprobe' in sections: + for name in Config.options('Kprobe'): + kprobes[name] = Config.get('Kprobe', name) + + for name in kprobes: + function = name + format = name + color = '' + args = dict() + data = kprobes[name].split() + i = 0 + for val in data: + # bracketted strings are special formatting, read them separately + if val[0] == '[' and val[-1] == ']': + for prop in val[1:-1].split(','): + p = prop.split('=') + if p[0] == 'color': + try: + color = int(p[1], 16) + color = '#'+p[1] + except: + color = p[1] + continue + # first real arg should be the format string + if i == 0: + format = val + # all other args are actual function args + else: + d = val.split('=') + args[d[0]] = d[1] + i += 1 + if not function or not format: + doError('Invalid kprobe: %s' % name, False) + for arg in re.findall('{(?P<n>[a-z,A-Z,0-9]*)}', format): + if arg not in args: + doError('Kprobe "%s" is missing argument "%s"' % (name, arg), False) + if name in sysvals.kprobes: + doError('Duplicate kprobe found "%s"' % (name), False) + vprint('Adding KPROBE: %s %s %s %s' % (name, function, format, args)) + sysvals.kprobes[name] = { + 'name': name, + 'func': function, + 'format': format, + 'args': args, + 'mask': re.sub('{(?P<n>[a-z,A-Z,0-9]*)}', '.*', format) + } + if color: + sysvals.kprobes[name]['color'] = color + # Function: printHelp # Description: # print out the help text @@ -3375,7 +4651,7 @@ def printHelp(): modes = getModes() print('') - print('AnalyzeSuspend v%.1f' % sysvals.version) + print('AnalyzeSuspend v%s' % sysvals.version) print('Usage: sudo analyze_suspend.py <options>') print('') print('Description:') @@ -3396,27 +4672,38 @@ def printHelp(): print(' [general]') print(' -h Print this help text') print(' -v Print the current tool version') + print(' -config file Pull arguments and config options from a file') print(' -verbose Print extra information during execution and analysis') print(' -status Test to see if the system is enabled to run this tool') print(' -modes List available suspend modes') print(' -m mode Mode to initiate for suspend %s (default: %s)') % (modes, sysvals.suspendmode) - print(' -rtcwake t Use rtcwake to autoresume after <t> seconds (default: disabled)') + print(' -o subdir Override the output subdirectory') print(' [advanced]') + print(' -rtcwake t Use rtcwake to autoresume after <t> seconds (default: disabled)') + print(' -addlogs Add the dmesg and ftrace logs to the html output') + print(' -multi n d Execute <n> consecutive tests at <d> seconds intervals. The outputs will') + print(' be created in a new subdirectory with a summary page.') + print(' -srgap Add a visible gap in the timeline between sus/res (default: disabled)') + print(' -cmd {s} Instead of suspend/resume, run a command, e.g. "sync -d"') + print(' -mindev ms Discard all device blocks shorter than ms milliseconds (e.g. 0.001 for us)') + print(' -mincg ms Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)') + print(' -timeprec N Number of significant digits in timestamps (0:S, [3:ms], 6:us)') + print(' [debug]') print(' -f Use ftrace to create device callgraphs (default: disabled)') - print(' -filter "d1 d2 ..." Filter out all but this list of dev names') + print(' -expandcg pre-expand the callgraph data in the html output (default: disabled)') + print(' -flist Print the list of functions currently being captured in ftrace') + print(' -flistall Print all functions capable of being captured in ftrace') + print(' -fadd file Add functions to be graphed in the timeline from a list in a text file') + print(' -filter "d1 d2 ..." Filter out all but this list of device names') + print(' -dev Display common low level functions in the timeline') + print(' [post-resume task analysis]') print(' -x2 Run two suspend/resumes back to back (default: disabled)') print(' -x2delay t Minimum millisecond delay <t> between the two test runs (default: 0 ms)') print(' -postres t Time after resume completion to wait for post-resume events (default: 0 S)') - print(' -multi n d Execute <n> consecutive tests at <d> seconds intervals. The outputs will') - print(' be created in a new subdirectory with a summary page.') print(' [utilities]') print(' -fpdt Print out the contents of the ACPI Firmware Performance Data Table') print(' -usbtopo Print out the current USB topology with power info') print(' -usbauto Enable autosuspend for all connected USB devices') - print(' [android testing]') - print(' -adb binary Use the given adb binary to run the test on an android device.') - print(' The device should already be connected and with root access.') - print(' Commands will be executed on the device using "adb shell"') print(' [re-analyze data from previous runs]') print(' -ftrace ftracefile Create HTML output using ftrace input') print(' -dmesg dmesgfile Create HTML output using dmesg (not needed for kernel >= 3.15)') @@ -3430,6 +4717,7 @@ if __name__ == '__main__': cmd = '' cmdarg = '' multitest = {'run': False, 'count': 0, 'delay': 0} + simplecmds = ['-modes', '-fpdt', '-flist', '-flistall', '-usbtopo', '-usbauto', '-status'] # loop through the command line arguments args = iter(sys.argv[1:]) for arg in args: @@ -3438,58 +4726,85 @@ if __name__ == '__main__': val = args.next() except: doError('No mode supplied', True) + if val == 'command' and not sysvals.testcommand: + doError('No command supplied for mode "command"', True) sysvals.suspendmode = val - elif(arg == '-adb'): - try: - val = args.next() - except: - doError('No adb binary supplied', True) - if(not os.path.exists(val)): - doError('file doesnt exist: %s' % val, False) - if(not os.access(val, os.X_OK)): - doError('file isnt executable: %s' % val, False) - try: - check = os.popen(val+' version').read().strip() - except: - doError('adb version failed to execute', False) - if(not re.match('Android Debug Bridge .*', check)): - doError('adb version failed to execute', False) - sysvals.adb = val - sysvals.android = True + elif(arg in simplecmds): + cmd = arg[1:] + elif(arg == '-h'): + printHelp() + sys.exit() + elif(arg == '-v'): + print("Version %s" % sysvals.version) + sys.exit() elif(arg == '-x2'): - if(sysvals.postresumetime > 0): - doError('-x2 is not compatible with -postres', False) sysvals.execcount = 2 + if(sysvals.usecallgraph): + doError('-x2 is not compatible with -f', False) elif(arg == '-x2delay'): sysvals.x2delay = getArgInt('-x2delay', args, 0, 60000) elif(arg == '-postres'): - if(sysvals.execcount != 1): - doError('-x2 is not compatible with -postres', False) sysvals.postresumetime = getArgInt('-postres', args, 0, 3600) elif(arg == '-f'): sysvals.usecallgraph = True - elif(arg == '-modes'): - cmd = 'modes' - elif(arg == '-fpdt'): - cmd = 'fpdt' - elif(arg == '-usbtopo'): - cmd = 'usbtopo' - elif(arg == '-usbauto'): - cmd = 'usbauto' - elif(arg == '-status'): - cmd = 'status' + if(sysvals.execcount > 1): + doError('-x2 is not compatible with -f', False) + if(sysvals.usedevsrc): + doError('-dev is not compatible with -f', False) + elif(arg == '-addlogs'): + sysvals.addlogs = True elif(arg == '-verbose'): sysvals.verbose = True - elif(arg == '-v'): - print("Version %.1f" % sysvals.version) - sys.exit() + elif(arg == '-dev'): + sysvals.usedevsrc = True + if(sysvals.usecallgraph): + doError('-dev is not compatible with -f', False) elif(arg == '-rtcwake'): sysvals.rtcwake = True sysvals.rtcwaketime = getArgInt('-rtcwake', args, 0, 3600) + elif(arg == '-timeprec'): + sysvals.setPrecision(getArgInt('-timeprec', args, 0, 6)) + elif(arg == '-mindev'): + sysvals.mindevlen = getArgFloat('-mindev', args, 0.0, 10000.0) + elif(arg == '-mincg'): + sysvals.mincglen = getArgFloat('-mincg', args, 0.0, 10000.0) + elif(arg == '-cmd'): + try: + val = args.next() + except: + doError('No command string supplied', True) + sysvals.testcommand = val + sysvals.suspendmode = 'command' + elif(arg == '-expandcg'): + sysvals.cgexp = True + elif(arg == '-srgap'): + sysvals.srgap = 5 elif(arg == '-multi'): multitest['run'] = True multitest['count'] = getArgInt('-multi n (exec count)', args, 2, 1000000) multitest['delay'] = getArgInt('-multi d (delay between tests)', args, 0, 3600) + elif(arg == '-o'): + try: + val = args.next() + except: + doError('No subdirectory name supplied', True) + sysvals.outdir = val + elif(arg == '-config'): + try: + val = args.next() + except: + doError('No text file supplied', True) + if(os.path.exists(val) == False): + doError('%s does not exist' % val, False) + configFromFile(val) + elif(arg == '-fadd'): + try: + val = args.next() + except: + doError('No text file supplied', True) + if(os.path.exists(val) == False): + doError('%s does not exist' % val, False) + sysvals.addFtraceFilterFunctions(val) elif(arg == '-dmesg'): try: val = args.next() @@ -3498,17 +4813,16 @@ if __name__ == '__main__': sysvals.notestrun = True sysvals.dmesgfile = val if(os.path.exists(sysvals.dmesgfile) == False): - doError('%s doesnt exist' % sysvals.dmesgfile, False) + doError('%s does not exist' % sysvals.dmesgfile, False) elif(arg == '-ftrace'): try: val = args.next() except: doError('No ftrace file supplied', True) sysvals.notestrun = True - sysvals.usecallgraph = True sysvals.ftracefile = val if(os.path.exists(sysvals.ftracefile) == False): - doError('%s doesnt exist' % sysvals.ftracefile, False) + doError('%s does not exist' % sysvals.ftracefile, False) elif(arg == '-summary'): try: val = args.next() @@ -3518,35 +4832,35 @@ if __name__ == '__main__': cmdarg = val sysvals.notestrun = True if(os.path.isdir(val) == False): - doError('%s isnt accesible' % val, False) + doError('%s is not accesible' % val, False) elif(arg == '-filter'): try: val = args.next() except: doError('No devnames supplied', True) sysvals.setDeviceFilter(val) - elif(arg == '-h'): - printHelp() - sys.exit() else: doError('Invalid argument: '+arg, True) + # callgraph size cannot exceed device size + if sysvals.mincglen < sysvals.mindevlen: + sysvals.mincglen = sysvals.mindevlen + # just run a utility command and exit if(cmd != ''): if(cmd == 'status'): - statusCheck() + statusCheck(True) elif(cmd == 'fpdt'): - if(sysvals.android): - doError('cannot read FPDT on android device', False) getFPDT(True) elif(cmd == 'usbtopo'): - if(sysvals.android): - doError('cannot read USB topology '+\ - 'on an android device', False) - detectUSB(True) + detectUSB() elif(cmd == 'modes'): modes = getModes() print modes + elif(cmd == 'flist'): + sysvals.getFtraceFilterFunctions(True) + elif(cmd == 'flistall'): + sysvals.getFtraceFilterFunctions(False) elif(cmd == 'usbauto'): setUSBDevicesAuto() elif(cmd == 'summary'): @@ -3554,15 +4868,6 @@ if __name__ == '__main__': runSummary(cmdarg, True) sys.exit() - # run test on android device - if(sysvals.android): - if(sysvals.usecallgraph): - doError('ftrace (-f) is not yet supported '+\ - 'in the android kernel', False) - if(sysvals.notestrun): - doError('cannot analyze test files on the '+\ - 'android device', False) - # if instructed, re-analyze existing data files if(sysvals.notestrun): rerunTest() @@ -3574,18 +4879,20 @@ if __name__ == '__main__': sys.exit() if multitest['run']: - # run multiple tests in a separte subdirectory + # run multiple tests in a separate subdirectory s = 'x%d' % multitest['count'] - subdir = datetime.now().strftime('suspend-'+s+'-%m%d%y-%H%M%S') - os.mkdir(subdir) + if not sysvals.outdir: + sysvals.outdir = datetime.now().strftime('suspend-'+s+'-%m%d%y-%H%M%S') + if not os.path.isdir(sysvals.outdir): + os.mkdir(sysvals.outdir) for i in range(multitest['count']): if(i != 0): print('Waiting %d seconds...' % (multitest['delay'])) time.sleep(multitest['delay']) print('TEST (%d/%d) START' % (i+1, multitest['count'])) - runTest(subdir) + runTest(sysvals.outdir) print('TEST (%d/%d) COMPLETE' % (i+1, multitest['count'])) - runSummary(subdir, False) + runSummary(sysvals.outdir, False) else: # run the test in the current directory - runTest(".") + runTest('.', sysvals.outdir) diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index e367b1a85d70..8561e7ddca59 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -1,7 +1,7 @@ CC = $(CROSS_COMPILE)gcc BUILD_OUTPUT := $(CURDIR) -PREFIX := /usr -DESTDIR := +PREFIX ?= /usr +DESTDIR ?= ifeq ("$(origin O)", "command line") BUILD_OUTPUT := $(O) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 89a55d5e32f3..492e84fbebfa 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -123,7 +123,7 @@ cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300 35 * 100 = 3500 MHz TSC frequency cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled) cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0) -cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 +cpu0: MSR_TURBO_RATIO_LIMIT: 0x25262727 37 * 100 = 3700 MHz max turbo 4 active cores 38 * 100 = 3800 MHz max turbo 3 active cores 39 * 100 = 3900 MHz max turbo 2 active cores diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index acbf7ff2ee6e..3e199b508a96 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1480,7 +1480,7 @@ dump_knl_turbo_ratio_limits(void) unsigned int cores[buckets_no]; unsigned int ratio[buckets_no]; - get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); |