From ae77930060338226a4377d3b93580c43b5ec82ae Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 14:19:18 +0100 Subject: cpuidle: menu: Use shifts when calculating averages where possible We use do_div even though the divisor will usually be a power-of-two unless there are unusual outliers. Use shifts where possible Signed-off-by: Mel Gorman Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index c3732fa74f82..c36e1ea7ef08 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -31,7 +31,8 @@ * The default values do not overflow. */ #define BUCKETS 12 -#define INTERVALS 8 +#define INTERVAL_SHIFT 3 +#define INTERVALS (1UL << INTERVAL_SHIFT) #define RESOLUTION 1024 #define DECAY 8 #define MAX_INTERESTING 50000 @@ -227,7 +228,10 @@ again: max = value; } } - do_div(avg, divisor); + if (divisor == INTERVALS) + avg >>= INTERVAL_SHIFT; + else + do_div(avg, divisor); /* Then try to determine standard deviation */ stddev = 0; @@ -238,7 +242,11 @@ again: stddev += diff * diff; } } - do_div(stddev, divisor); + if (divisor == INTERVALS) + stddev >>= INTERVAL_SHIFT; + else + do_div(stddev, divisor); + /* * The typical interval is obtained when standard deviation is small * or standard deviation is small compared to the average interval. -- cgit v1.2.3-58-ga151 From 107d4f4601a1408d04a5b54ffba507c92c235f58 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 14:19:19 +0100 Subject: cpuidle: menu: Use ktime_to_us instead of reinventing the wheel The ktime_to_us implementation is slightly better than the one implemented in menu.c. Use it Signed-off-by: Mel Gorman Acked-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index c36e1ea7ef08..ba6df6044fff 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -296,7 +296,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); int i; unsigned int interactivity_req; - struct timespec t; if (data->needs_update) { menu_update(drv, dev); @@ -310,9 +309,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) return 0; /* determine the expected residency time, round up */ - t = ktime_to_timespec(tick_nohz_get_sleep_length()); - data->next_timer_us = - t.tv_sec * USEC_PER_SEC + t.tv_nsec / NSEC_PER_USEC; + data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length()); data->bucket = which_bucket(data->next_timer_us); -- cgit v1.2.3-58-ga151 From 64b4ca5cb6e1a9f577588db5765dc996ddf595e1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 14:19:20 +0100 Subject: cpuidle: menu: Call nr_iowait_cpu less times menu_select() via inline functions calls nr_iowait_cpu() twice as much as necessary. Signed-off-by: Mel Gorman Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index ba6df6044fff..f55d8260ec43 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -142,7 +142,7 @@ static int get_loadavg(void) return LOAD_INT(this) * 10 + LOAD_FRAC(this) / 10; } -static inline int which_bucket(unsigned int duration) +static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters) { int bucket = 0; @@ -152,7 +152,7 @@ static inline int which_bucket(unsigned int duration) * This allows us to calculate * E(duration)|iowait */ - if (nr_iowait_cpu(smp_processor_id())) + if (nr_iowaiters) bucket = BUCKETS/2; if (duration < 10) @@ -175,7 +175,7 @@ static inline int which_bucket(unsigned int duration) * to be, the higher this multiplier, and thus the higher * the barrier to go to an expensive C state. */ -static inline int performance_multiplier(void) +static inline int performance_multiplier(unsigned long nr_iowaiters) { int mult = 1; @@ -184,7 +184,7 @@ static inline int performance_multiplier(void) mult += 2 * get_loadavg(); /* for IO wait tasks (per cpu!) we add 5x each */ - mult += 10 * nr_iowait_cpu(smp_processor_id()); + mult += 10 * nr_iowaiters; return mult; } @@ -296,6 +296,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); int i; unsigned int interactivity_req; + unsigned long nr_iowaiters; if (data->needs_update) { menu_update(drv, dev); @@ -311,8 +312,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* determine the expected residency time, round up */ data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length()); - - data->bucket = which_bucket(data->next_timer_us); + nr_iowaiters = nr_iowait_cpu(smp_processor_id()); + data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); /* * Force the result of multiplication to be 64 bits even if both @@ -330,7 +331,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) * duration / latency ratio. Adjust the latency limit if * necessary. */ - interactivity_req = data->predicted_us / performance_multiplier(); + interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters); if (latency_req > interactivity_req) latency_req = interactivity_req; -- cgit v1.2.3-58-ga151 From 372ba8cb46b271a7662b92cbefedee56725f6bd0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 6 Aug 2014 14:19:21 +0100 Subject: cpuidle: menu: Lookup CPU runqueues less The menu governer makes separate lookups of the CPU runqueue to get load and number of IO waiters but it can be done with a single lookup. Signed-off-by: Mel Gorman Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/governors/menu.c | 17 +++++++---------- include/linux/sched.h | 3 +-- kernel/sched/core.c | 7 +++++++ kernel/sched/proc.c | 7 ------- 4 files changed, 15 insertions(+), 19 deletions(-) (limited to 'drivers/cpuidle') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index f55d8260ec43..27702742b319 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -134,12 +134,9 @@ struct menu_device { #define LOAD_INT(x) ((x) >> FSHIFT) #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) -static int get_loadavg(void) +static inline int get_loadavg(unsigned long load) { - unsigned long this = this_cpu_load(); - - - return LOAD_INT(this) * 10 + LOAD_FRAC(this) / 10; + return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10; } static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters) @@ -175,13 +172,13 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters * to be, the higher this multiplier, and thus the higher * the barrier to go to an expensive C state. */ -static inline int performance_multiplier(unsigned long nr_iowaiters) +static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned long load) { int mult = 1; /* for higher loadavg, we are more reluctant */ - mult += 2 * get_loadavg(); + mult += 2 * get_loadavg(load); /* for IO wait tasks (per cpu!) we add 5x each */ mult += 10 * nr_iowaiters; @@ -296,7 +293,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY); int i; unsigned int interactivity_req; - unsigned long nr_iowaiters; + unsigned long nr_iowaiters, cpu_load; if (data->needs_update) { menu_update(drv, dev); @@ -312,7 +309,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* determine the expected residency time, round up */ data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length()); - nr_iowaiters = nr_iowait_cpu(smp_processor_id()); + get_iowait_load(&nr_iowaiters, &cpu_load); data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); /* @@ -331,7 +328,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) * duration / latency ratio. Adjust the latency limit if * necessary. */ - interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters); + interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load); if (latency_req > interactivity_req) latency_req = interactivity_req; diff --git a/include/linux/sched.h b/include/linux/sched.h index 306f4f0c987a..641bd954bb5d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -168,8 +168,7 @@ extern int nr_processes(void); extern unsigned long nr_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); -extern unsigned long this_cpu_load(void); - +extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bdf01b494fe..863ef1d19563 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2385,6 +2385,13 @@ unsigned long nr_iowait_cpu(int cpu) return atomic_read(&this->nr_iowait); } +void get_iowait_load(unsigned long *nr_waiters, unsigned long *load) +{ + struct rq *this = this_rq(); + *nr_waiters = atomic_read(&this->nr_iowait); + *load = this->cpu_load[0]; +} + #ifdef CONFIG_SMP /* diff --git a/kernel/sched/proc.c b/kernel/sched/proc.c index 16f5a30f9c88..8ecd552fe4f2 100644 --- a/kernel/sched/proc.c +++ b/kernel/sched/proc.c @@ -8,13 +8,6 @@ #include "sched.h" -unsigned long this_cpu_load(void) -{ - struct rq *this = this_rq(); - return this->cpu_load[0]; -} - - /* * Global load-average calculations * -- cgit v1.2.3-58-ga151