diff options
Diffstat (limited to 'kernel/sched/topology.c')
-rw-r--r-- | kernel/sched/topology.c | 231 |
1 files changed, 223 insertions, 8 deletions
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 8d7f15ba5916..3f35ba1d8fde 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -201,6 +201,199 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) return 1; } +DEFINE_STATIC_KEY_FALSE(sched_energy_present); +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) +DEFINE_MUTEX(sched_energy_mutex); +bool sched_energy_update; + +static void free_pd(struct perf_domain *pd) +{ + struct perf_domain *tmp; + + while (pd) { + tmp = pd->next; + kfree(pd); + pd = tmp; + } +} + +static struct perf_domain *find_pd(struct perf_domain *pd, int cpu) +{ + while (pd) { + if (cpumask_test_cpu(cpu, perf_domain_span(pd))) + return pd; + pd = pd->next; + } + + return NULL; +} + +static struct perf_domain *pd_init(int cpu) +{ + struct em_perf_domain *obj = em_cpu_get(cpu); + struct perf_domain *pd; + + if (!obj) { + if (sched_debug()) + pr_info("%s: no EM found for CPU%d\n", __func__, cpu); + return NULL; + } + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return NULL; + pd->em_pd = obj; + + return pd; +} + +static void perf_domain_debug(const struct cpumask *cpu_map, + struct perf_domain *pd) +{ + if (!sched_debug() || !pd) + return; + + printk(KERN_DEBUG "root_domain %*pbl:", cpumask_pr_args(cpu_map)); + + while (pd) { + printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_cstate=%d }", + cpumask_first(perf_domain_span(pd)), + cpumask_pr_args(perf_domain_span(pd)), + em_pd_nr_cap_states(pd->em_pd)); + pd = pd->next; + } + + printk(KERN_CONT "\n"); +} + +static void destroy_perf_domain_rcu(struct rcu_head *rp) +{ + struct perf_domain *pd; + + pd = container_of(rp, struct perf_domain, rcu); + free_pd(pd); +} + +static void sched_energy_set(bool has_eas) +{ + if (!has_eas && static_branch_unlikely(&sched_energy_present)) { + if (sched_debug()) + pr_info("%s: stopping EAS\n", __func__); + static_branch_disable_cpuslocked(&sched_energy_present); + } else if (has_eas && !static_branch_unlikely(&sched_energy_present)) { + if (sched_debug()) + pr_info("%s: starting EAS\n", __func__); + static_branch_enable_cpuslocked(&sched_energy_present); + } +} + +/* + * EAS can be used on a root domain if it meets all the following conditions: + * 1. an Energy Model (EM) is available; + * 2. the SD_ASYM_CPUCAPACITY flag is set in the sched_domain hierarchy. + * 3. the EM complexity is low enough to keep scheduling overheads low; + * 4. schedutil is driving the frequency of all CPUs of the rd; + * + * The complexity of the Energy Model is defined as: + * + * C = nr_pd * (nr_cpus + nr_cs) + * + * with parameters defined as: + * - nr_pd: the number of performance domains + * - nr_cpus: the number of CPUs + * - nr_cs: the sum of the number of capacity states of all performance + * domains (for example, on a system with 2 performance domains, + * with 10 capacity states each, nr_cs = 2 * 10 = 20). + * + * It is generally not a good idea to use such a model in the wake-up path on + * very complex platforms because of the associated scheduling overheads. The + * arbitrary constraint below prevents that. It makes EAS usable up to 16 CPUs + * with per-CPU DVFS and less than 8 capacity states each, for example. + */ +#define EM_MAX_COMPLEXITY 2048 + +extern struct cpufreq_governor schedutil_gov; +static bool build_perf_domains(const struct cpumask *cpu_map) +{ + int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map); + struct perf_domain *pd = NULL, *tmp; + int cpu = cpumask_first(cpu_map); + struct root_domain *rd = cpu_rq(cpu)->rd; + struct cpufreq_policy *policy; + struct cpufreq_governor *gov; + + /* EAS is enabled for asymmetric CPU capacity topologies. */ + if (!per_cpu(sd_asym_cpucapacity, cpu)) { + if (sched_debug()) { + pr_info("rd %*pbl: CPUs do not have asymmetric capacities\n", + cpumask_pr_args(cpu_map)); + } + goto free; + } + + for_each_cpu(i, cpu_map) { + /* Skip already covered CPUs. */ + if (find_pd(pd, i)) + continue; + + /* Do not attempt EAS if schedutil is not being used. */ + policy = cpufreq_cpu_get(i); + if (!policy) + goto free; + gov = policy->governor; + cpufreq_cpu_put(policy); + if (gov != &schedutil_gov) { + if (rd->pd) + pr_warn("rd %*pbl: Disabling EAS, schedutil is mandatory\n", + cpumask_pr_args(cpu_map)); + goto free; + } + + /* Create the new pd and add it to the local list. */ + tmp = pd_init(i); + if (!tmp) + goto free; + tmp->next = pd; + pd = tmp; + + /* + * Count performance domains and capacity states for the + * complexity check. + */ + nr_pd++; + nr_cs += em_pd_nr_cap_states(pd->em_pd); + } + + /* Bail out if the Energy Model complexity is too high. */ + if (nr_pd * (nr_cs + nr_cpus) > EM_MAX_COMPLEXITY) { + WARN(1, "rd %*pbl: Failed to start EAS, EM complexity is too high\n", + cpumask_pr_args(cpu_map)); + goto free; + } + + perf_domain_debug(cpu_map, pd); + + /* Attach the new list of performance domains to the root domain. */ + tmp = rd->pd; + rcu_assign_pointer(rd->pd, pd); + if (tmp) + call_rcu(&tmp->rcu, destroy_perf_domain_rcu); + + return !!pd; + +free: + free_pd(pd); + tmp = rd->pd; + rcu_assign_pointer(rd->pd, NULL); + if (tmp) + call_rcu(&tmp->rcu, destroy_perf_domain_rcu); + + return false; +} +#else +static void free_pd(struct perf_domain *pd) { } +#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL*/ + static void free_rootdomain(struct rcu_head *rcu) { struct root_domain *rd = container_of(rcu, struct root_domain, rcu); @@ -211,6 +404,7 @@ static void free_rootdomain(struct rcu_head *rcu) free_cpumask_var(rd->rto_mask); free_cpumask_var(rd->online); free_cpumask_var(rd->span); + free_pd(rd->pd); kfree(rd); } @@ -397,7 +591,8 @@ DEFINE_PER_CPU(int, sd_llc_size); DEFINE_PER_CPU(int, sd_llc_id); DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared); DEFINE_PER_CPU(struct sched_domain *, sd_numa); -DEFINE_PER_CPU(struct sched_domain *, sd_asym); +DEFINE_PER_CPU(struct sched_domain *, sd_asym_packing); +DEFINE_PER_CPU(struct sched_domain *, sd_asym_cpucapacity); DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity); static void update_top_cache_domain(int cpu) @@ -423,7 +618,10 @@ static void update_top_cache_domain(int cpu) rcu_assign_pointer(per_cpu(sd_numa, cpu), sd); sd = highest_flag_domain(cpu, SD_ASYM_PACKING); - rcu_assign_pointer(per_cpu(sd_asym, cpu), sd); + rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd); + + sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY); + rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd); } /* @@ -1133,7 +1331,6 @@ sd_init(struct sched_domain_topology_level *tl, .last_balance = jiffies, .balance_interval = sd_weight, - .smt_gain = 0, .max_newidle_lb_cost = 0, .next_decay_max_lb_cost = jiffies, .child = child, @@ -1164,7 +1361,6 @@ sd_init(struct sched_domain_topology_level *tl, if (sd->flags & SD_SHARE_CPUCAPACITY) { sd->imbalance_pct = 110; - sd->smt_gain = 1178; /* ~15% */ } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { sd->imbalance_pct = 117; @@ -1934,6 +2130,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { + bool __maybe_unused has_eas = false; int i, j, n; int new_topology; @@ -1961,8 +2158,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], /* Destroy deleted domains: */ for (i = 0; i < ndoms_cur; i++) { for (j = 0; j < n && !new_topology; j++) { - if (cpumask_equal(doms_cur[i], doms_new[j]) - && dattrs_equal(dattr_cur, i, dattr_new, j)) + if (cpumask_equal(doms_cur[i], doms_new[j]) && + dattrs_equal(dattr_cur, i, dattr_new, j)) goto match1; } /* No match - a current sched domain not in new doms_new[] */ @@ -1982,8 +2179,8 @@ match1: /* Build new domains: */ for (i = 0; i < ndoms_new; i++) { for (j = 0; j < n && !new_topology; j++) { - if (cpumask_equal(doms_new[i], doms_cur[j]) - && dattrs_equal(dattr_new, i, dattr_cur, j)) + if (cpumask_equal(doms_new[i], doms_cur[j]) && + dattrs_equal(dattr_new, i, dattr_cur, j)) goto match2; } /* No match - add a new doms_new */ @@ -1992,6 +2189,24 @@ match2: ; } +#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) + /* Build perf. domains: */ + for (i = 0; i < ndoms_new; i++) { + for (j = 0; j < n && !sched_energy_update; j++) { + if (cpumask_equal(doms_new[i], doms_cur[j]) && + cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) { + has_eas = true; + goto match3; + } + } + /* No match - add perf. domains for a new rd */ + has_eas |= build_perf_domains(doms_new[i]); +match3: + ; + } + sched_energy_set(has_eas); +#endif + /* Remember the new sched domains: */ if (doms_cur != &fallback_doms) free_sched_domains(doms_cur, ndoms_cur); |