author	Linus Torvalds <torvalds@linux-foundation.org>	2024-05-19 11:38:15 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-05-19 11:38:15 -0700
commit	8dde191aabba42e9c16c8d9c853a72a062db27ee (patch)
tree	f6925e9996b8115ee7fab3a62d604b606af5a374
parent	fe0d43f23110ec80aea1f94eeb1e6ddab9ef453f (diff)
parent	49217ea147df7647cb89161b805c797487783fc0 (diff)
Merge tag 'sched-urgent-2024-05-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:

 - Fix a sched_balance_newidle setting bug

 - Fix bug in the setting of /sys/fs/cgroup/test/cpu.max.burst

 - Fix variable-shadowing build warning

 - Extend sched-domains debug output

 - Fix documentation

 - Fix comments

* tag 'sched-urgent-2024-05-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/core: Fix incorrect initialization of the 'burst' parameter in cpu_max_write()
  sched/fair: Remove stale FREQUENCY_UTIL comment
  sched/fair: Fix initial util_avg calculation
  docs: cgroup-v1: Clarify that domain levels are system-specific
  sched/debug: Dump domains' level
  sched/fair: Allow disabling sched_balance_newidle with sched_relax_domain_level
  arch/topology: Fix variable naming to avoid shadowing
-rw-r--r--	Documentation/admin-guide/cgroup-v1/cpusets.rst	7
-rw-r--r--	drivers/base/arch_topology.c	8
-rw-r--r--	kernel/cgroup/cpuset.c	2
-rw-r--r--	kernel/sched/core.c	2
-rw-r--r--	kernel/sched/debug.c	1
-rw-r--r--	kernel/sched/fair.c	9
-rw-r--r--	kernel/sched/topology.c	2
7 files changed, 19 insertions(+), 12 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v1/cpusets.rst b/Documentation/admin-guide/cgroup-v1/cpusets.rst
index 7d3415eea05d..f401af5e2f09 100644
--- a/Documentation/admin-guide/cgroup-v1/cpusets.rst
+++ b/Documentation/admin-guide/cgroup-v1/cpusets.rst
@@ -568,7 +568,7 @@ on the next tick. For some applications in special situation, waiting
The 'cpuset.sched_relax_domain_level' file allows you to request changing
this searching range as you like. This file takes int value which
-indicates size of searching range in levels ideally as follows,
+indicates size of searching range in levels approximately as follows,
otherwise initial value -1 that indicates the cpuset has no request.
====== ===========================================================
@@ -581,6 +581,11 @@ otherwise initial value -1 that indicates the cpuset has no request.
5 search system wide [on NUMA system]
====== ===========================================================
+Not all levels can be present and values can change depending on the
+system architecture and kernel configuration. Check
+/sys/kernel/debug/sched/domains/cpu*/domain*/ for system-specific
+details.
+
The system default is architecture dependent. The system default
can be changed using the relax_domain_level= boot parameter.
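For context, a minimal userspace sketch of making such a request through the cgroup-v1 interface. The /sys/fs/cgroup/cpuset mount point and the child cpuset "test" are hypothetical examples, and what a given level maps to is system-specific, per the note added above:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path =
		"/sys/fs/cgroup/cpuset/test/cpuset.sched_relax_domain_level";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Request searching up to domain level 2; the meaning of "2"
	 * is system-specific (check the debugfs path noted above). */
	if (write(fd, "2", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}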
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 0248912ff687..c66d070207a0 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -179,7 +179,7 @@ DEFINE_PER_CPU(unsigned long, hw_pressure);
void topology_update_hw_pressure(const struct cpumask *cpus,
unsigned long capped_freq)
{
- unsigned long max_capacity, capacity, hw_pressure;
+ unsigned long max_capacity, capacity, pressure;
u32 max_freq;
int cpu;
@@ -196,12 +196,12 @@ void topology_update_hw_pressure(const struct cpumask *cpus,
else
capacity = mult_frac(max_capacity, capped_freq, max_freq);
- hw_pressure = max_capacity - capacity;
+ pressure = max_capacity - capacity;
- trace_hw_pressure_update(cpu, hw_pressure);
+ trace_hw_pressure_update(cpu, pressure);
for_each_cpu(cpu, cpus)
- WRITE_ONCE(per_cpu(hw_pressure, cpu), hw_pressure);
+ WRITE_ONCE(per_cpu(hw_pressure, cpu), pressure);
}
EXPORT_SYMBOL_GPL(topology_update_hw_pressure);
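Why the rename matters: per_cpu() expands the variable name handed to it, so a local named hw_pressure shadows the per-CPU symbol of the same name and triggers the variable-shadowing build warning this merge mentions. A userspace analogy, with a deliberately simplified stand-in for per_cpu() (not the kernel macro):

#include <stdio.h>

/* Stand-ins: hw_pressure[] models DEFINE_PER_CPU(unsigned long,
 * hw_pressure); this per_cpu() is a simplified analogy that, like the
 * real macro, pastes in the variable *name* it is given. */
unsigned long hw_pressure[2];
#define per_cpu(var, cpu)	((var)[(cpu)])

static void topology_update(unsigned long max_capacity,
			    unsigned long capacity)
{
	/* Naming this local "hw_pressure" would make per_cpu() below
	 * expand against the local instead of the per-CPU storage,
	 * which is exactly what -Wshadow complains about. */
	unsigned long pressure = max_capacity - capacity;
	int cpu;

	for (cpu = 0; cpu < 2; cpu++)
		per_cpu(hw_pressure, cpu) = pressure;
}

int main(void)
{
	topology_update(1024, 768);
	printf("%lu %lu\n", hw_pressure[0], hw_pressure[1]);
	return 0;
}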
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index a10e4bd0c0c1..c12b9fdb22a4 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2941,7 +2941,7 @@ bool current_cpuset_is_being_rebound(void)
static int update_relax_domain_level(struct cpuset *cs, s64 val)
{
#ifdef CONFIG_SMP
- if (val < -1 || val >= sched_domain_level_max)
+ if (val < -1 || val > sched_domain_level_max + 1)
return -EINVAL;
#endif
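The widened upper bound pairs with the >= comparison change in kernel/sched/topology.c at the end of this merge: sched_domain_level_max + 1 becomes a meaningful request ("keep balancing at every level"), so validation must accept it. A standalone comparison of the two checks, with a made-up sched_domain_level_max:

#include <stdbool.h>
#include <stdio.h>

static int sched_domain_level_max = 2;	/* made up: e.g. SMT=0, MC=1, NUMA=2 */

static bool valid_old(long long val)	/* pre-patch bound */
{
	return !(val < -1 || val >= sched_domain_level_max);
}

static bool valid_new(long long val)	/* post-patch bound */
{
	return !(val < -1 || val > sched_domain_level_max + 1);
}

int main(void)
{
	long long v;

	for (v = -2; v <= 4; v++)
		printf("%2lld: old=%d new=%d\n", v, valid_old(v), valid_new(v));
	return 0;	/* new accepts -1..3, old only -1..1 */
}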
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 373eaeaf63b8..bcf2c4cc0522 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11401,7 +11401,7 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
{
struct task_group *tg = css_tg(of_css(of));
u64 period = tg_get_cfs_period(tg);
- u64 burst = tg_get_cfs_burst(tg);
+ u64 burst = tg->cfs_bandwidth.burst;
u64 quota;
int ret;
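The one-line change fixes a unit mismatch: tg_get_cfs_burst() reports the burst in microseconds, while tg_set_cfs_bandwidth(), which cpu_max_write() feeds, takes nanoseconds, so every write to cpu.max silently shrank a configured burst by a factor of 1000. Reading the raw nanosecond value tg->cfs_bandwidth.burst avoids the lossy round trip. A standalone model of the mismatch (simplified stand-ins, not the kernel helpers):

#include <stdio.h>

#define NSEC_PER_USEC	1000ULL

static unsigned long long cfs_burst_ns = 1000000ULL * NSEC_PER_USEC;

/* Models tg_get_cfs_burst(): reports the burst in microseconds. */
static unsigned long long get_burst_us(void)
{
	return cfs_burst_ns / NSEC_PER_USEC;
}

/* Models tg_set_cfs_bandwidth(): expects the burst in nanoseconds. */
static void set_bandwidth(unsigned long long burst_ns)
{
	cfs_burst_ns = burst_ns;
}

int main(void)
{
	/* Old cpu_max_write(): the us value is re-fed as if it were ns,
	 * shrinking the burst 1000-fold on every cpu.max write. */
	set_bandwidth(get_burst_us());
	printf("buggy: %llu us\n", get_burst_us());	/* 1000 */

	/* Fixed cpu_max_write(): pass the raw ns value straight through. */
	cfs_burst_ns = 1000000ULL * NSEC_PER_USEC;
	set_bandwidth(cfs_burst_ns);
	printf("fixed: %llu us\n", get_burst_us());	/* 1000000 */
	return 0;
}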
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 8d5d98a5834d..c1eb9a1afd13 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -425,6 +425,7 @@ static void register_sd(struct sched_domain *sd, struct dentry *parent)
debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
+ debugfs_create_u32("level", 0444, parent, (u32 *)&sd->level);
}
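The new read-only file sits next to the existing flags/groups_flags entries in each per-domain debugfs directory. A minimal sketch of reading it from userspace (assumes debugfs is mounted at /sys/kernel/debug and the kernel carries this patch):

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/kernel/debug/sched/domains/cpu0/domain0/level";
	unsigned int level;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");	/* needs root and a mounted debugfs */
		return 1;
	}
	if (fscanf(f, "%u", &level) == 1)
		printf("cpu0/domain0 level: %u\n", level);
	fclose(f);
	return 0;
}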
void update_sched_domain_debugfs(void)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4214df32ba45..8a5b1ae0aa55 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1030,7 +1030,8 @@ void init_entity_runnable_average(struct sched_entity *se)
* With new tasks being created, their initial util_avgs are extrapolated
* based on the cfs_rq's current util_avg:
*
- * util_avg = cfs_rq->util_avg / (cfs_rq->load_avg + 1) * se.load.weight
+ * util_avg = cfs_rq->avg.util_avg / (cfs_rq->avg.load_avg + 1)
+ * * se_weight(se)
*
* However, in many cases, the above util_avg does not give a desired
* value. Moreover, the sum of the util_avgs may be divergent, such
@@ -1077,7 +1078,7 @@ void post_init_entity_util_avg(struct task_struct *p)
if (cap > 0) {
if (cfs_rq->avg.util_avg != 0) {
- sa->util_avg = cfs_rq->avg.util_avg * se->load.weight;
+ sa->util_avg = cfs_rq->avg.util_avg * se_weight(se);
sa->util_avg /= (cfs_rq->avg.load_avg + 1);
if (sa->util_avg > cap)
@@ -7898,8 +7899,8 @@ eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
* Performance domain frequency: utilization clamping
* must be considered since it affects the selection
* of the performance domain frequency.
- * NOTE: in case RT tasks are running, by default the
- * FREQUENCY_UTIL's utilization can be max OPP.
+ * NOTE: in case RT tasks are running, by default the min
+ * utilization can be max OPP.
*/
eff_util = effective_cpu_util(cpu, util, &min, &max);
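In the util_avg hunk above, the point is scale consistency: cfs_rq->avg.util_avg and avg.load_avg live in the scale_load_down() domain, while se->load.weight carries an extra SCHED_FIXEDPOINT_SHIFT factor (1024 on 64-bit kernels), so the old product overshot by roughly that factor before being clamped to cap. A toy calculation with illustrative values:

#include <stdio.h>

#define SCHED_FIXEDPOINT_SHIFT	10	/* 64-bit kernels */

int main(void)
{
	unsigned long cfs_util_avg = 512;	/* cfs_rq->avg.util_avg */
	unsigned long cfs_load_avg = 1023;	/* cfs_rq->avg.load_avg */
	unsigned long weight = 1024;		/* se_weight(se), nice-0 task */
	unsigned long load_weight = weight << SCHED_FIXEDPOINT_SHIFT;

	/* Old: scaled weight against unscaled load_avg, ~1024x too big. */
	printf("old: %lu\n", cfs_util_avg * load_weight / (cfs_load_avg + 1));

	/* New: matches the comment's formula
	 * util_avg = avg.util_avg / (avg.load_avg + 1) * se_weight(se). */
	printf("new: %lu\n", cfs_util_avg * weight / (cfs_load_avg + 1));
	return 0;
}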
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 683559831656..329c82faca9b 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1474,7 +1474,7 @@ static void set_domain_attribute(struct sched_domain *sd,
} else
request = attr->relax_domain_level;
- if (sd->level > request) {
+ if (sd->level >= request) {
/* Turn off idle balance on this domain: */
sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
}
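With >= in place of >, a relax_domain_level request of 0 now strips the wake/newidle balancing flags from every domain, including level 0, which the old comparison always left balancing; that is the "disabling sched_balance_newidle" part of this merge. A toy walk-through (levels and flag values are illustrative stand-ins):

#include <stdio.h>

#define SD_BALANCE_WAKE		0x01	/* illustrative values */
#define SD_BALANCE_NEWIDLE	0x02

int main(void)
{
	int levels[] = { 0, 1, 2 };	/* e.g. SMT, MC, NUMA */
	int request = 0;		/* cpuset.sched_relax_domain_level = 0 */
	int i;

	for (i = 0; i < 3; i++) {
		int flags = SD_BALANCE_WAKE | SD_BALANCE_NEWIDLE;

		/* Old: levels[i] > request left level 0 untouched.
		 * New: levels[i] >= request clears it everywhere. */
		if (levels[i] >= request)
			flags &= ~(SD_BALANCE_WAKE | SD_BALANCE_NEWIDLE);
		printf("level %d: flags=%#x\n", levels[i], flags);
	}
	return 0;
}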