From 3dcac251b066b60dba6d44c97d76faeb00bf19c5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Aug 2024 09:22:40 +0000 Subject: sched/core: Introduce SM_IDLE and an idle re-entry fast-path in __schedule() Since commit b2a02fc43a1f ("smp: Optimize send_call_function_single_ipi()") an idle CPU in TIF_POLLING_NRFLAG mode can be pulled out of idle by setting TIF_NEED_RESCHED flag to service an IPI without actually sending an interrupt. Even in cases where the IPI handler does not queue a task on the idle CPU, do_idle() will call __schedule() since need_resched() returns true in these cases. Introduce and use SM_IDLE to identify call to __schedule() from schedule_idle() and shorten the idle re-entry time by skipping pick_next_task() when nr_running is 0 and the previous task is the idle task. With the SM_IDLE fast-path, the time taken to complete a fixed set of IPIs using ipistorm improves noticeably. Following are the numbers from a dual socket Intel Ice Lake Xeon server (2 x 32C/64T) and 3rd Generation AMD EPYC system (2 x 64C/128T) (boost on, C2 disabled) running ipistorm between CPU8 and CPU16: cmdline: insmod ipistorm.ko numipi=100000 single=1 offset=8 cpulist=8 wait=1 ================================================================== Test : ipistorm (modified) Units : Normalized runtime Interpretation: Lower is better Statistic : AMean ======================= Intel Ice Lake Xeon ====================== kernel: time [pct imp] tip:sched/core 1.00 [baseline] tip:sched/core + SM_IDLE 0.80 [20.51%] ==================== 3rd Generation AMD EPYC ===================== kernel: time [pct imp] tip:sched/core 1.00 [baseline] tip:sched/core + SM_IDLE 0.90 [10.17%] ================================================================== [ kprateek: Commit message, SM_RTLOCK_WAIT fix ] Signed-off-by: Peter Zijlstra (Intel) Not-yet-signed-off-by: Peter Zijlstra Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Acked-by: Vincent Guittot Link: 
https://lore.kernel.org/r/20240809092240.6921-1-kprateek.nayak@amd.com --- kernel/sched/core.c | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ffcd637dc8e4..2922facebae7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6410,19 +6410,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * Constants for the sched_mode argument of __schedule(). * * The mode argument allows RT enabled kernels to differentiate a - * preemption from blocking on an 'sleeping' spin/rwlock. Note that - * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to - * optimize the AND operation out and just check for zero. + * preemption from blocking on an 'sleeping' spin/rwlock. */ -#define SM_NONE 0x0 -#define SM_PREEMPT 0x1 -#define SM_RTLOCK_WAIT 0x2 - -#ifndef CONFIG_PREEMPT_RT -# define SM_MASK_PREEMPT (~0U) -#else -# define SM_MASK_PREEMPT SM_PREEMPT -#endif +#define SM_IDLE (-1) +#define SM_NONE 0 +#define SM_PREEMPT 1 +#define SM_RTLOCK_WAIT 2 /* * __schedule() is the main scheduler function. @@ -6463,9 +6456,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * * WARNING: must be called with preemption disabled! */ -static void __sched notrace __schedule(unsigned int sched_mode) +static void __sched notrace __schedule(int sched_mode) { struct task_struct *prev, *next; + /* + * On PREEMPT_RT kernel, SM_RTLOCK_WAIT is noted + * as a preemption by schedule_debug() and RCU. 
+ */ + bool preempt = sched_mode > SM_NONE; unsigned long *switch_count; unsigned long prev_state; struct rq_flags rf; @@ -6476,13 +6474,13 @@ static void __sched notrace __schedule(unsigned int sched_mode) rq = cpu_rq(cpu); prev = rq->curr; - schedule_debug(prev, !!sched_mode); + schedule_debug(prev, preempt); if (sched_feat(HRTICK) || sched_feat(HRTICK_DL)) hrtick_clear(rq); local_irq_disable(); - rcu_note_context_switch(!!sched_mode); + rcu_note_context_switch(preempt); /* * Make sure that signal_pending_state()->signal_pending() below @@ -6511,12 +6509,20 @@ static void __sched notrace __schedule(unsigned int sched_mode) switch_count = &prev->nivcsw; + /* Task state changes only considers SM_PREEMPT as preemption */ + preempt = sched_mode == SM_PREEMPT; + /* * We must load prev->state once (task_struct::state is volatile), such * that we form a control dependency vs deactivate_task() below. */ prev_state = READ_ONCE(prev->__state); - if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) { + if (sched_mode == SM_IDLE) { + if (!rq->nr_running) { + next = prev; + goto picked; + } + } else if (!preempt && prev_state) { if (signal_pending_state(prev_state, prev)) { WRITE_ONCE(prev->__state, TASK_RUNNING); } else { @@ -6547,6 +6553,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) } next = pick_next_task(rq, prev, &rf); +picked: clear_tsk_need_resched(prev); clear_preempt_need_resched(); #ifdef CONFIG_SCHED_DEBUG @@ -6588,7 +6595,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) psi_account_irqtime(rq, prev, next); psi_sched_switch(prev, next, !task_on_rq_queued(prev)); - trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); + trace_sched_switch(preempt, prev, next, prev_state); /* Also unlocks the rq: */ rq = context_switch(rq, prev, next, &rf); @@ -6664,7 +6671,7 @@ static void sched_update_worker(struct task_struct *tsk) } } -static __always_inline void __schedule_loop(unsigned int sched_mode) +static 
__always_inline void __schedule_loop(int sched_mode) { do { preempt_disable(); @@ -6709,7 +6716,7 @@ void __sched schedule_idle(void) */ WARN_ON_ONCE(current->__state); do { - __schedule(SM_NONE); + __schedule(SM_IDLE); } while (need_resched()); } -- cgit v1.2.3-58-ga151 From 5d871a63997fa8bcf80adb49ea1f2f7840dff932 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 4 Sep 2024 11:24:17 +0200 Subject: sched/fair: Move effective_cpu_util() and sched_cpu_util() in fair.c Move the effective_cpu_util() and sched_cpu_util() functions into the fair.c file, alongside the other utilization-related functions. No functional change. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20240904092417.20660-1-vincent.guittot@linaro.org --- kernel/sched/fair.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/sched/syscalls.c | 101 ------------------------------------------------ 2 files changed, 99 insertions(+), 101 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d697a0a3fc73..9e19009da48f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8084,6 +8084,105 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) return cpu_util(cpu, p, -1, 0); } +/* + * This function computes an effective utilization for the given CPU, to be + * used for frequency selection given the linear relation: f = u * f_max. + * + * The scheduler tracks the following metrics: + * + * cpu_util_{cfs,rt,dl,irq}() + * cpu_bw_dl() + * + * Where the cfs,rt and dl util numbers are tracked with the same metric and + * synchronized windows and are thus directly comparable. + * + * The cfs,rt,dl utilization are the running times measured with rq->clock_task + * which excludes things like IRQ and steal-time. These latter are then accrued + * in the IRQ utilization.
+ * + * The DL bandwidth number OTOH is not a measured metric but a value computed + * based on the task model parameters and gives the minimal utilization + * required to meet deadlines. + */ +unsigned long effective_cpu_util(int cpu, unsigned long util_cfs, + unsigned long *min, + unsigned long *max) +{ + unsigned long util, irq, scale; + struct rq *rq = cpu_rq(cpu); + + scale = arch_scale_cpu_capacity(cpu); + + /* + * Early check to see if IRQ/steal time saturates the CPU, can be + * because of inaccuracies in how we track these -- see + * update_irq_load_avg(). + */ + irq = cpu_util_irq(rq); + if (unlikely(irq >= scale)) { + if (min) + *min = scale; + if (max) + *max = scale; + return scale; + } + + if (min) { + /* + * The minimum utilization returns the highest level between: + * - the computed DL bandwidth needed with the IRQ pressure which + * steals time to the deadline task. + * - The minimum performance requirement for CFS and/or RT. + */ + *min = max(irq + cpu_bw_dl(rq), uclamp_rq_get(rq, UCLAMP_MIN)); + + /* + * When an RT task is runnable and uclamp is not used, we must + * ensure that the task will run at maximum compute capacity. + */ + if (!uclamp_is_used() && rt_rq_is_runnable(&rq->rt)) + *min = max(*min, scale); + } + + /* + * Because the time spend on RT/DL tasks is visible as 'lost' time to + * CFS tasks and we use the same metric to track the effective + * utilization (PELT windows are synchronized) we can directly add them + * to obtain the CPU's actual utilization. + */ + util = util_cfs + cpu_util_rt(rq); + util += cpu_util_dl(rq); + + /* + * The maximum hint is a soft bandwidth requirement, which can be lower + * than the actual utilization because of uclamp_max requirements. + */ + if (max) + *max = min(scale, uclamp_rq_get(rq, UCLAMP_MAX)); + + if (util >= scale) + return scale; + + /* + * There is still idle time; further improve the number by using the + * IRQ metric. 
Because IRQ/steal time is hidden from the task clock we + * need to scale the task numbers: + * + * max - irq + * U' = irq + --------- * U + * max + */ + util = scale_irq_capacity(util, irq, scale); + util += irq; + + return min(scale, util); +} + +unsigned long sched_cpu_util(int cpu) +{ + return effective_cpu_util(cpu, cpu_util_cfs(cpu), NULL, NULL); +} + /* * energy_env - Utilization landscape for energy estimation. * @task_busy_time: Utilization contribution by the task for which we test the diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c index 4fae3cf25a3a..c62acf509b74 100644 --- a/kernel/sched/syscalls.c +++ b/kernel/sched/syscalls.c @@ -258,107 +258,6 @@ int sched_core_idle_cpu(int cpu) #endif -#ifdef CONFIG_SMP -/* - * This function computes an effective utilization for the given CPU, to be - * used for frequency selection given the linear relation: f = u * f_max. - * - * The scheduler tracks the following metrics: - * - * cpu_util_{cfs,rt,dl,irq}() - * cpu_bw_dl() - * - * Where the cfs,rt and dl util numbers are tracked with the same metric and - * synchronized windows and are thus directly comparable. - * - * The cfs,rt,dl utilization are the running times measured with rq->clock_task - * which excludes things like IRQ and steal-time. These latter are then accrued - * in the IRQ utilization. - * - * The DL bandwidth number OTOH is not a measured metric but a value computed - * based on the task model parameters and gives the minimal utilization - * required to meet deadlines. - */ -unsigned long effective_cpu_util(int cpu, unsigned long util_cfs, - unsigned long *min, - unsigned long *max) -{ - unsigned long util, irq, scale; - struct rq *rq = cpu_rq(cpu); - - scale = arch_scale_cpu_capacity(cpu); - - /* - * Early check to see if IRQ/steal time saturates the CPU, can be - * because of inaccuracies in how we track these -- see - * update_irq_load_avg(). 
- */ - irq = cpu_util_irq(rq); - if (unlikely(irq >= scale)) { - if (min) - *min = scale; - if (max) - *max = scale; - return scale; - } - - if (min) { - /* - * The minimum utilization returns the highest level between: - * - the computed DL bandwidth needed with the IRQ pressure which - * steals time to the deadline task. - * - The minimum performance requirement for CFS and/or RT. - */ - *min = max(irq + cpu_bw_dl(rq), uclamp_rq_get(rq, UCLAMP_MIN)); - - /* - * When an RT task is runnable and uclamp is not used, we must - * ensure that the task will run at maximum compute capacity. - */ - if (!uclamp_is_used() && rt_rq_is_runnable(&rq->rt)) - *min = max(*min, scale); - } - - /* - * Because the time spend on RT/DL tasks is visible as 'lost' time to - * CFS tasks and we use the same metric to track the effective - * utilization (PELT windows are synchronized) we can directly add them - * to obtain the CPU's actual utilization. - */ - util = util_cfs + cpu_util_rt(rq); - util += cpu_util_dl(rq); - - /* - * The maximum hint is a soft bandwidth requirement, which can be lower - * than the actual utilization because of uclamp_max requirements. - */ - if (max) - *max = min(scale, uclamp_rq_get(rq, UCLAMP_MAX)); - - if (util >= scale) - return scale; - - /* - * There is still idle time; further improve the number by using the - * IRQ metric. Because IRQ/steal time is hidden from the task clock we - * need to scale the task numbers: - * - * max - irq - * U' = irq + --------- * U - * max - */ - util = scale_irq_capacity(util, irq, scale); - util += irq; - - return min(scale, util); -} - -unsigned long sched_cpu_util(int cpu) -{ - return effective_cpu_util(cpu, cpu_util_cfs(cpu), NULL, NULL); -} -#endif /* CONFIG_SMP */ - /** * find_process_by_pid - find a process with a matching PID value. * @pid: the pid in question. 
-- cgit v1.2.3-58-ga151 From 84d265281d6cea65353fc24146280e0d86ac50cb Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 27 Aug 2024 19:26:07 +0800 Subject: sched/pelt: Use rq_clock_task() for hw_pressure Commit 97450eb90965 ("sched/pelt: Remove shift of thermal clock") removed the decay_shift for hw_pressure. That commit uses rq_clock_task() in sched_tick() while it replaces rq_clock_task() with rq_clock_pelt() in __update_blocked_others(). This can introduce an inconsistency. One possible scenario I can think of is in ___update_load_sum(): u64 delta = now - sa->last_update_time 'now' could be calculated by rq_clock_pelt() from __update_blocked_others(), and last_update_time was calculated by rq_clock_task() previously from sched_tick(). Usually the former lags behind the latter, so the unsigned subtraction wraps around, which causes a very large 'delta' and leads to unexpected behavior. Fixes: 97450eb90965 ("sched/pelt: Remove shift of thermal clock") Signed-off-by: Chen Yu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Hongyan Xia Reviewed-by: Vincent Guittot Link: https://lkml.kernel.org/r/20240827112607.181206-1-yu.c.chen@intel.com --- kernel/sched/fair.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/sched') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9e19009da48f..e946ca0b1ecd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9719,9 +9719,10 @@ static bool __update_blocked_others(struct rq *rq, bool *done) hw_pressure = arch_scale_hw_pressure(cpu_of(rq)); + /* hw_pressure doesn't care about invariance */ decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) | update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) | - update_hw_load_avg(now, rq, hw_pressure) | + update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure) | update_irq_load_avg(rq, 0); if (others_have_blocked(rq)) -- cgit v1.2.3-58-ga151 From 729288bc68560b4d5b094cb7a6f794c752ef22a2 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Thu, 5 Sep 2024
00:05:23 +0200 Subject: kernel/sched: Fix util_est accounting for DELAY_DEQUEUE Remove delayed tasks from util_est even though they are runnable. Exclude delayed tasks which are (a) migrating between rq's or (b) in a SAVE/RESTORE dequeue/enqueue. Signed-off-by: Dietmar Eggemann Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/c49ef5fe-a909-43f1-b02f-a765ab9cedbf@arm.com --- kernel/sched/fair.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e946ca0b1ecd..922d69031661 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6948,18 +6948,19 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) int rq_h_nr_running = rq->cfs.h_nr_running; u64 slice = 0; - if (flags & ENQUEUE_DELAYED) { - requeue_delayed_entity(se); - return; - } - /* * The code below (indirectly) updates schedutil which looks at * the cfs_rq utilization to select a frequency. * Let's add the task's estimated utilization to the cfs_rq's * estimated utilization, before we update schedutil.
*/ - util_est_enqueue(&rq->cfs, p); + if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE)))) + util_est_enqueue(&rq->cfs, p); + + if (flags & ENQUEUE_DELAYED) { + requeue_delayed_entity(se); + return; + } /* * If in_iowait is set, the code below may not trigger any cpufreq @@ -7177,7 +7178,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) */ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) { - util_est_dequeue(&rq->cfs, p); + if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE)))) + util_est_dequeue(&rq->cfs, p); if (dequeue_entities(rq, &p->se, flags) < 0) { util_est_update(&rq->cfs, p, DEQUEUE_SLEEP); -- cgit v1.2.3-58-ga151 From c662e2b1e8cfc3b6329704dab06051f8c3ec2993 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Sep 2024 17:02:24 +0200 Subject: sched: Fix sched_delayed vs sched_core Completely analogous to commit dfa0a574cbc4 ("sched/uclamg: Handle delayed dequeue"), avoid double dequeue for the sched_core entries. 
Fixes: 152e11f6df29 ("sched/fair: Implement delayed dequeue") Signed-off-by: Peter Zijlstra (Intel) --- kernel/sched/core.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/sched') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2922facebae7..b4c5d83e54d4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -259,6 +259,9 @@ static inline int rb_sched_core_cmp(const void *key, const struct rb_node *node) void sched_core_enqueue(struct rq *rq, struct task_struct *p) { + if (p->se.sched_delayed) + return; + rq->core->core_task_seq++; if (!p->core_cookie) @@ -269,6 +272,9 @@ void sched_core_enqueue(struct rq *rq, struct task_struct *p) void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { + if (p->se.sched_delayed) + return; + rq->core->core_task_seq++; if (sched_core_enqueued(p)) { -- cgit v1.2.3-58-ga151 From 2cab4bd024d23f658e40dce209dfd012f4e8b19a Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 6 Sep 2024 13:30:19 +0800 Subject: sched/debug: Fix the runnable tasks output The current runnable tasks output looks like: runnable tasks: S task PID tree-key switches prio wait-time sum-exec sum-sleep ------------------------------------------------------------------------------------------------------------- Ikworker/R-rcu_g 4 0.129049 E 0.620179 0.750000 0.002920 2 100 0.000000 0.002920 0.000000 0.000000 0 0 / Ikworker/R-sync_ 5 0.125328 E 0.624147 0.750000 0.001840 2 100 0.000000 0.001840 0.000000 0.000000 0 0 / Ikworker/R-slub_ 6 0.120835 E 0.628680 0.750000 0.001800 2 100 0.000000 0.001800 0.000000 0.000000 0 0 / Ikworker/R-netns 7 0.114294 E 0.634701 0.750000 0.002400 2 100 0.000000 0.002400 0.000000 0.000000 0 0 / I kworker/0:1 9 508.781746 E 511.754666 3.000000 151.575240 224 120 0.000000 151.575240 0.000000 0.000000 0 0 / Which is messy. 
Remove the duplicate printing of sum_exec_runtime and tidy up the layout to make it look like: runnable tasks: S task PID vruntime eligible deadline slice sum-exec switches prio wait-time sum-sleep sum-block node group-id group-path ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- I kworker/0:3 1698 295.001459 E 297.977619 3.000000 38.862920 9 120 0.000000 0.000000 0.000000 0 0 / I kworker/0:4 1702 278.026303 E 281.026303 3.000000 9.918760 3 120 0.000000 0.000000 0.000000 0 0 / S NetworkManager 2646 0.377936 E 2.598104 3.000000 98.535880 314 120 0.000000 0.000000 0.000000 0 0 /system.slice/NetworkManager.service S virtqemud 2689 0.541016 E 2.440104 3.000000 50.967960 80 120 0.000000 0.000000 0.000000 0 0 /system.slice/virtqemud.service S gsd-smartcard 3058 73.604144 E 76.475904 3.000000 74.033320 88 120 0.000000 0.000000 0.000000 0 0 /user.slice/user-42.slice/session-c1.scope Reviewed-by: Christoph Lameter (Ampere) Signed-off-by: Huang Shijie Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20240906053019.7874-1-shijie@os.amperecomputing.com --- kernel/sched/debug.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 01ce9a76164c..de1dc5264b3f 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -739,7 +739,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) else SEQ_printf(m, " %c", task_state_to_char(p)); - SEQ_printf(m, "%15s %5d %9Ld.%06ld %c %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld %5d ", + SEQ_printf(m, " %15s %5d %9Ld.%06ld %c %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld %5d ", p->comm, task_pid_nr(p), SPLIT_NS(p->se.vruntime), entity_eligible(cfs_rq_of(&p->se), &p->se) ? 
'E' : 'N', @@ -750,17 +750,16 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) (long long)(p->nvcsw + p->nivcsw), p->prio); - SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld %9lld.%06ld", + SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld", SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)), - SPLIT_NS(p->se.sum_exec_runtime), SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); #ifdef CONFIG_NUMA_BALANCING - SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); + SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); #endif #ifdef CONFIG_CGROUP_SCHED - SEQ_printf_task_group_path(m, task_group(p), " %s") + SEQ_printf_task_group_path(m, task_group(p), " %s") #endif SEQ_printf(m, "\n"); @@ -772,10 +771,26 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) SEQ_printf(m, "\n"); SEQ_printf(m, "runnable tasks:\n"); - SEQ_printf(m, " S task PID tree-key switches prio" - " wait-time sum-exec sum-sleep\n"); + SEQ_printf(m, " S task PID vruntime eligible " + "deadline slice sum-exec switches " + "prio wait-time sum-sleep sum-block" +#ifdef CONFIG_NUMA_BALANCING + " node group-id" +#endif +#ifdef CONFIG_CGROUP_SCHED + " group-path" +#endif + "\n"); SEQ_printf(m, "-------------------------------------------------------" - "------------------------------------------------------\n"); + "------------------------------------------------------" + "------------------------------------------------------" +#ifdef CONFIG_NUMA_BALANCING + "--------------" +#endif +#ifdef CONFIG_CGROUP_SCHED + "--------------" +#endif + "\n"); rcu_read_lock(); for_each_process_thread(g, p) { -- cgit v1.2.3-58-ga151 From bc9057da1a220ff2cb6c8885fd5352558aceba2c Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Tue, 13 Aug 2024 15:43:48 +0100 Subject: sched/cpufreq: Use NSEC_PER_MSEC for deadline task Convert the sugov deadline task attributes to use the available 
definitions to make them more readable. No functional change. Signed-off-by: Christian Loehle Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Acked-by: Juri Lelli Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20240813144348.1180344-5-christian.loehle@arm.com --- kernel/sched/cpufreq_schedutil.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/sched') diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index eece6244f9d2..43111a515a28 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -654,9 +654,9 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) * Fake (unused) bandwidth; workaround to "fix" * priority inheritance. */ - .sched_runtime = 1000000, - .sched_deadline = 10000000, - .sched_period = 10000000, + .sched_runtime = NSEC_PER_MSEC, + .sched_deadline = 10 * NSEC_PER_MSEC, + .sched_period = 10 * NSEC_PER_MSEC, }; struct cpufreq_policy *policy = sg_policy->policy; int ret; -- cgit v1.2.3-58-ga151