diff options
author | Paul E. McKenney <paulmck@kernel.org> | 2024-05-08 20:11:58 -0700 |
---|---|---|
committer | Paul E. McKenney <paulmck@kernel.org> | 2024-07-04 13:47:39 -0700 |
commit | 68d124b0999919015e6d23008eafea106ec6bb40 (patch) | |
tree | 5320f1ddfff8db2db07e335affb29c1285c3b830 /kernel | |
parent | 4b56b0f5d50c01ffb1372183f5c55e09764ca90e (diff) |
rcu: Add rcutree.nohz_full_patience_delay to reduce nohz_full OS jitter
If a CPU is running either a userspace application or a guest OS in
nohz_full mode, it is possible for a system call to occur just as an
RCU grace period is starting. If that CPU also has the scheduling-clock
tick enabled for any reason (such as a second runnable task), and if the
system was booted with rcutree.use_softirq=0, then RCU can add insult to
injury by awakening that CPU's rcuc kthread, resulting in yet another
task and yet more OS jitter due to switching to that task, running it,
and switching back.
In addition, in the common case where that system call is not of
excessively long duration, awakening the rcuc task is pointless.
This pointlessness is due to the fact that the CPU will enter an extended
quiescent state upon returning to the userspace application or guest OS.
In this case, the rcuc kthread cannot do anything that the main RCU
grace-period kthread cannot do on its behalf, at least if it is given
a few additional milliseconds (for example, given the time duration
specified by rcutree.jiffies_till_first_fqs, give or take scheduling
delays).
This commit therefore adds a rcutree.nohz_full_patience_delay kernel
boot parameter that specifies the grace period age (in milliseconds,
rounded to jiffies) before which RCU will refrain from awakening the
rcuc kthread. Preliminary experimentation suggests a value of 1000,
that is, one second. Increasing rcutree.nohz_full_patience_delay will
increase grace-period latency and in turn increase memory footprint,
so systems with constrained memory might choose a smaller value.
Systems with less-aggressive OS-jitter requirements might choose the
default value of zero, which keeps the traditional immediate-wakeup
behavior, thus avoiding increases in grace-period latency.
[ paulmck: Apply Leonardo Bras feedback. ]
Link: https://lore.kernel.org/all/20240328171949.743211-1-leobras@redhat.com/
Reported-by: Leonardo Bras <leobras@redhat.com>
Suggested-by: Leonardo Bras <leobras@redhat.com>
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Reviewed-by: Leonardo Bras <leobras@redhat.com>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/rcu/tree.c | 11 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 10 |
2 files changed, 19 insertions, 2 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 18eba61a6f8b..e942dcf51008 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -176,6 +176,9 @@ static int gp_init_delay; module_param(gp_init_delay, int, 0444); static int gp_cleanup_delay; module_param(gp_cleanup_delay, int, 0444); +static int nohz_full_patience_delay; +module_param(nohz_full_patience_delay, int, 0444); +static int nohz_full_patience_delay_jiffies; // Add delay to rcu_read_unlock() for strict grace periods. static int rcu_unlock_delay; @@ -4335,11 +4338,15 @@ static int rcu_pending(int user) return 1; /* Is this a nohz_full CPU in userspace or idle? (Ignore RCU if so.) */ - if ((user || rcu_is_cpu_rrupt_from_idle()) && rcu_nohz_full_cpu()) + gp_in_progress = rcu_gp_in_progress(); + if ((user || rcu_is_cpu_rrupt_from_idle() || + (gp_in_progress && + time_before(jiffies, READ_ONCE(rcu_state.gp_start) + + nohz_full_patience_delay_jiffies))) && + rcu_nohz_full_cpu()) return 0; /* Is the RCU core waiting for a quiescent state from this CPU? */ - gp_in_progress = rcu_gp_in_progress(); if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm && gp_in_progress) return 1; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 340bbefe5f65..009ce0320f7f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -93,6 +93,16 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay); if (gp_cleanup_delay) pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay); + if (nohz_full_patience_delay < 0) { + pr_info("\tRCU NOCB CPU patience negative (%d), resetting to zero.\n", nohz_full_patience_delay); + nohz_full_patience_delay = 0; + } else if (nohz_full_patience_delay > 5 * MSEC_PER_SEC) { + pr_info("\tRCU NOCB CPU patience too large (%d), resetting to %ld.\n", nohz_full_patience_delay, 5 * MSEC_PER_SEC); + nohz_full_patience_delay = 5 * MSEC_PER_SEC; + } else if (nohz_full_patience_delay) { + pr_info("\tRCU NOCB CPU patience set to %d milliseconds.\n", nohz_full_patience_delay); + } + nohz_full_patience_delay_jiffies = msecs_to_jiffies(nohz_full_patience_delay); if (!use_softirq) pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n"); if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG)) |