From 83ab38ef0a0b2407d43af9575bb32333fdd74fb2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 10 Jun 2024 14:46:36 +0200
Subject: jump_label: Fix concurrency issues in static_key_slow_dec()

The commit which tried to fix the concurrency issues of concurrent
static_key_slow_inc() failed to fix the equivalent issues
vs. static_key_slow_dec():

    CPU0                                CPU1

    static_key_slow_dec()
      static_key_slow_try_dec()
        key->enabled == 1
        val = atomic_fetch_add_unless(&key->enabled, -1, 1);
        if (val == 1)
             return false;

      jump_label_lock();
      if (atomic_dec_and_test(&key->enabled)) {
         --> key->enabled == 0
         __jump_label_update()

                                        static_key_slow_dec()
                                          static_key_slow_try_dec()
                                            key->enabled == 0
                                            val = atomic_fetch_add_unless(&key->enabled, -1, 1);
                                            --> key->enabled == -1 <- FAIL

There is another bug in that code: when there is a concurrent
static_key_slow_inc() which enables the key, it sets key->enabled
to -1, so on the other CPU

    val = atomic_fetch_add_unless(&key->enabled, -1, 1);

will succeed and decrement to -2, which is invalid.

Cure all of this by replacing the atomic_fetch_add_unless() with an
atomic_try_cmpxchg() loop similar to static_key_fast_inc_not_disabled().

[peterz: add WARN_ON_ONCE for the -1 race]

Fixes: 4c5ea0a9cd02 ("locking/static_key: Fix concurrent static_key_slow_inc()")
Reported-by: Yue Sun
Reported-by: Xingwei Lee
Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20240610124406.422897838@linutronix.de
---
 kernel/jump_label.c | 45 +++++++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 16 deletions(-)

(limited to 'kernel')

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 3218fa5688b9..1f05a19918f4 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -131,7 +131,7 @@ bool static_key_fast_inc_not_disabled(struct static_key *key)
 	STATIC_KEY_CHECK_USE(key);
 	/*
 	 * Negative key->enabled has a special meaning: it sends
-	 * static_key_slow_inc() down the slow path, and it is non-zero
+	 * static_key_slow_inc/dec() down the slow path, and it is non-zero
 	 * so it counts as "enabled" in jump_label_update(). Note that
 	 * atomic_inc_unless_negative() checks >= 0, so roll our own.
 	 */
@@ -150,7 +150,7 @@ bool static_key_slow_inc_cpuslocked(struct static_key *key)
 	lockdep_assert_cpus_held();

 	/*
-	 * Careful if we get concurrent static_key_slow_inc() calls;
+	 * Careful if we get concurrent static_key_slow_inc/dec() calls;
 	 * later calls must wait for the first one to _finish_ the
 	 * jump_label_update() process. At the same time, however,
 	 * the jump_label_update() call below wants to see
@@ -247,20 +247,32 @@ EXPORT_SYMBOL_GPL(static_key_disable);

 static bool static_key_slow_try_dec(struct static_key *key)
 {
-	int val;
-
-	val = atomic_fetch_add_unless(&key->enabled, -1, 1);
-	if (val == 1)
-		return false;
+	int v;

 	/*
-	 * The negative count check is valid even when a negative
-	 * key->enabled is in use by static_key_slow_inc(); a
-	 * __static_key_slow_dec() before the first static_key_slow_inc()
-	 * returns is unbalanced, because all other static_key_slow_inc()
-	 * instances block while the update is in progress.
+	 * Go into the slow path if key::enabled is less than or equal than
+	 * one. One is valid to shut down the key, anything less than one
+	 * is an imbalance, which is handled at the call site.
+	 *
+	 * That includes the special case of '-1' which is set in
+	 * static_key_slow_inc_cpuslocked(), but that's harmless as it is
+	 * fully serialized in the slow path below. By the time this task
+	 * acquires the jump label lock the value is back to one and the
+	 * retry under the lock must succeed.
 	 */
-	WARN(val < 0, "jump label: negative count!\n");
+	v = atomic_read(&key->enabled);
+	do {
+		/*
+		 * Warn about the '-1' case though; since that means a
+		 * decrement is concurrent with a first (0->1) increment. IOW
+		 * people are trying to disable something that wasn't yet fully
+		 * enabled. This suggests an ordering problem on the user side.
+		 */
+		WARN_ON_ONCE(v < 0);
+		if (v <= 1)
+			return false;
+	} while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v - 1)));
+
 	return true;
 }

@@ -271,10 +283,11 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key)
 	if (static_key_slow_try_dec(key))
 		return;

-	jump_label_lock();
-	if (atomic_dec_and_test(&key->enabled))
+	guard(mutex)(&jump_label_mutex);
+	if (atomic_cmpxchg(&key->enabled, 1, 0))
 		jump_label_update(key);
-	jump_label_unlock();
+	else
+		WARN_ON_ONCE(!static_key_slow_try_dec(key));
 }

 static void __static_key_slow_dec(struct static_key *key)
--
cgit v1.2.3-58-ga151
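The decrement rule introduced above can be seen in isolation with a small
userspace sketch. The following is a hypothetical analogue, not kernel code:
it uses C11 atomics in place of the kernel's atomic_t API, and the names
slow_try_dec() and enabled are illustrative only. The point it demonstrates
is that the loop refuses to touch the count once it reads a value at or
below one, so the 1 -> 0 transition (and anything below it) is always left
to the locked slow path:

	/*
	 * Userspace sketch of the cmpxchg-loop pattern from the patch above.
	 * C11 atomics stand in for the kernel's atomic_t API.
	 */
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static atomic_int enabled = 2;	/* pretend two users hold the key */

	/* Decrement only while the count stays above one. */
	static bool slow_try_dec(atomic_int *cnt)
	{
		int v = atomic_load(cnt);

		do {
			if (v <= 1)
				return false;	/* caller must take the lock */
		} while (!atomic_compare_exchange_weak(cnt, &v, v - 1));

		return true;
	}

	int main(void)
	{
		printf("%d\n", slow_try_dec(&enabled));	/* 1: 2 -> 1 */
		printf("%d\n", slow_try_dec(&enabled));	/* 0: refuses at 1 */
		printf("enabled == %d\n", atomic_load(&enabled));	/* still 1 */
		return 0;
	}

Unlike atomic_fetch_add_unless(&key->enabled, -1, 1), which only backs off
when it reads exactly 1, the loop re-reads the current value after every
failed exchange, so a racing decrement can never drive the count to 0 or -1
outside the lock.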
From 695ef796467ed228b60f1915995e390aea3d85c6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 10 Jun 2024 14:46:37 +0200
Subject: jump_label: Clarify condition in static_key_fast_inc_not_disabled()

It is not immediately obvious that the second part of

    if (v <= 0 || (v + 1) < 0)

acts as overflow protection.

Check explicitly for v == INT_MAX instead and add a proper comment on how
this is used at the call sites.

Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20240610124406.484973160@linutronix.de
---
 kernel/jump_label.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 1f05a19918f4..4d06ec2f3e07 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -132,12 +132,15 @@ bool static_key_fast_inc_not_disabled(struct static_key *key)
 	/*
 	 * Negative key->enabled has a special meaning: it sends
 	 * static_key_slow_inc/dec() down the slow path, and it is non-zero
-	 * so it counts as "enabled" in jump_label_update(). Note that
-	 * atomic_inc_unless_negative() checks >= 0, so roll our own.
+	 * so it counts as "enabled" in jump_label_update().
+	 *
+	 * The INT_MAX overflow condition is either used by the networking
+	 * code to reset or detected in the slow path of
+	 * static_key_slow_inc_cpuslocked().
 	 */
 	v = atomic_read(&key->enabled);
 	do {
-		if (v <= 0 || (v + 1) < 0)
+		if (v <= 0 || v == INT_MAX)
 			return false;
 	} while (!likely(atomic_try_cmpxchg(&key->enabled, &v, v + 1)));

--
cgit v1.2.3-58-ga151
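The old and the new overflow guard accept exactly the same non-negative
values; the INT_MAX comparison simply states the intent directly instead of
probing whether v + 1 would wrap around. A small stand-alone check, not
kernel code, with an illustrative helper name:

	/* Illustration only: both guards reject a saturated counter. */
	#include <assert.h>
	#include <limits.h>

	/* Refuse a new reference when the count is disabled or saturated. */
	static int can_take_ref(int v)
	{
		return !(v <= 0 || v == INT_MAX);
	}

	int main(void)
	{
		assert(!can_take_ref(0));		/* disabled */
		assert(!can_take_ref(-1));		/* slow path in progress */
		assert(can_take_ref(1));		/* normal increment */
		assert(can_take_ref(INT_MAX - 1));	/* last representable ref */
		assert(!can_take_ref(INT_MAX));		/* would overflow */
		return 0;
	}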
From 9bc2ff871f00437ad2f10c1eceff51aaa72b478f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 10 Jun 2024 14:46:39 +0200
Subject: jump_label: Simplify and clarify static_key_fast_inc_cpus_locked()

Make the code more obvious and add proper comments to avoid future head
scratching.

Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20240610124406.548322963@linutronix.de
---
 kernel/jump_label.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 4d06ec2f3e07..4ad5ed8adf96 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -162,22 +162,24 @@ bool static_key_slow_inc_cpuslocked(struct static_key *key)
 	if (static_key_fast_inc_not_disabled(key))
 		return true;

-	jump_label_lock();
-	if (atomic_read(&key->enabled) == 0) {
-		atomic_set(&key->enabled, -1);
+	guard(mutex)(&jump_label_mutex);
+	/* Try to mark it as 'enabling in progress. */
+	if (!atomic_cmpxchg(&key->enabled, 0, -1)) {
 		jump_label_update(key);
 		/*
-		 * Ensure that if the above cmpxchg loop observes our positive
-		 * value, it must also observe all the text changes.
+		 * Ensure that when static_key_fast_inc_not_disabled() or
+		 * static_key_slow_try_dec() observe the positive value,
+		 * they must also observe all the text changes.
 		 */
 		atomic_set_release(&key->enabled, 1);
 	} else {
-		if (WARN_ON_ONCE(!static_key_fast_inc_not_disabled(key))) {
-			jump_label_unlock();
+		/*
+		 * While holding the mutex this should never observe
+		 * anything else than a value >= 1 and succeed
+		 */
+		if (WARN_ON_ONCE(!static_key_fast_inc_not_disabled(key)))
 			return false;
-		}
 	}
-	jump_label_unlock();
 	return true;
 }
--
cgit v1.2.3-58-ga151
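The enable protocol that this cleanup makes explicit, claim the 0 -> -1
'enabling in progress' transition under the mutex, patch the code, then
publish the final 1 with a release store, can also be sketched outside the
kernel. This is a hypothetical userspace analogue with C11 atomics;
slow_enable_locked() and patch_code() are made-up names, and the else
branch is simplified compared to the kernel's retry of
static_key_fast_inc_not_disabled():

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* 0: disabled, -1: enabling in progress, >0: enabled refcount */
	static atomic_int enabled;

	static void patch_code(void)
	{
		/* Stand-in for jump_label_update(): rewrite the jump sites. */
	}

	/* Caller is assumed to hold the equivalent of jump_label_mutex. */
	static bool slow_enable_locked(void)
	{
		int expected = 0;

		/* Only the 0 -> -1 winner patches the code. */
		if (atomic_compare_exchange_strong(&enabled, &expected, -1)) {
			patch_code();
			/* Publish the count only after the patching is done. */
			atomic_store_explicit(&enabled, 1, memory_order_release);
		} else {
			/* Simplified: the kernel re-checks bounds here. */
			atomic_fetch_add(&enabled, 1);
		}
		return true;
	}

	int main(void)
	{
		slow_enable_locked();
		slow_enable_locked();
		printf("enabled == %d\n", atomic_load(&enabled));	/* 2 */
		return 0;
	}

The release store orders the publication of the count after the code
patching, mirroring the atomic_set_release() in the patch above.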
From e81859fe64ad42dccefe134d1696e0635f78d763 Mon Sep 17 00:00:00 2001
From: John Stultz
Date: Mon, 8 Jul 2024 23:08:27 -0700
Subject: locking/rwsem: Add __always_inline annotation to __down_write_common() and inlined callers

Apparently despite it being marked inline, the compiler may not inline
__down_write_common(), which makes it difficult to identify the cause of
lock contention, as the wchan of the blocked function will always be
listed as __down_write_common().

So add an __always_inline annotation to the common function (as well as
the inlined helper callers) to force it to be inlined, so a more useful
blocking function will be listed (via wchan).

This mirrors commit 92cc5d00a431 ("locking/rwsem: Add __always_inline
annotation to __down_read_common() and inlined callers") which did the
same for __down_read_common.

I sort of worry that I'm playing whack-a-mole here, and talking with
compiler people, they tell me inline means nothing, which makes me want
to cry a little. So I'm wondering if we need to replace all the inlines
with __always_inline, or remove them because either we mean something by
it, or not.

Fixes: c995e638ccbb ("locking/rwsem: Fold __down_{read,write}*()")
Reported-by: Tim Murray
Signed-off-by: John Stultz
Signed-off-by: Peter Zijlstra (Intel)
Acked-by: Waiman Long
Link: https://lkml.kernel.org/r/20240709060831.495366-1-jstultz@google.com
---
 kernel/locking/rwsem.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index c6d17aee4209..33cac79e3994 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1297,7 +1297,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 /*
  * lock for writing
  */
-static inline int __down_write_common(struct rw_semaphore *sem, int state)
+static __always_inline int __down_write_common(struct rw_semaphore *sem, int state)
 {
 	int ret = 0;

@@ -1310,12 +1310,12 @@ static inline int __down_write_common(struct rw_semaphore *sem, int state)
 	return ret;
 }

-static inline void __down_write(struct rw_semaphore *sem)
+static __always_inline void __down_write(struct rw_semaphore *sem)
 {
 	__down_write_common(sem, TASK_UNINTERRUPTIBLE);
 }

-static inline int __down_write_killable(struct rw_semaphore *sem)
+static __always_inline int __down_write_killable(struct rw_semaphore *sem)
 {
 	return __down_write_common(sem, TASK_KILLABLE);
 }
--
cgit v1.2.3-58-ga151
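The annotation itself is easy to demonstrate outside the kernel. Below is a
minimal, hypothetical userspace sketch: the __always_inline macro is a
local stand-in for the kernel's definition, which boils down to the
GCC/Clang always_inline function attribute, and the down_write* helpers are
trivial placeholders rather than real lock code. With the attribute the
compiler is not free to skip inlining the helpers, which is what the commit
relies on to get a more useful symbol reported for the sleeping task:

	#include <stdio.h>

	/* Local stand-in for the kernel macro. */
	#define __always_inline	inline __attribute__((__always_inline__))

	static __always_inline int down_write_common(int *lock, int state)
	{
		/* Trivial placeholder for the real slow path. */
		*lock += 1;
		return state;
	}

	static __always_inline void down_write(int *lock)
	{
		down_write_common(lock, 0);
	}

	int main(void)
	{
		int lock = 0;

		down_write(&lock);	/* no separate down_write_common frame */
		printf("lock = %d\n", lock);
		return 0;
	}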