summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/percpu-refcount.h8
-rw-r--r--lib/percpu-refcount.c141
2 files changed, 116 insertions, 33 deletions
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index bd9483d390b4..d1252e1335e8 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -78,9 +78,11 @@ struct percpu_ref {
int __must_check percpu_ref_init(struct percpu_ref *ref,
percpu_ref_func_t *release, gfp_t gfp);
void percpu_ref_exit(struct percpu_ref *ref);
+void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+ percpu_ref_func_t *confirm_switch);
+void percpu_ref_reinit(struct percpu_ref *ref);
void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
percpu_ref_func_t *confirm_kill);
-void percpu_ref_reinit(struct percpu_ref *ref);
/**
* percpu_ref_kill - drop the initial ref
@@ -111,7 +113,7 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
/* paired with smp_store_release() in percpu_ref_reinit() */
smp_read_barrier_depends();
- if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC_DEAD))
+ if (unlikely(percpu_ptr & __PERCPU_REF_ATOMIC))
return false;
*percpu_countp = (unsigned long __percpu *)percpu_ptr;
@@ -193,6 +195,8 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
if (__ref_is_percpu(ref, &percpu_count)) {
this_cpu_inc(*percpu_count);
ret = true;
+ } else if (!(ACCESS_ONCE(ref->percpu_count_ptr) & __PERCPU_REF_DEAD)) {
+ ret = atomic_long_inc_not_zero(&ref->count);
}
rcu_read_unlock_sched();
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index e2ff19f970cf..6e0d14366c5d 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -1,6 +1,8 @@
#define pr_fmt(fmt) "%s: " fmt "\n", __func__
#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
#include <linux/percpu-refcount.h>
/*
@@ -31,6 +33,8 @@
#define PERCPU_COUNT_BIAS (1LU << (BITS_PER_LONG - 1))
+static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
+
static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
{
return (unsigned long __percpu *)
@@ -88,7 +92,19 @@ void percpu_ref_exit(struct percpu_ref *ref)
}
EXPORT_SYMBOL_GPL(percpu_ref_exit);
-static void percpu_ref_kill_rcu(struct rcu_head *rcu)
+static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
+{
+ struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
+
+ ref->confirm_switch(ref);
+ ref->confirm_switch = NULL;
+ wake_up_all(&percpu_ref_switch_waitq);
+
+ /* drop ref from percpu_ref_switch_to_atomic() */
+ percpu_ref_put(ref);
+}
+
+static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
{
struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
@@ -116,47 +132,79 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);
WARN_ONCE(atomic_long_read(&ref->count) <= 0,
- "percpu ref (%pf) <= 0 (%ld) after killed",
+ "percpu ref (%pf) <= 0 (%ld) after switching to atomic",
ref->release, atomic_long_read(&ref->count));
- /* @ref is viewed as dead on all CPUs, send out kill confirmation */
- if (ref->confirm_switch)
- ref->confirm_switch(ref);
+ /* @ref is viewed as dead on all CPUs, send out switch confirmation */
+ percpu_ref_call_confirm_rcu(rcu);
+}
- /*
- * Now we're in single atomic_long_t mode with a consistent
- * refcount, so it's safe to drop our initial ref:
- */
- percpu_ref_put(ref);
+static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
+{
+}
+
+static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+ percpu_ref_func_t *confirm_switch)
+{
+ if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) {
+ /* switching from percpu to atomic */
+ ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+
+ /*
+ * Non-NULL ->confirm_switch is used to indicate that
+ * switching is in progress. Use noop one if unspecified.
+ */
+ WARN_ON_ONCE(ref->confirm_switch);
+ ref->confirm_switch =
+ confirm_switch ?: percpu_ref_noop_confirm_switch;
+
+ percpu_ref_get(ref); /* put after confirmation */
+ call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
+ } else if (confirm_switch) {
+ /*
+ * Somebody already set ATOMIC. Switching may still be in
+ * progress. @confirm_switch must be invoked after the
+ * switching is complete and a full sched RCU grace period
+ * has passed. Wait synchronously for the previous
+ * switching and schedule @confirm_switch invocation.
+ */
+ wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
+ ref->confirm_switch = confirm_switch;
+
+ percpu_ref_get(ref); /* put after confirmation */
+ call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
+ }
}
/**
- * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
- * @ref: percpu_ref to kill
- * @confirm_kill: optional confirmation callback
+ * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
+ * @ref: percpu_ref to switch to atomic mode
+ * @confirm_switch: optional confirmation callback
*
- * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
- * @confirm_kill is not NULL. @confirm_kill, which may not block, will be
- * called after @ref is seen as dead from all CPUs - all further
- * invocations of percpu_ref_tryget_live() will fail. See
- * percpu_ref_tryget_live() for more details.
+ * There's no reason to use this function for the usual reference counting.
+ * Use percpu_ref_kill[_and_confirm]().
+ *
+ * Schedule switching of @ref to atomic mode. All its percpu counts will
+ * be collected to the main atomic counter. On completion, when all CPUs
+ * are guaraneed to be in atomic mode, @confirm_switch, which may not
+ * block, is invoked. This function may be invoked concurrently with all
+ * the get/put operations and can safely be mixed with kill and reinit
+ * operations.
*
- * Due to the way percpu_ref is implemented, @confirm_kill will be called
- * after at least one full RCU grace period has passed but this is an
- * implementation detail and callers must not depend on it.
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @confirm_kill is specified and @ref is already in
+ * the process of switching to atomic mode. In such cases, @confirm_switch
+ * will be invoked after the switching is complete.
+ *
+ * Due to the way percpu_ref is implemented, @confirm_switch will be called
+ * after at least one full sched RCU grace period has passed but this is an
+ * implementation detail and must not be depended upon.
*/
-void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
- percpu_ref_func_t *confirm_kill)
+void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+ percpu_ref_func_t *confirm_switch)
{
- WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC_DEAD,
- "%s called more than once on %pf!", __func__, ref->release);
-
- ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC_DEAD;
- ref->confirm_switch = confirm_kill;
-
- call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
+ __percpu_ref_switch_to_atomic(ref, confirm_switch);
}
-EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
/**
* percpu_ref_reinit - re-initialize a percpu refcount
@@ -192,3 +240,34 @@ void percpu_ref_reinit(struct percpu_ref *ref)
ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
}
EXPORT_SYMBOL_GPL(percpu_ref_reinit);
+
+/**
+ * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
+ * @ref: percpu_ref to kill
+ * @confirm_kill: optional confirmation callback
+ *
+ * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
+ * @confirm_kill is not NULL. @confirm_kill, which may not block, will be
+ * called after @ref is seen as dead from all CPUs at which point all
+ * further invocations of percpu_ref_tryget_live() will fail. See
+ * percpu_ref_tryget_live() for details.
+ *
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @confirm_kill is specified and @ref is already in
+ * the process of switching to atomic mode by percpu_ref_switch_atomic().
+ *
+ * Due to the way percpu_ref is implemented, @confirm_switch will be called
+ * after at least one full sched RCU grace period has passed but this is an
+ * implementation detail and must not be depended upon.
+ */
+void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
+ percpu_ref_func_t *confirm_kill)
+{
+ WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
+ "%s called more than once on %pf!", __func__, ref->release);
+
+ ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
+ __percpu_ref_switch_to_atomic(ref, confirm_kill);
+ percpu_ref_put(ref);
+}
+EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);