diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-14 17:41:38 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-14 17:41:38 -0800 |
commit | 76d4acf22b4847f6c7b2f9042366fbdc3d20f578 (patch) | |
tree | 635466360e3716372dd91ad8c724b40d15c05753 /include/linux | |
parent | 8a8ca83ec3cf7ffc69020c189e3d368b1d4ba98a (diff) | |
parent | a70a04b3844f59c29573a8581d5c263225060dd6 (diff) |
Merge tag 'perf-kprobes-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf/kprobes updates from Thomas Gleixner:
"Make kretprobes lockless to avoid the rp->lock performance and
potential lock ordering issues"
* tag 'perf-kprobes-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
locking/atomics: Regenerate the atomics-check SHA1's
kprobes: Replace rp->free_instance with freelist
freelist: Implement lockless freelist
asm-generic/atomic: Add try_cmpxchg() fallbacks
kprobes: Remove kretprobe hash
llist: Add nonatomic __llist_add() and __llist_dell_all()
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/atomic-arch-fallback.h | 90 | ||||
-rw-r--r-- | include/linux/atomic-fallback.h | 90 | ||||
-rw-r--r-- | include/linux/freelist.h | 129 | ||||
-rw-r--r-- | include/linux/kprobes.h | 25 | ||||
-rw-r--r-- | include/linux/llist.h | 23 | ||||
-rw-r--r-- | include/linux/sched.h | 4 |
6 files changed, 336 insertions, 25 deletions
diff --git a/include/linux/atomic-arch-fallback.h b/include/linux/atomic-arch-fallback.h index bcb6aa27cfa6..a3dba31df01e 100644 --- a/include/linux/atomic-arch-fallback.h +++ b/include/linux/atomic-arch-fallback.h @@ -9,9 +9,9 @@ #include <linux/compiler.h> #ifndef arch_xchg_relaxed -#define arch_xchg_relaxed arch_xchg -#define arch_xchg_acquire arch_xchg -#define arch_xchg_release arch_xchg +#define arch_xchg_acquire arch_xchg +#define arch_xchg_release arch_xchg +#define arch_xchg_relaxed arch_xchg #else /* arch_xchg_relaxed */ #ifndef arch_xchg_acquire @@ -32,9 +32,9 @@ #endif /* arch_xchg_relaxed */ #ifndef arch_cmpxchg_relaxed -#define arch_cmpxchg_relaxed arch_cmpxchg -#define arch_cmpxchg_acquire arch_cmpxchg -#define arch_cmpxchg_release arch_cmpxchg +#define arch_cmpxchg_acquire arch_cmpxchg +#define arch_cmpxchg_release arch_cmpxchg +#define arch_cmpxchg_relaxed arch_cmpxchg #else /* arch_cmpxchg_relaxed */ #ifndef arch_cmpxchg_acquire @@ -55,9 +55,9 @@ #endif /* arch_cmpxchg_relaxed */ #ifndef arch_cmpxchg64_relaxed -#define arch_cmpxchg64_relaxed arch_cmpxchg64 -#define arch_cmpxchg64_acquire arch_cmpxchg64 -#define arch_cmpxchg64_release arch_cmpxchg64 +#define arch_cmpxchg64_acquire arch_cmpxchg64 +#define arch_cmpxchg64_release arch_cmpxchg64 +#define arch_cmpxchg64_relaxed arch_cmpxchg64 #else /* arch_cmpxchg64_relaxed */ #ifndef arch_cmpxchg64_acquire @@ -77,6 +77,76 @@ #endif /* arch_cmpxchg64_relaxed */ +#ifndef arch_try_cmpxchg_relaxed +#ifdef arch_try_cmpxchg +#define arch_try_cmpxchg_acquire arch_try_cmpxchg +#define arch_try_cmpxchg_release arch_try_cmpxchg +#define arch_try_cmpxchg_relaxed arch_try_cmpxchg +#endif /* arch_try_cmpxchg */ + +#ifndef arch_try_cmpxchg +#define arch_try_cmpxchg(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg */ + +#ifndef arch_try_cmpxchg_acquire +#define arch_try_cmpxchg_acquire(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg_acquire((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg_acquire */ + +#ifndef arch_try_cmpxchg_release +#define arch_try_cmpxchg_release(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg_release((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg_release */ + +#ifndef arch_try_cmpxchg_relaxed +#define arch_try_cmpxchg_relaxed(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg_relaxed((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg_relaxed */ + +#else /* arch_try_cmpxchg_relaxed */ + +#ifndef arch_try_cmpxchg_acquire +#define arch_try_cmpxchg_acquire(...) \ + __atomic_op_acquire(arch_try_cmpxchg, __VA_ARGS__) +#endif + +#ifndef arch_try_cmpxchg_release +#define arch_try_cmpxchg_release(...) \ + __atomic_op_release(arch_try_cmpxchg, __VA_ARGS__) +#endif + +#ifndef arch_try_cmpxchg +#define arch_try_cmpxchg(...) \ + __atomic_op_fence(arch_try_cmpxchg, __VA_ARGS__) +#endif + +#endif /* arch_try_cmpxchg_relaxed */ + #ifndef arch_atomic_read_acquire static __always_inline int arch_atomic_read_acquire(const atomic_t *v) @@ -2288,4 +2358,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 90cd26cfd69d2250303d654955a0cc12620fb91b +// cca554917d7ea73d5e3e7397dd70c484cad9b2c4 diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h index fd525c71d676..2a3f55d98be9 100644 --- a/include/linux/atomic-fallback.h +++ b/include/linux/atomic-fallback.h @@ -9,9 +9,9 @@ #include <linux/compiler.h> #ifndef xchg_relaxed -#define xchg_relaxed xchg -#define xchg_acquire xchg -#define xchg_release xchg +#define xchg_acquire xchg +#define xchg_release xchg +#define xchg_relaxed xchg #else /* xchg_relaxed */ #ifndef xchg_acquire @@ -32,9 +32,9 @@ #endif /* xchg_relaxed */ #ifndef cmpxchg_relaxed -#define cmpxchg_relaxed cmpxchg -#define cmpxchg_acquire cmpxchg -#define cmpxchg_release cmpxchg +#define cmpxchg_acquire cmpxchg +#define cmpxchg_release cmpxchg +#define cmpxchg_relaxed cmpxchg #else /* cmpxchg_relaxed */ #ifndef cmpxchg_acquire @@ -55,9 +55,9 @@ #endif /* cmpxchg_relaxed */ #ifndef cmpxchg64_relaxed -#define cmpxchg64_relaxed cmpxchg64 -#define cmpxchg64_acquire cmpxchg64 -#define cmpxchg64_release cmpxchg64 +#define cmpxchg64_acquire cmpxchg64 +#define cmpxchg64_release cmpxchg64 +#define cmpxchg64_relaxed cmpxchg64 #else /* cmpxchg64_relaxed */ #ifndef cmpxchg64_acquire @@ -77,6 +77,76 @@ #endif /* cmpxchg64_relaxed */ +#ifndef try_cmpxchg_relaxed +#ifdef try_cmpxchg +#define try_cmpxchg_acquire try_cmpxchg +#define try_cmpxchg_release try_cmpxchg +#define try_cmpxchg_relaxed try_cmpxchg +#endif /* try_cmpxchg */ + +#ifndef try_cmpxchg +#define try_cmpxchg(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = cmpxchg((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* try_cmpxchg */ + +#ifndef try_cmpxchg_acquire +#define try_cmpxchg_acquire(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = cmpxchg_acquire((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* try_cmpxchg_acquire */ + +#ifndef try_cmpxchg_release +#define try_cmpxchg_release(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = cmpxchg_release((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* try_cmpxchg_release */ + +#ifndef try_cmpxchg_relaxed +#define try_cmpxchg_relaxed(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = cmpxchg_relaxed((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* try_cmpxchg_relaxed */ + +#else /* try_cmpxchg_relaxed */ + +#ifndef try_cmpxchg_acquire +#define try_cmpxchg_acquire(...) \ + __atomic_op_acquire(try_cmpxchg, __VA_ARGS__) +#endif + +#ifndef try_cmpxchg_release +#define try_cmpxchg_release(...) \ + __atomic_op_release(try_cmpxchg, __VA_ARGS__) +#endif + +#ifndef try_cmpxchg +#define try_cmpxchg(...) \ + __atomic_op_fence(try_cmpxchg, __VA_ARGS__) +#endif + +#endif /* try_cmpxchg_relaxed */ + #define arch_atomic_read atomic_read #define arch_atomic_read_acquire atomic_read_acquire @@ -2522,4 +2592,4 @@ atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 9d95b56f98d82a2a26c7b79ccdd0c47572d50a6f +// d78e6c293c661c15188f0ec05bce45188c8d5892 diff --git a/include/linux/freelist.h b/include/linux/freelist.h new file mode 100644 index 000000000000..fc1842b96469 --- /dev/null +++ b/include/linux/freelist.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */ +#ifndef FREELIST_H +#define FREELIST_H + +#include <linux/atomic.h> + +/* + * Copyright: cameron@moodycamel.com + * + * A simple CAS-based lock-free free list. Not the fastest thing in the world + * under heavy contention, but simple and correct (assuming nodes are never + * freed until after the free list is destroyed), and fairly speedy under low + * contention. + * + * Adapted from: https://moodycamel.com/blog/2014/solving-the-aba-problem-for-lock-free-free-lists + */ + +struct freelist_node { + atomic_t refs; + struct freelist_node *next; +}; + +struct freelist_head { + struct freelist_node *head; +}; + +#define REFS_ON_FREELIST 0x80000000 +#define REFS_MASK 0x7FFFFFFF + +static inline void __freelist_add(struct freelist_node *node, struct freelist_head *list) +{ + /* + * Since the refcount is zero, and nobody can increase it once it's + * zero (except us, and we run only one copy of this method per node at + * a time, i.e. the single thread case), then we know we can safely + * change the next pointer of the node; however, once the refcount is + * back above zero, then other threads could increase it (happens under + * heavy contention, when the refcount goes to zero in between a load + * and a refcount increment of a node in try_get, then back up to + * something non-zero, then the refcount increment is done by the other + * thread) -- so if the CAS to add the node to the actual list fails, + * decrese the refcount and leave the add operation to the next thread + * who puts the refcount back to zero (which could be us, hence the + * loop). + */ + struct freelist_node *head = READ_ONCE(list->head); + + for (;;) { + WRITE_ONCE(node->next, head); + atomic_set_release(&node->refs, 1); + + if (!try_cmpxchg_release(&list->head, &head, node)) { + /* + * Hmm, the add failed, but we can only try again when + * the refcount goes back to zero. + */ + if (atomic_fetch_add_release(REFS_ON_FREELIST - 1, &node->refs) == 1) + continue; + } + return; + } +} + +static inline void freelist_add(struct freelist_node *node, struct freelist_head *list) +{ + /* + * We know that the should-be-on-freelist bit is 0 at this point, so + * it's safe to set it using a fetch_add. + */ + if (!atomic_fetch_add_release(REFS_ON_FREELIST, &node->refs)) { + /* + * Oh look! We were the last ones referencing this node, and we + * know we want to add it to the free list, so let's do it! + */ + __freelist_add(node, list); + } +} + +static inline struct freelist_node *freelist_try_get(struct freelist_head *list) +{ + struct freelist_node *prev, *next, *head = smp_load_acquire(&list->head); + unsigned int refs; + + while (head) { + prev = head; + refs = atomic_read(&head->refs); + if ((refs & REFS_MASK) == 0 || + !atomic_try_cmpxchg_acquire(&head->refs, &refs, refs+1)) { + head = smp_load_acquire(&list->head); + continue; + } + + /* + * Good, reference count has been incremented (it wasn't at + * zero), which means we can read the next and not worry about + * it changing between now and the time we do the CAS. + */ + next = READ_ONCE(head->next); + if (try_cmpxchg_acquire(&list->head, &head, next)) { + /* + * Yay, got the node. This means it was on the list, + * which means should-be-on-freelist must be false no + * matter the refcount (because nobody else knows it's + * been taken off yet, it can't have been put back on). + */ + WARN_ON_ONCE(atomic_read(&head->refs) & REFS_ON_FREELIST); + + /* + * Decrease refcount twice, once for our ref, and once + * for the list's ref. + */ + atomic_fetch_add(-2, &head->refs); + + return head; + } + + /* + * OK, the head must have changed on us, but we still need to decrement + * the refcount we increased. + */ + refs = atomic_fetch_add(-1, &prev->refs); + if (refs == REFS_ON_FREELIST + 1) + __freelist_add(prev, list); + } + + return NULL; +} + +#endif /* FREELIST_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 629abaf25681..a79404433812 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -27,6 +27,8 @@ #include <linux/rcupdate.h> #include <linux/mutex.h> #include <linux/ftrace.h> +#include <linux/refcount.h> +#include <linux/freelist.h> #include <asm/kprobes.h> #ifdef CONFIG_KPROBES @@ -144,6 +146,11 @@ static inline int kprobe_ftrace(struct kprobe *p) * ignored, due to maxactive being too low. * */ +struct kretprobe_holder { + struct kretprobe *rp; + refcount_t ref; +}; + struct kretprobe { struct kprobe kp; kretprobe_handler_t handler; @@ -151,18 +158,18 @@ struct kretprobe { int maxactive; int nmissed; size_t data_size; - struct hlist_head free_instances; - raw_spinlock_t lock; + struct freelist_head freelist; + struct kretprobe_holder *rph; }; struct kretprobe_instance { union { - struct hlist_node hlist; + struct freelist_node freelist; struct rcu_head rcu; }; - struct kretprobe *rp; + struct llist_node llist; + struct kretprobe_holder *rph; kprobe_opcode_t *ret_addr; - struct task_struct *task; void *fp; char data[]; }; @@ -221,6 +228,14 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, return ret; } +static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), + "Kretprobe is accessed from instance under preemptive context"); + + return READ_ONCE(ri->rph->rp); +} + #else /* CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) diff --git a/include/linux/llist.h b/include/linux/llist.h index 2e9c7215882b..24f207b0190b 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -197,6 +197,16 @@ static inline struct llist_node *llist_next(struct llist_node *node) extern bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_head *head); + +static inline bool __llist_add_batch(struct llist_node *new_first, + struct llist_node *new_last, + struct llist_head *head) +{ + new_last->next = head->first; + head->first = new_first; + return new_last->next == NULL; +} + /** * llist_add - add a new entry * @new: new entry to be added @@ -209,6 +219,11 @@ static inline bool llist_add(struct llist_node *new, struct llist_head *head) return llist_add_batch(new, new, head); } +static inline bool __llist_add(struct llist_node *new, struct llist_head *head) +{ + return __llist_add_batch(new, new, head); +} + /** * llist_del_all - delete all entries from lock-less list * @head: the head of lock-less list to delete all entries @@ -222,6 +237,14 @@ static inline struct llist_node *llist_del_all(struct llist_head *head) return xchg(&head->first, NULL); } +static inline struct llist_node *__llist_del_all(struct llist_head *head) +{ + struct llist_node *first = head->first; + + head->first = NULL; + return first; +} + extern struct llist_node *llist_del_first(struct llist_head *head); struct llist_node *llist_reverse_order(struct llist_node *head); diff --git a/include/linux/sched.h b/include/linux/sched.h index 1cac7efabc83..bd4225b73a1f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1350,6 +1350,10 @@ struct task_struct { struct callback_head mce_kill_me; #endif +#ifdef CONFIG_KRETPROBES + struct llist_head kretprobe_instances; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. |