diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-24 10:18:23 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-24 10:18:23 -0700 |
commit | 2319be135672f6e45aa937bceaae6c2668c7867c (patch) | |
tree | 5d2210243fac4345bbb204064f9602fe9477b785 | |
parent | 143a6252e1b8ab424b4b293512a97cca7295c182 (diff) | |
parent | 8491d1bdf5de152f27fc941e2dcdc4e66c950542 (diff) |
Merge tag 'locking-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
- rwsem cleanups & optimizations/fixes:
- Conditionally wake waiters in reader/writer slowpaths
- Always try to wake waiters in out_nolock path
- Add try_cmpxchg64() implementation, with arch optimizations - and use
it to micro-optimize sched_clock_{local,remote}()
- Various force-inlining fixes to address objdump instrumentation-check
warnings
- Add lock contention tracepoints:
lock:contention_begin
lock:contention_end
- Misc smaller fixes & cleanups
* tag 'locking-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/clock: Use try_cmpxchg64 in sched_clock_{local,remote}
locking/atomic/x86: Introduce arch_try_cmpxchg64
locking/atomic: Add generic try_cmpxchg64 support
futex: Remove a PREEMPT_RT_FULL reference.
locking/qrwlock: Change "queue rwlock" to "queued rwlock"
lockdep: Delete local_irq_enable_in_hardirq()
locking/mutex: Make contention tracepoints more consistent wrt adaptive spinning
locking: Apply contention tracepoints in the slow path
locking: Add lock contention tracepoints
locking/rwsem: Always try to wake waiters in out_nolock path
locking/rwsem: Conditionally wake waiters in reader/writer slowpaths
locking/rwsem: No need to check for handoff bit if wait queue empty
lockdep: Fix -Wunused-parameter for _THIS_IP_
x86/mm: Force-inline __phys_addr_nodebug()
x86/kvm/svm: Force-inline GHCB accessors
task_stack, x86/cea: Force-inline stack helpers
31 files changed, 412 insertions, 148 deletions
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 05cedd42103c..56cefd33eb8e 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -75,7 +75,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs) if (interrupts_enabled(regs)) { if (regs->exit_rcu) { trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); rcu_irq_exit(); lockdep_hardirqs_on(CALLER_ADDR0); return; @@ -121,7 +121,7 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs) static __always_inline void __exit_to_user_mode(void) { trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); user_enter_irqoff(); lockdep_hardirqs_on(CALLER_ADDR0); } @@ -179,7 +179,7 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs) ftrace_nmi_exit(); if (restore) { trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); } rcu_nmi_exit(); @@ -215,7 +215,7 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) if (restore) { trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); } rcu_nmi_exit(); diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 0a7fe0321613..215f5a65790f 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -42,6 +42,9 @@ static inline void set_64bit(volatile u64 *ptr, u64 value) #define arch_cmpxchg64_local(ptr, o, n) \ ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \ (unsigned long long)(n))) +#define arch_try_cmpxchg64(ptr, po, n) \ + __try_cmpxchg64((ptr), (unsigned long long *)(po), \ + (unsigned long long)(n)) #endif static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new) @@ -70,6 +73,24 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) return prev; } +static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new) +{ + bool success; + u64 old = *pold; + asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]" + CC_SET(z) + : CC_OUT(z) (success), + [ptr] "+m" (*ptr), + "+A" (old) + : "b" ((u32)new), + "c" ((u32)(new >> 32)) + : "memory"); + + if (unlikely(!success)) + *pold = old; + return success; +} + #ifndef CONFIG_X86_CMPXCHG64 /* * Building a kernel capable running on 80386 and 80486. It may be necessary diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 072e5459fe2f..250187ac8248 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -19,6 +19,12 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) arch_cmpxchg_local((ptr), (o), (n)); \ }) +#define arch_try_cmpxchg64(ptr, po, n) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + arch_try_cmpxchg((ptr), (po), (n)); \ +}) + #define system_has_cmpxchg_double() boot_cpu_has(X86_FEATURE_CX16) #endif /* _ASM_X86_CMPXCHG_64_H */ diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index dd5ea1bdf04c..75efc4c6f076 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -143,7 +143,7 @@ extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags); extern struct cpu_entry_area *get_cpu_entry_area(int cpu); -static inline struct entry_stack *cpu_entry_stack(int cpu) +static __always_inline struct entry_stack *cpu_entry_stack(int cpu) { return &get_cpu_entry_area(cpu)->entry_stack_page.stack; } diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index e9c86299b835..baa70451b8df 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -16,7 +16,7 @@ extern unsigned long page_offset_base; extern unsigned long vmalloc_base; extern unsigned long vmemmap_base; -static inline unsigned long __phys_addr_nodebug(unsigned long x) +static __always_inline unsigned long __phys_addr_nodebug(unsigned long x) { unsigned long y = x - __START_KERNEL_map; diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index ec623e7da33d..1b07fba11704 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -569,23 +569,23 @@ struct vmcb { (offsetof(struct ghcb_save_area, field) / sizeof(u64)) #define DEFINE_GHCB_ACCESSORS(field) \ - static inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \ + static __always_inline bool ghcb_##field##_is_valid(const struct ghcb *ghcb) \ { \ return test_bit(GHCB_BITMAP_IDX(field), \ (unsigned long *)&ghcb->save.valid_bitmap); \ } \ \ - static inline u64 ghcb_get_##field(struct ghcb *ghcb) \ + static __always_inline u64 ghcb_get_##field(struct ghcb *ghcb) \ { \ return ghcb->save.field; \ } \ \ - static inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \ + static __always_inline u64 ghcb_get_##field##_if_valid(struct ghcb *ghcb) \ { \ return ghcb_##field##_is_valid(ghcb) ? ghcb->save.field : 0; \ } \ \ - static inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \ + static __always_inline void ghcb_set_##field(struct ghcb *ghcb, u64 value) \ { \ __set_bit(GHCB_BITMAP_IDX(field), \ (unsigned long *)&ghcb->save.valid_bitmap); \ diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h index 7ae0ece07b4e..d4cd4cc4389c 100644 --- a/include/asm-generic/qrwlock.h +++ b/include/asm-generic/qrwlock.h @@ -33,8 +33,8 @@ extern void queued_read_lock_slowpath(struct qrwlock *lock); extern void queued_write_lock_slowpath(struct qrwlock *lock); /** - * queued_read_trylock - try to acquire read lock of a queue rwlock - * @lock : Pointer to queue rwlock structure + * queued_read_trylock - try to acquire read lock of a queued rwlock + * @lock : Pointer to queued rwlock structure * Return: 1 if lock acquired, 0 if failed */ static inline int queued_read_trylock(struct qrwlock *lock) @@ -52,8 +52,8 @@ static inline int queued_read_trylock(struct qrwlock *lock) } /** - * queued_write_trylock - try to acquire write lock of a queue rwlock - * @lock : Pointer to queue rwlock structure + * queued_write_trylock - try to acquire write lock of a queued rwlock + * @lock : Pointer to queued rwlock structure * Return: 1 if lock acquired, 0 if failed */ static inline int queued_write_trylock(struct qrwlock *lock) @@ -68,8 +68,8 @@ static inline int queued_write_trylock(struct qrwlock *lock) _QW_LOCKED)); } /** - * queued_read_lock - acquire read lock of a queue rwlock - * @lock: Pointer to queue rwlock structure + * queued_read_lock - acquire read lock of a queued rwlock + * @lock: Pointer to queued rwlock structure */ static inline void queued_read_lock(struct qrwlock *lock) { @@ -84,8 +84,8 @@ static inline void queued_read_lock(struct qrwlock *lock) } /** - * queued_write_lock - acquire write lock of a queue rwlock - * @lock : Pointer to queue rwlock structure + * queued_write_lock - acquire write lock of a queued rwlock + * @lock : Pointer to queued rwlock structure */ static inline void queued_write_lock(struct qrwlock *lock) { @@ -98,8 +98,8 @@ static inline void queued_write_lock(struct qrwlock *lock) } /** - * queued_read_unlock - release read lock of a queue rwlock - * @lock : Pointer to queue rwlock structure + * queued_read_unlock - release read lock of a queued rwlock + * @lock : Pointer to queued rwlock structure */ static inline void queued_read_unlock(struct qrwlock *lock) { @@ -110,8 +110,8 @@ static inline void queued_read_unlock(struct qrwlock *lock) } /** - * queued_write_unlock - release write lock of a queue rwlock - * @lock : Pointer to queue rwlock structure + * queued_write_unlock - release write lock of a queued rwlock + * @lock : Pointer to queued rwlock structure */ static inline void queued_write_unlock(struct qrwlock *lock) { @@ -120,7 +120,7 @@ static inline void queued_write_unlock(struct qrwlock *lock) /** * queued_rwlock_is_contended - check if the lock is contended - * @lock : Pointer to queue rwlock structure + * @lock : Pointer to queued rwlock structure * Return: 1 if lock contended, 0 otherwise */ static inline int queued_rwlock_is_contended(struct qrwlock *lock) @@ -130,7 +130,7 @@ static inline int queued_rwlock_is_contended(struct qrwlock *lock) /* * Remapping rwlock architecture specific functions to the corresponding - * queue rwlock functions. + * queued rwlock functions. */ #define arch_read_lock(l) queued_read_lock(l) #define arch_write_lock(l) queued_write_lock(l) diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h index c36f1d5a2572..12392c14c4d0 100644 --- a/include/asm-generic/qrwlock_types.h +++ b/include/asm-generic/qrwlock_types.h @@ -7,7 +7,7 @@ #include <asm/spinlock_types.h> /* - * The queue read/write lock data structure + * The queued read/write lock data structure */ typedef struct qrwlock { diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index 6db58d180866..77bc5522e61c 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -147,6 +147,76 @@ #endif /* arch_try_cmpxchg_relaxed */ +#ifndef arch_try_cmpxchg64_relaxed +#ifdef arch_try_cmpxchg64 +#define arch_try_cmpxchg64_acquire arch_try_cmpxchg64 +#define arch_try_cmpxchg64_release arch_try_cmpxchg64 +#define arch_try_cmpxchg64_relaxed arch_try_cmpxchg64 +#endif /* arch_try_cmpxchg64 */ + +#ifndef arch_try_cmpxchg64 +#define arch_try_cmpxchg64(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg64((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg64 */ + +#ifndef arch_try_cmpxchg64_acquire +#define arch_try_cmpxchg64_acquire(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg64_acquire((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg64_acquire */ + +#ifndef arch_try_cmpxchg64_release +#define arch_try_cmpxchg64_release(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg64_release((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg64_release */ + +#ifndef arch_try_cmpxchg64_relaxed +#define arch_try_cmpxchg64_relaxed(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = arch_cmpxchg64_relaxed((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif /* arch_try_cmpxchg64_relaxed */ + +#else /* arch_try_cmpxchg64_relaxed */ + +#ifndef arch_try_cmpxchg64_acquire +#define arch_try_cmpxchg64_acquire(...) \ + __atomic_op_acquire(arch_try_cmpxchg64, __VA_ARGS__) +#endif + +#ifndef arch_try_cmpxchg64_release +#define arch_try_cmpxchg64_release(...) \ + __atomic_op_release(arch_try_cmpxchg64, __VA_ARGS__) +#endif + +#ifndef arch_try_cmpxchg64 +#define arch_try_cmpxchg64(...) \ + __atomic_op_fence(arch_try_cmpxchg64, __VA_ARGS__) +#endif + +#endif /* arch_try_cmpxchg64_relaxed */ + #ifndef arch_atomic_read_acquire static __always_inline int arch_atomic_read_acquire(const atomic_t *v) @@ -2386,4 +2456,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v) #endif #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 8e2cc06bc0d2c0967d2f8424762bd48555ee40ae +// b5e87bdd5ede61470c29f7a7e4de781af3770f09 diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index 5d69b143c28e..7a139ec030b0 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -2006,6 +2006,44 @@ atomic_long_dec_if_positive(atomic_long_t *v) arch_try_cmpxchg_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) +#define try_cmpxchg64(ptr, oldp, ...) \ +({ \ + typeof(ptr) __ai_ptr = (ptr); \ + typeof(oldp) __ai_oldp = (oldp); \ + kcsan_mb(); \ + instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ + instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \ + arch_try_cmpxchg64(__ai_ptr, __ai_oldp, __VA_ARGS__); \ +}) + +#define try_cmpxchg64_acquire(ptr, oldp, ...) \ +({ \ + typeof(ptr) __ai_ptr = (ptr); \ + typeof(oldp) __ai_oldp = (oldp); \ + instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ + instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \ + arch_try_cmpxchg64_acquire(__ai_ptr, __ai_oldp, __VA_ARGS__); \ +}) + +#define try_cmpxchg64_release(ptr, oldp, ...) \ +({ \ + typeof(ptr) __ai_ptr = (ptr); \ + typeof(oldp) __ai_oldp = (oldp); \ + kcsan_release(); \ + instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ + instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \ + arch_try_cmpxchg64_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \ +}) + +#define try_cmpxchg64_relaxed(ptr, oldp, ...) \ +({ \ + typeof(ptr) __ai_ptr = (ptr); \ + typeof(oldp) __ai_oldp = (oldp); \ + instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ + instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \ + arch_try_cmpxchg64_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \ +}) + #define cmpxchg_local(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ @@ -2045,4 +2083,4 @@ atomic_long_dec_if_positive(atomic_long_t *v) }) #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 87c974b93032afd42143613434d1a7788fa598f9 +// 764f741eb77a7ad565dc8d99ce2837d5542e8aee diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f40754caaefa..b5e06a6e4019 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -222,24 +222,6 @@ devm_request_any_context_irq(struct device *dev, unsigned int irq, extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); -/* - * On lockdep we dont want to enable hardirqs in hardirq - * context. Use local_irq_enable_in_hardirq() to annotate - * kernel code that has to do this nevertheless (pretty much - * the only valid case is for old/broken hardware that is - * insanely slow). - * - * NOTE: in theory this might break fragile code that relies - * on hardirq delivery - in practice we dont seem to have such - * places left. So the only effect should be slightly increased - * irqs-off latencies. - */ -#ifdef CONFIG_LOCKDEP -# define local_irq_enable_in_hardirq() do { } while (0) -#else -# define local_irq_enable_in_hardirq() local_irq_enable() -#endif - bool irq_has_action(unsigned int irq); extern void disable_irq_nosync(unsigned int irq); extern bool disable_hardirq(unsigned int irq); diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 4b140938b03e..5ec0fa71399e 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -20,13 +20,13 @@ #ifdef CONFIG_PROVE_LOCKING extern void lockdep_softirqs_on(unsigned long ip); extern void lockdep_softirqs_off(unsigned long ip); - extern void lockdep_hardirqs_on_prepare(unsigned long ip); + extern void lockdep_hardirqs_on_prepare(void); extern void lockdep_hardirqs_on(unsigned long ip); extern void lockdep_hardirqs_off(unsigned long ip); #else static inline void lockdep_softirqs_on(unsigned long ip) { } static inline void lockdep_softirqs_off(unsigned long ip) { } - static inline void lockdep_hardirqs_on_prepare(unsigned long ip) { } + static inline void lockdep_hardirqs_on_prepare(void) { } static inline void lockdep_hardirqs_on(unsigned long ip) { } static inline void lockdep_hardirqs_off(unsigned long ip) { } #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 34eed5f85ed6..4640393f20ab 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -453,7 +453,7 @@ static __always_inline void guest_state_enter_irqoff(void) { instrumentation_begin(); trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); instrumentation_end(); guest_context_enter_irqoff(); diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 892562ebbd3a..5e799a47431e 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -16,7 +16,7 @@ * try_get_task_stack() instead. task_stack_page will return a pointer * that could get freed out from under you. */ -static inline void *task_stack_page(const struct task_struct *task) +static __always_inline void *task_stack_page(const struct task_struct *task) { return task->stack; } diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h index d7512129a324..9ebd081e057e 100644 --- a/include/trace/events/lock.h +++ b/include/trace/events/lock.h @@ -5,11 +5,22 @@ #if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_LOCK_H -#include <linux/lockdep.h> +#include <linux/sched.h> #include <linux/tracepoint.h> +/* flags for lock:contention_begin */ +#define LCB_F_SPIN (1U << 0) +#define LCB_F_READ (1U << 1) +#define LCB_F_WRITE (1U << 2) +#define LCB_F_RT (1U << 3) +#define LCB_F_PERCPU (1U << 4) +#define LCB_F_MUTEX (1U << 5) + + #ifdef CONFIG_LOCKDEP +#include <linux/lockdep.h> + TRACE_EVENT(lock_acquire, TP_PROTO(struct lockdep_map *lock, unsigned int subclass, @@ -78,8 +89,54 @@ DEFINE_EVENT(lock, lock_acquired, TP_ARGS(lock, ip) ); -#endif -#endif +#endif /* CONFIG_LOCK_STAT */ +#endif /* CONFIG_LOCKDEP */ + +TRACE_EVENT(contention_begin, + + TP_PROTO(void *lock, unsigned int flags), + + TP_ARGS(lock, flags), + + TP_STRUCT__entry( + __field(void *, lock_addr) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->lock_addr = lock; + __entry->flags = flags; + ), + + TP_printk("%p (flags=%s)", __entry->lock_addr, + __print_flags(__entry->flags, "|", + { LCB_F_SPIN, "SPIN" }, + { LCB_F_READ, "READ" }, + { LCB_F_WRITE, "WRITE" }, + { LCB_F_RT, "RT" }, + { LCB_F_PERCPU, "PERCPU" }, + { LCB_F_MUTEX, "MUTEX" } + )) +); + +TRACE_EVENT(contention_end, + + TP_PROTO(void *lock, int ret), + + TP_ARGS(lock, ret), + + TP_STRUCT__entry( + __field(void *, lock_addr) + __field(int, ret) + ), + + TP_fast_assign( + __entry->lock_addr = lock; + __entry->ret = ret; + ), + + TP_printk("%p (ret=%d)", __entry->lock_addr, __entry->ret) +); #endif /* _TRACE_LOCK_H */ diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 9e63923c5a0f..032f164abe7c 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -126,7 +126,7 @@ static __always_inline void __exit_to_user_mode(void) { instrumentation_begin(); trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); instrumentation_end(); user_enter_irqoff(); @@ -416,7 +416,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) instrumentation_begin(); /* Tell the tracer that IRET will enable interrupts */ trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); instrumentation_end(); rcu_irq_exit(); lockdep_hardirqs_on(CALLER_ADDR0); @@ -465,7 +465,7 @@ void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state) ftrace_nmi_exit(); if (irq_state.lockdep) { trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); } instrumentation_end(); diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index 183b28c32c83..ce2889f12375 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -1005,7 +1005,7 @@ retry_private: rt_mutex_init_waiter(&rt_waiter); /* - * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not + * On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not * hold it while doing rt_mutex_start_proxy(), because then it will * include hb->lock in the blocking chain, even through we'll not in * fact hold it while blocking. This will lead it to report -EDEADLK diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c06cab6546ed..a6e671b8608d 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -60,7 +60,6 @@ #include "lockdep_internals.h" -#define CREATE_TRACE_POINTS #include <trace/events/lock.h> #ifdef CONFIG_PROVE_LOCKING @@ -1380,7 +1379,7 @@ static struct lock_list *alloc_list_entry(void) */ static int add_lock_to_list(struct lock_class *this, struct lock_class *links_to, struct list_head *head, - unsigned long ip, u16 distance, u8 dep, + u16 distance, u8 dep, const struct lock_trace *trace) { struct lock_list *entry; @@ -3133,19 +3132,15 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, * to the previous lock's dependency list: */ ret = add_lock_to_list(hlock_class(next), hlock_class(prev), - &hlock_class(prev)->locks_after, - next->acquire_ip, distance, - calc_dep(prev, next), - *trace); + &hlock_class(prev)->locks_after, distance, + calc_dep(prev, next), *trace); if (!ret) return 0; ret = add_lock_to_list(hlock_class(prev), hlock_class(next), - &hlock_class(next)->locks_before, - next->acquire_ip, distance, - calc_depb(prev, next), - *trace); + &hlock_class(next)->locks_before, distance, + calc_depb(prev, next), *trace); if (!ret) return 0; @@ -4236,14 +4231,13 @@ static void __trace_hardirqs_on_caller(void) /** * lockdep_hardirqs_on_prepare - Prepare for enabling interrupts - * @ip: Caller address * * Invoked before a possible transition to RCU idle from exit to user or * guest mode. This ensures that all RCU operations are done before RCU * stops watching. After the RCU transition lockdep_hardirqs_on() has to be * invoked to set the final state. */ -void lockdep_hardirqs_on_prepare(unsigned long ip) +void lockdep_hardirqs_on_prepare(void) { if (unlikely(!debug_locks)) return; @@ -4840,8 +4834,7 @@ EXPORT_SYMBOL_GPL(__lockdep_no_validate__); static void print_lock_nested_lock_not_held(struct task_struct *curr, - struct held_lock *hlock, - unsigned long ip) + struct held_lock *hlock) { if (!debug_locks_off()) return; @@ -5017,7 +5010,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, chain_key = iterate_chain_key(chain_key, hlock_id(hlock)); if (nest_lock && !__lock_is_held(nest_lock, -1)) { - print_lock_nested_lock_not_held(curr, hlock, ip); + print_lock_nested_lock_not_held(curr, hlock); return 0; } diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 5e3585950ec8..d973fe6041bf 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -30,6 +30,9 @@ #include <linux/debug_locks.h> #include <linux/osq_lock.h> +#define CREATE_TRACE_POINTS +#include <trace/events/lock.h> + #ifndef CONFIG_PREEMPT_RT #include "mutex.h" @@ -599,12 +602,14 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas preempt_disable(); mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); + trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN); if (__mutex_trylock(lock) || mutex_optimistic_spin(lock, ww_ctx, NULL)) { /* got the lock, yay! */ lock_acquired(&lock->dep_map, ip); if (ww_ctx) ww_mutex_set_context_fastpath(ww, ww_ctx); + trace_contention_end(lock, 0); preempt_enable(); return 0; } @@ -641,6 +646,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas } set_current_state(state); + trace_contention_begin(lock, LCB_F_MUTEX); for (;;) { bool first; @@ -680,10 +686,16 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas * state back to RUNNING and fall through the next schedule(), * or we must see its unlock and acquire. */ - if (__mutex_trylock_or_handoff(lock, first) || - (first && mutex_optimistic_spin(lock, ww_ctx, &waiter))) + if (__mutex_trylock_or_handoff(lock, first)) break; + if (first) { + trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN); + if (mutex_optimistic_spin(lock, ww_ctx, &waiter)) + break; + trace_contention_begin(lock, LCB_F_MUTEX); + } + raw_spin_lock(&lock->wait_lock); } raw_spin_lock(&lock->wait_lock); @@ -707,6 +719,7 @@ acquired: skip_wait: /* got the lock - cleanup and rejoice! */ lock_acquired(&lock->dep_map, ip); + trace_contention_end(lock, 0); if (ww_ctx) ww_mutex_lock_acquired(ww, ww_ctx); @@ -719,6 +732,7 @@ err: __set_current_state(TASK_RUNNING); __mutex_remove_waiter(lock, &waiter); err_early_kill: + trace_contention_end(lock, ret); raw_spin_unlock(&lock->wait_lock); debug_mutex_free_waiter(&waiter); mutex_release(&lock->dep_map, ip); diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index c9fdae94e098..5fe4c5495ba3 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -9,6 +9,7 @@ #include <linux/sched/task.h> #include <linux/sched/debug.h> #include <linux/errno.h> +#include <trace/events/lock.h> int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, const char *name, struct lock_class_key *key) @@ -171,9 +172,11 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try) if (try) return false; + trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_READ); preempt_enable(); percpu_rwsem_wait(sem, /* .reader = */ true); preempt_disable(); + trace_contention_end(sem, 0); return true; } @@ -216,6 +219,7 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem) { might_sleep(); rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); + trace_contention_begin(sem, LCB_F_PERCPU | LCB_F_WRITE); /* Notify readers to take the slow path. */ rcu_sync_enter(&sem->rss); @@ -237,6 +241,7 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem) /* Wait for all active readers to complete. */ rcuwait_wait_event(&sem->writer, readers_active_check(sem), TASK_UNINTERRUPTIBLE); + trace_contention_end(sem, 0); } EXPORT_SYMBOL_GPL(percpu_down_write); diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index ec36b73f4733..2e1600906c9f 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -12,10 +12,11 @@ #include <linux/percpu.h> #include <linux/hardirq.h> #include <linux/spinlock.h> +#include <trace/events/lock.h> /** - * queued_read_lock_slowpath - acquire read lock of a queue rwlock - * @lock: Pointer to queue rwlock structure + * queued_read_lock_slowpath - acquire read lock of a queued rwlock + * @lock: Pointer to queued rwlock structure */ void queued_read_lock_slowpath(struct qrwlock *lock) { @@ -34,6 +35,8 @@ void queued_read_lock_slowpath(struct qrwlock *lock) } atomic_sub(_QR_BIAS, &lock->cnts); + trace_contention_begin(lock, LCB_F_SPIN | LCB_F_READ); + /* * Put the reader into the wait queue */ @@ -51,17 +54,21 @@ void queued_read_lock_slowpath(struct qrwlock *lock) * Signal the next one in queue to become queue head */ arch_spin_unlock(&lock->wait_lock); + + trace_contention_end(lock, 0); } EXPORT_SYMBOL(queued_read_lock_slowpath); /** - * queued_write_lock_slowpath - acquire write lock of a queue rwlock - * @lock : Pointer to queue rwlock structure + * queued_write_lock_slowpath - acquire write lock of a queued rwlock + * @lock : Pointer to queued rwlock structure */ void queued_write_lock_slowpath(struct qrwlock *lock) { int cnts; + trace_contention_begin(lock, LCB_F_SPIN | LCB_F_WRITE); + /* Put the writer into the wait queue */ arch_spin_lock(&lock->wait_lock); @@ -79,5 +86,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock) } while (!atomic_try_cmpxchg_acquire(&lock->cnts, &cnts, _QW_LOCKED)); unlock: arch_spin_unlock(&lock->wait_lock); + + trace_contention_end(lock, 0); } EXPORT_SYMBOL(queued_write_lock_slowpath); diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index cbff6ba53d56..65a9a10caa6f 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -22,6 +22,7 @@ #include <linux/prefetch.h> #include <asm/byteorder.h> #include <asm/qspinlock.h> +#include <trace/events/lock.h> /* * Include queued spinlock statistics code @@ -401,6 +402,8 @@ pv_queue: idx = node->count++; tail = encode_tail(smp_processor_id(), idx); + trace_contention_begin(lock, LCB_F_SPIN); + /* * 4 nodes are allocated based on the assumption that there will * not be nested NMIs taking spinlocks. That may not be true in @@ -554,6 +557,8 @@ locked: pv_kick_node(lock, next); release: + trace_contention_end(lock, 0); + /* * release the node */ diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 8555c4efe97c..7779ee8abc2a 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -24,6 +24,8 @@ #include <linux/sched/wake_q.h> #include <linux/ww_mutex.h> +#include <trace/events/lock.h> + #include "rtmutex_common.h" #ifndef WW_RT @@ -1579,6 +1581,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, set_current_state(state); + trace_contention_begin(lock, LCB_F_RT); + ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk); if (likely(!ret)) ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter); @@ -1601,6 +1605,9 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, * unconditionally. We might have to fix that up. */ fixup_rt_mutex_waiters(lock); + + trace_contention_end(lock, ret); + return ret; } @@ -1683,6 +1690,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) /* Save current state and set state to TASK_RTLOCK_WAIT */ current_save_and_set_rtlock_wait_state(); + trace_contention_begin(lock, LCB_F_RT); + task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK); for (;;) { @@ -1712,6 +1721,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) */ fixup_rt_mutex_waiters(lock); debug_rt_mutex_free_waiter(&waiter); + + trace_contention_end(lock, 0); } static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock) diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 6fd3162e4098..c201aadb9301 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -112,6 +112,8 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, * Reader2 to call up_read(), which might be unbound. */ + trace_contention_begin(rwb, LCB_F_RT | LCB_F_READ); + /* * For rwlocks this returns 0 unconditionally, so the below * !ret conditionals are optimized out. @@ -130,6 +132,8 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, raw_spin_unlock_irq(&rtm->wait_lock); if (!ret) rwbase_rtmutex_unlock(rtm); + + trace_contention_end(rwb, ret); return ret; } @@ -247,11 +251,13 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, goto out_unlock; rwbase_set_and_save_current_state(state); + trace_contention_begin(rwb, LCB_F_RT | LCB_F_WRITE); for (;;) { /* Optimized out for rwlocks */ if (rwbase_signal_pending_state(state, current)) { rwbase_restore_current_state(); __rwbase_write_unlock(rwb, 0, flags); + trace_contention_end(rwb, -EINTR); return -EINTR; } @@ -265,6 +271,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, set_current_state(state); } rwbase_restore_current_state(); + trace_contention_end(rwb, 0); out_unlock: raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index acde5d6f1254..9d1db4a54d34 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -27,6 +27,7 @@ #include <linux/export.h> #include <linux/rwsem.h> #include <linux/atomic.h> +#include <trace/events/lock.h> #ifndef CONFIG_PREEMPT_RT #include "lock_events.h" @@ -375,16 +376,19 @@ rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) * * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of * this function. Modify with care. + * + * Return: true if wait_list isn't empty and false otherwise */ -static inline void +static inline bool rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) { lockdep_assert_held(&sem->wait_lock); list_del(&waiter->list); if (likely(!list_empty(&sem->wait_list))) - return; + return true; atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count); + return false; } /* @@ -559,6 +563,33 @@ static void rwsem_mark_wake(struct rw_semaphore *sem, } /* + * Remove a waiter and try to wake up other waiters in the wait queue + * This function is called from the out_nolock path of both the reader and + * writer slowpaths with wait_lock held. It releases the wait_lock and + * optionally wake up waiters before it returns. + */ +static inline void +rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter, + struct wake_q_head *wake_q) + __releases(&sem->wait_lock) +{ + bool first = rwsem_first_waiter(sem) == waiter; + + wake_q_init(wake_q); + + /* + * If the wait_list isn't empty and the waiter to be deleted is + * the first waiter, we wake up the remaining waiters as they may + * be eligible to acquire or spin on the lock. + */ + if (rwsem_del_waiter(sem, waiter) && first) + rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q); + raw_spin_unlock_irq(&sem->wait_lock); + if (!wake_q_empty(wake_q)) + wake_up_q(wake_q); +} + +/* * This function must be called with the sem->wait_lock held to prevent * race conditions between checking the rwsem wait list and setting the * sem->count accordingly. @@ -901,7 +932,7 @@ done: */ static inline void clear_nonspinnable(struct rw_semaphore *sem) { - if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE)) + if (unlikely(rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))) atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner); } @@ -926,6 +957,31 @@ rwsem_spin_on_owner(struct rw_semaphore *sem) #endif /* + * Prepare to wake up waiter(s) in the wait queue by putting them into the + * given wake_q if the rwsem lock owner isn't a writer. If rwsem is likely + * reader-owned, wake up read lock waiters in queue front or wake up any + * front waiter otherwise. + + * This is being called from both reader and writer slow paths. + */ +static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count, + struct wake_q_head *wake_q) +{ + enum rwsem_wake_type wake_type; + + if (count & RWSEM_WRITER_MASK) + return; + + if (count & RWSEM_READER_MASK) { + wake_type = RWSEM_WAKE_READERS; + } else { + wake_type = RWSEM_WAKE_ANY; + clear_nonspinnable(sem); + } + rwsem_mark_wake(sem, wake_type, wake_q); +} + +/* * Wait for the read lock to be granted */ static struct rw_semaphore __sched * @@ -935,7 +991,6 @@ rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int stat long rcnt = (count >> RWSEM_READER_SHIFT); struct rwsem_waiter waiter; DEFINE_WAKE_Q(wake_q); - bool wake = false; /* * To prevent a constant stream of readers from starving a sleeping @@ -977,12 +1032,11 @@ queue: if (list_empty(&sem->wait_list)) { /* * In case the wait queue is empty and the lock isn't owned - * by a writer or has the handoff bit set, this reader can - * exit the slowpath and return immediately as its - * RWSEM_READER_BIAS has already been set in the count. + * by a writer, this reader can exit the slowpath and return + * immediately as its RWSEM_READER_BIAS has already been set + * in the count. */ - if (!(atomic_long_read(&sem->count) & - (RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) { + if (!(atomic_long_read(&sem->count) & RWSEM_WRITER_MASK)) { /* Provide lock ACQUIRE */ smp_acquire__after_ctrl_dep(); raw_spin_unlock_irq(&sem->wait_lock); @@ -997,22 +1051,13 @@ queue: /* we're now waiting on the lock, but no longer actively locking */ count = atomic_long_add_return(adjustment, &sem->count); - /* - * If there are no active locks, wake the front queued process(es). - * - * If there are no writers and we are first in the queue, - * wake our own waiter to join the existing active readers ! - */ - if (!(count & RWSEM_LOCK_MASK)) { - clear_nonspinnable(sem); - wake = true; - } - if (wake || (!(count & RWSEM_WRITER_MASK) && - (adjustment & RWSEM_FLAG_WAITERS))) - rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); - + rwsem_cond_wake_waiter(sem, count, &wake_q); raw_spin_unlock_irq(&sem->wait_lock); - wake_up_q(&wake_q); + + if (!wake_q_empty(&wake_q)) + wake_up_q(&wake_q); + + trace_contention_begin(sem, LCB_F_READ); /* wait to be given the lock */ for (;;) { @@ -1035,13 +1080,14 @@ queue: __set_current_state(TASK_RUNNING); lockevent_inc(rwsem_rlock); + trace_contention_end(sem, 0); return sem; out_nolock: - rwsem_del_waiter(sem, &waiter); - raw_spin_unlock_irq(&sem->wait_lock); + rwsem_del_wake_waiter(sem, &waiter, &wake_q); __set_current_state(TASK_RUNNING); lockevent_inc(rwsem_rlock_fail); + trace_contention_end(sem, -EINTR); return ERR_PTR(-EINTR); } @@ -1051,7 +1097,6 @@ out_nolock: static struct rw_semaphore __sched * rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) { - long count; struct rwsem_waiter waiter; DEFINE_WAKE_Q(wake_q); @@ -1075,23 +1120,8 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) /* we're now waiting on the lock */ if (rwsem_first_waiter(sem) != &waiter) { - count = atomic_long_read(&sem->count); - - /* - * If there were already threads queued before us and: - * 1) there are no active locks, wake the front - * queued process(es) as the handoff bit might be set. - * 2) there are no active writers and some readers, the lock - * must be read owned; so we try to wake any read lock - * waiters that were queued ahead of us. - */ - if (count & RWSEM_WRITER_MASK) - goto wait; - - rwsem_mark_wake(sem, (count & RWSEM_READER_MASK) - ? RWSEM_WAKE_READERS - : RWSEM_WAKE_ANY, &wake_q); - + rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count), + &wake_q); if (!wake_q_empty(&wake_q)) { /* * We want to minimize wait_lock hold time especially @@ -1099,16 +1129,16 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) */ raw_spin_unlock_irq(&sem->wait_lock); wake_up_q(&wake_q); - wake_q_init(&wake_q); /* Used again, reinit */ raw_spin_lock_irq(&sem->wait_lock); } } else { atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count); } -wait: /* wait until we successfully acquire the lock */ set_current_state(state); + trace_contention_begin(sem, LCB_F_WRITE); + for (;;) { if (rwsem_try_write_lock(sem, &waiter)) { /* rwsem_try_write_lock() implies ACQUIRE on success */ @@ -1148,17 +1178,15 @@ trylock_again: __set_current_state(TASK_RUNNING); raw_spin_unlock_irq(&sem->wait_lock); lockevent_inc(rwsem_wlock); + trace_contention_end(sem, 0); return sem; out_nolock: __set_current_state(TASK_RUNNING); raw_spin_lock_irq(&sem->wait_lock); - rwsem_del_waiter(sem, &waiter); - if (!list_empty(&sem->wait_list)) - rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); - raw_spin_unlock_irq(&sem->wait_lock); - wake_up_q(&wake_q); + rwsem_del_wake_waiter(sem, &waiter, &wake_q); lockevent_inc(rwsem_wlock_fail); + trace_contention_end(sem, -EINTR); return ERR_PTR(-EINTR); } diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 9ee381e4d2a4..f2654d2fe43a 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -32,6 +32,7 @@ #include <linux/semaphore.h> #include <linux/spinlock.h> #include <linux/ftrace.h> +#include <trace/events/lock.h> static noinline void __down(struct semaphore *sem); static noinline int __down_interruptible(struct semaphore *sem); @@ -205,7 +206,7 @@ struct semaphore_waiter { * constant, and thus optimised away by the compiler. Likewise the * 'timeout' parameter for the cases without timeouts. */ -static inline int __sched __down_common(struct semaphore *sem, long state, +static inline int __sched ___down_common(struct semaphore *sem, long state, long timeout) { struct semaphore_waiter waiter; @@ -236,6 +237,18 @@ static inline int __sched __down_common(struct semaphore *sem, long state, return -EINTR; } +static inline int __sched __down_common(struct semaphore *sem, long state, + long timeout) +{ + int ret; + + trace_contention_begin(sem, 0); + ret = ___down_common(sem, state, timeout); + trace_contention_end(sem, ret); + + return ret; +} + static noinline void __sched __down(struct semaphore *sem) { __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index d9272d9061a3..e374c0c923da 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -287,7 +287,7 @@ again: clock = wrap_max(clock, min_clock); clock = wrap_min(clock, max_clock); - if (cmpxchg64(&scd->clock, old_clock, clock) != old_clock) + if (!try_cmpxchg64(&scd->clock, &old_clock, clock)) goto again; return clock; @@ -349,7 +349,7 @@ again: val = remote_clock; } - if (cmpxchg64(ptr, old_val, val) != old_val) + if (!try_cmpxchg64(ptr, &old_val, val)) goto again; return val; diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index ecb0d7052877..f85d7c7caab2 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -102,7 +102,7 @@ void __cpuidle default_idle_call(void) * last -- this is very similar to the entry code. */ trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(_THIS_IP_); + lockdep_hardirqs_on_prepare(); rcu_idle_enter(); lockdep_hardirqs_on(_THIS_IP_); diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index f4938040c228..95b58bd757ce 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -46,7 +46,7 @@ void trace_hardirqs_on(void) this_cpu_write(tracing_irq_cpu, 0); } - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); lockdep_hardirqs_on(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_on); @@ -94,7 +94,7 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr) this_cpu_write(tracing_irq_cpu, 0); } - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); lockdep_hardirqs_on(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_on_caller); diff --git a/scripts/atomic/gen-atomic-fallback.sh b/scripts/atomic/gen-atomic-fallback.sh index 8e2da71f1d5f..3a07695e3c89 100755 --- a/scripts/atomic/gen-atomic-fallback.sh +++ b/scripts/atomic/gen-atomic-fallback.sh @@ -164,41 +164,44 @@ gen_xchg_fallbacks() gen_try_cmpxchg_fallback() { + local cmpxchg="$1"; shift; local order="$1"; shift; cat <<EOF -#ifndef arch_try_cmpxchg${order} -#define arch_try_cmpxchg${order}(_ptr, _oldp, _new) \\ +#ifndef arch_try_${cmpxchg}${order} +#define arch_try_${cmpxchg}${order}(_ptr, _oldp, _new) \\ ({ \\ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \\ - ___r = arch_cmpxchg${order}((_ptr), ___o, (_new)); \\ + ___r = arch_${cmpxchg}${order}((_ptr), ___o, (_new)); \\ if (unlikely(___r != ___o)) \\ *___op = ___r; \\ likely(___r == ___o); \\ }) -#endif /* arch_try_cmpxchg${order} */ +#endif /* arch_try_${cmpxchg}${order} */ EOF } gen_try_cmpxchg_fallbacks() { - printf "#ifndef arch_try_cmpxchg_relaxed\n" - printf "#ifdef arch_try_cmpxchg\n" + local cmpxchg="$1"; shift; - gen_basic_fallbacks "arch_try_cmpxchg" + printf "#ifndef arch_try_${cmpxchg}_relaxed\n" + printf "#ifdef arch_try_${cmpxchg}\n" - printf "#endif /* arch_try_cmpxchg */\n\n" + gen_basic_fallbacks "arch_try_${cmpxchg}" + + printf "#endif /* arch_try_${cmpxchg} */\n\n" for order in "" "_acquire" "_release" "_relaxed"; do - gen_try_cmpxchg_fallback "${order}" + gen_try_cmpxchg_fallback "${cmpxchg}" "${order}" done - printf "#else /* arch_try_cmpxchg_relaxed */\n" + printf "#else /* arch_try_${cmpxchg}_relaxed */\n" - gen_order_fallbacks "arch_try_cmpxchg" + gen_order_fallbacks "arch_try_${cmpxchg}" - printf "#endif /* arch_try_cmpxchg_relaxed */\n\n" + printf "#endif /* arch_try_${cmpxchg}_relaxed */\n\n" } cat << EOF @@ -218,7 +221,9 @@ for xchg in "arch_xchg" "arch_cmpxchg" "arch_cmpxchg64"; do gen_xchg_fallbacks "${xchg}" done -gen_try_cmpxchg_fallbacks +for cmpxchg in "cmpxchg" "cmpxchg64"; do + gen_try_cmpxchg_fallbacks "${cmpxchg}" +done grep '^[a-z]' "$1" | while read name meta args; do gen_proto "${meta}" "${name}" "atomic" "int" ${args} diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh index 68f902731d01..77c06526a574 100755 --- a/scripts/atomic/gen-atomic-instrumented.sh +++ b/scripts/atomic/gen-atomic-instrumented.sh @@ -166,7 +166,7 @@ grep '^[a-z]' "$1" | while read name meta args; do done -for xchg in "xchg" "cmpxchg" "cmpxchg64" "try_cmpxchg"; do +for xchg in "xchg" "cmpxchg" "cmpxchg64" "try_cmpxchg" "try_cmpxchg64"; do for order in "" "_acquire" "_release" "_relaxed"; do gen_xchg "${xchg}" "${order}" "" printf "\n" |