diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/entry/common.c | 41 | ||||
-rw-r--r-- | kernel/fork.c | 2 | ||||
-rw-r--r-- | kernel/gcov/Kconfig | 1 | ||||
-rw-r--r-- | kernel/gcov/gcc_4_7.c | 4 | ||||
-rw-r--r-- | kernel/kprobes.c | 36 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 35 | ||||
-rw-r--r-- | kernel/locking/lockdep_internals.h | 2 | ||||
-rw-r--r-- | kernel/locking/percpu-rwsem.c | 4 | ||||
-rw-r--r-- | kernel/padata.c | 5 | ||||
-rw-r--r-- | kernel/rcu/tasks.h | 2 | ||||
-rw-r--r-- | kernel/seccomp.c | 24 | ||||
-rw-r--r-- | kernel/stackleak.c | 2 | ||||
-rw-r--r-- | kernel/sysctl.c | 8 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace.c | 38 | ||||
-rw-r--r-- | kernel/trace/trace_events_hist.c | 1 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 12 | ||||
-rw-r--r-- | kernel/trace/trace_preemptirq.c | 4 |
18 files changed, 165 insertions, 62 deletions
diff --git a/kernel/entry/common.c b/kernel/entry/common.c index fcae019158ca..6fdb6105e6d6 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -60,31 +60,56 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, return ret; } + /* Either of the above might have changed the syscall number */ + syscall = syscall_get_nr(current, regs); + if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, syscall); syscall_enter_audit(regs, syscall); - /* The above might have changed the syscall number */ - return ret ? : syscall_get_nr(current, regs); + return ret ? : syscall; } -noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +static __always_inline long +__syscall_enter_from_user_work(struct pt_regs *regs, long syscall) { unsigned long ti_work; - enter_from_user_mode(regs); - instrumentation_begin(); - - local_irq_enable(); ti_work = READ_ONCE(current_thread_info()->flags); if (ti_work & SYSCALL_ENTER_WORK) syscall = syscall_trace_enter(regs, syscall, ti_work); - instrumentation_end(); return syscall; } +long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall) +{ + return __syscall_enter_from_user_work(regs, syscall); +} + +noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall) +{ + long ret; + + enter_from_user_mode(regs); + + instrumentation_begin(); + local_irq_enable(); + ret = __syscall_enter_from_user_work(regs, syscall); + instrumentation_end(); + + return ret; +} + +noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs) +{ + enter_from_user_mode(regs); + instrumentation_begin(); + local_irq_enable(); + instrumentation_end(); +} + /** * exit_to_user_mode - Fixup state when exiting to user mode * diff --git a/kernel/fork.c b/kernel/fork.c index 4d32190861bd..49677d668de4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -3014,7 +3014,7 @@ int unshare_files(struct files_struct **displaced) } int sysctl_max_threads(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; int ret; diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index bb4b680e8455..3110c77230c7 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig @@ -4,7 +4,6 @@ menu "GCOV-based kernel profiling" config GCOV_KERNEL bool "Enable gcov-based kernel profiling" depends on DEBUG_FS - depends on !CC_IS_GCC || GCC_VERSION < 100000 select CONSTRUCTORS if !UML default n help diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index 908fdf5098c3..53c67c87f141 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -19,7 +19,9 @@ #include <linux/vmalloc.h> #include "gcov.h" -#if (__GNUC__ >= 7) +#if (__GNUC__ >= 10) +#define GCOV_COUNTERS 8 +#elif (__GNUC__ >= 7) #define GCOV_COUNTERS 9 #elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1) #define GCOV_COUNTERS 10 diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 287b263c9cb9..e995541d277d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2140,6 +2140,9 @@ static void kill_kprobe(struct kprobe *p) lockdep_assert_held(&kprobe_mutex); + if (WARN_ON_ONCE(kprobe_gone(p))) + return; + p->flags |= KPROBE_FLAG_GONE; if (kprobe_aggrprobe(p)) { /* @@ -2159,9 +2162,10 @@ static void kill_kprobe(struct kprobe *p) /* * The module is going away. We should disarm the kprobe which - * is using ftrace. + * is using ftrace, because ftrace framework is still available at + * MODULE_STATE_GOING notification. */ - if (kprobe_ftrace(p)) + if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed) disarm_kprobe_ftrace(p); } @@ -2419,7 +2423,10 @@ static int kprobes_module_callback(struct notifier_block *nb, mutex_lock(&kprobe_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry(p, head, hlist) + hlist_for_each_entry(p, head, hlist) { + if (kprobe_gone(p)) + continue; + if (within_module_init((unsigned long)p->addr, mod) || (checkcore && within_module_core((unsigned long)p->addr, mod))) { @@ -2436,6 +2443,7 @@ static int kprobes_module_callback(struct notifier_block *nb, */ kill_kprobe(p); } + } } if (val == MODULE_STATE_GOING) remove_module_kprobe_blacklist(mod); @@ -2452,6 +2460,28 @@ static struct notifier_block kprobe_module_nb = { extern unsigned long __start_kprobe_blacklist[]; extern unsigned long __stop_kprobe_blacklist[]; +void kprobe_free_init_mem(void) +{ + void *start = (void *)(&__init_begin); + void *end = (void *)(&__init_end); + struct hlist_head *head; + struct kprobe *p; + int i; + + mutex_lock(&kprobe_mutex); + + /* Kill all kprobes on initmem */ + for (i = 0; i < KPROBE_TABLE_SIZE; i++) { + head = &kprobe_table[i]; + hlist_for_each_entry(p, head, hlist) { + if (start <= (void *)p->addr && (void *)p->addr < end) + kill_kprobe(p); + } + } + + mutex_unlock(&kprobe_mutex); +} + static int __init init_kprobes(void) { int i, err = 0; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 54b74fabf40c..2facbbd146ec 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3969,13 +3969,18 @@ static int separate_irq_context(struct task_struct *curr, static int mark_lock(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit new_bit) { - unsigned int new_mask = 1 << new_bit, ret = 1; + unsigned int old_mask, new_mask, ret = 1; if (new_bit >= LOCK_USAGE_STATES) { DEBUG_LOCKS_WARN_ON(1); return 0; } + if (new_bit == LOCK_USED && this->read) + new_bit = LOCK_USED_READ; + + new_mask = 1 << new_bit; + /* * If already set then do not dirty the cacheline, * nor do any checks: @@ -3988,13 +3993,22 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, /* * Make sure we didn't race: */ - if (unlikely(hlock_class(this)->usage_mask & new_mask)) { - graph_unlock(); - return 1; - } + if (unlikely(hlock_class(this)->usage_mask & new_mask)) + goto unlock; + old_mask = hlock_class(this)->usage_mask; hlock_class(this)->usage_mask |= new_mask; + /* + * Save one usage_traces[] entry and map both LOCK_USED and + * LOCK_USED_READ onto the same entry. + */ + if (new_bit == LOCK_USED || new_bit == LOCK_USED_READ) { + if (old_mask & (LOCKF_USED | LOCKF_USED_READ)) + goto unlock; + new_bit = LOCK_USED; + } + if (!(hlock_class(this)->usage_traces[new_bit] = save_trace())) return 0; @@ -4008,6 +4022,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, return 0; } +unlock: graph_unlock(); /* @@ -4942,12 +4957,20 @@ static void verify_lock_unused(struct lockdep_map *lock, struct held_lock *hlock { #ifdef CONFIG_PROVE_LOCKING struct lock_class *class = look_up_lock_class(lock, subclass); + unsigned long mask = LOCKF_USED; /* if it doesn't have a class (yet), it certainly hasn't been used yet */ if (!class) return; - if (!(class->usage_mask & LOCK_USED)) + /* + * READ locks only conflict with USED, such that if we only ever use + * READ locks, there is no deadlock possible -- RCU. + */ + if (!hlock->read) + mask |= LOCKF_USED_READ; + + if (!(class->usage_mask & mask)) return; hlock->class_idx = class - lock_classes; diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index baca699b94e9..b0be1560ed17 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h @@ -19,6 +19,7 @@ enum lock_usage_bit { #include "lockdep_states.h" #undef LOCKDEP_STATE LOCK_USED, + LOCK_USED_READ, LOCK_USAGE_STATES }; @@ -40,6 +41,7 @@ enum { #include "lockdep_states.h" #undef LOCKDEP_STATE __LOCKF(USED) + __LOCKF(USED_READ) }; #define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE | diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 8bbafe3e5203..70a32a576f3f 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -45,7 +45,7 @@ EXPORT_SYMBOL_GPL(percpu_free_rwsem); static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { - __this_cpu_inc(*sem->read_count); + this_cpu_inc(*sem->read_count); /* * Due to having preemption disabled the decrement happens on @@ -71,7 +71,7 @@ static bool __percpu_down_read_trylock(struct percpu_rw_semaphore *sem) if (likely(!atomic_read_acquire(&sem->block))) return true; - __this_cpu_dec(*sem->read_count); + this_cpu_dec(*sem->read_count); /* Prod writer to re-evaluate readers_active_check() */ rcuwait_wake_up(&sem->writer); diff --git a/kernel/padata.c b/kernel/padata.c index 16cb894dc272..d4d3ba6e1728 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -215,12 +215,13 @@ int padata_do_parallel(struct padata_shell *ps, padata->pd = pd; padata->cb_cpu = *cb_cpu; - rcu_read_unlock_bh(); - spin_lock(&padata_works_lock); padata->seq_nr = ++pd->seq_nr; pw = padata_work_alloc(); spin_unlock(&padata_works_lock); + + rcu_read_unlock_bh(); + if (pw) { padata_work_init(pw, padata_parallel_worker, padata, 0); queue_work(pinst->parallel_wq, &pw->pw_work); diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 835e2df8590a..05d3e1375e4c 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -590,7 +590,7 @@ void exit_tasks_rcu_finish(void) __releases(&tasks_rcu_exit_srcu) } #else /* #ifdef CONFIG_TASKS_RCU */ -static void show_rcu_tasks_classic_gp_kthread(void) { } +static inline void show_rcu_tasks_classic_gp_kthread(void) { } void exit_tasks_rcu_start(void) { } void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); } #endif /* #else #ifdef CONFIG_TASKS_RCU */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 3ee59ce0a323..676d4af62103 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1109,13 +1109,18 @@ out: } #ifdef CONFIG_SECCOMP_FILTER -static int seccomp_notify_release(struct inode *inode, struct file *file) +static void seccomp_notify_free(struct seccomp_filter *filter) +{ + kfree(filter->notif); + filter->notif = NULL; +} + +static void seccomp_notify_detach(struct seccomp_filter *filter) { - struct seccomp_filter *filter = file->private_data; struct seccomp_knotif *knotif; if (!filter) - return 0; + return; mutex_lock(&filter->notify_lock); @@ -1139,9 +1144,15 @@ static int seccomp_notify_release(struct inode *inode, struct file *file) complete(&knotif->ready); } - kfree(filter->notif); - filter->notif = NULL; + seccomp_notify_free(filter); mutex_unlock(&filter->notify_lock); +} + +static int seccomp_notify_release(struct inode *inode, struct file *file) +{ + struct seccomp_filter *filter = file->private_data; + + seccomp_notify_detach(filter); __put_seccomp_filter(filter); return 0; } @@ -1488,7 +1499,7 @@ static struct file *init_listener(struct seccomp_filter *filter) out_notif: if (IS_ERR(ret)) - kfree(filter->notif); + seccomp_notify_free(filter); out: return ret; } @@ -1581,6 +1592,7 @@ out_put_fd: listener_f->private_data = NULL; fput(listener_f); put_unused_fd(listener); + seccomp_notify_detach(prepared); } else { fd_install(listener, listener_f); ret = listener; diff --git a/kernel/stackleak.c b/kernel/stackleak.c index a8fc9ae1d03d..ce161a8e8d97 100644 --- a/kernel/stackleak.c +++ b/kernel/stackleak.c @@ -20,7 +20,7 @@ static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass); int stack_erasing_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = 0; int state = !static_branch_unlikely(&stack_erasing_bypass); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 09e70ee2332e..afad085960b8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2912,6 +2912,14 @@ static struct ctl_table vm_table[] = { .proc_handler = percpu_pagelist_fraction_sysctl_handler, .extra1 = SYSCTL_ZERO, }, + { + .procname = "page_lock_unfairness", + .data = &sysctl_page_lock_unfairness, + .maxlen = sizeof(sysctl_page_lock_unfairness), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, #ifdef CONFIG_MMU { .procname = "max_map_count", diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 275441254bb5..603255f5f085 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2782,6 +2782,7 @@ static void ftrace_remove_trampoline_from_kallsyms(struct ftrace_ops *ops) { lockdep_assert_held(&ftrace_lock); list_del_rcu(&ops->list); + synchronize_rcu(); } /* @@ -2862,6 +2863,8 @@ int ftrace_startup(struct ftrace_ops *ops, int command) __unregister_ftrace_function(ops); ftrace_start_up--; ops->flags &= ~FTRACE_OPS_FL_ENABLED; + if (ops->flags & FTRACE_OPS_FL_DYNAMIC) + ftrace_trampoline_free(ops); return ret; } @@ -7531,8 +7534,7 @@ static bool is_permanent_ops_registered(void) int ftrace_enable_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = -ENODEV; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f40d850ebabc..2a7c26345e83 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3782,14 +3782,14 @@ unsigned long trace_total_entries(struct trace_array *tr) static void print_lat_help_header(struct seq_file *m) { - seq_puts(m, "# _------=> CPU# \n" - "# / _-----=> irqs-off \n" - "# | / _----=> need-resched \n" - "# || / _---=> hardirq/softirq \n" - "# ||| / _--=> preempt-depth \n" - "# |||| / delay \n" - "# cmd pid ||||| time | caller \n" - "# \\ / ||||| \\ | / \n"); + seq_puts(m, "# _------=> CPU# \n" + "# / _-----=> irqs-off \n" + "# | / _----=> need-resched \n" + "# || / _---=> hardirq/softirq \n" + "# ||| / _--=> preempt-depth \n" + "# |||| / delay \n" + "# cmd pid ||||| time | caller \n" + "# \\ / ||||| \\ | / \n"); } static void print_event_info(struct array_buffer *buf, struct seq_file *m) @@ -3810,26 +3810,26 @@ static void print_func_help_header(struct array_buffer *buf, struct seq_file *m, print_event_info(buf, m); - seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : ""); - seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); + seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : ""); + seq_printf(m, "# | | %s | | |\n", tgid ? " | " : ""); } static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m, unsigned int flags) { bool tgid = flags & TRACE_ITER_RECORD_TGID; - const char *space = " "; - int prec = tgid ? 10 : 2; + const char *space = " "; + int prec = tgid ? 12 : 2; print_event_info(buf, m); - seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); - seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); - seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); - seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); - seq_printf(m, "# %.*s||| / delay\n", prec, space); - seq_printf(m, "# TASK-PID %.*sCPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); - seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); + seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space); + seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); + seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); + seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); + seq_printf(m, "# %.*s||| / delay\n", prec, space); + seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); + seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); } void diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0b933546142e..1b2ef6490229 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3865,7 +3865,6 @@ static int parse_var_defs(struct hist_trigger_data *hist_data) s = kstrdup(field_str, GFP_KERNEL); if (!s) { - kfree(hist_data->attrs->var_defs.name[n_vars]); ret = -ENOMEM; goto free; } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 4d1893564912..000e9dc224c6 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -497,7 +497,7 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%8.8s-%-5d %3d", + trace_seq_printf(s, "%8.8s-%-7d %3d", comm, entry->pid, cpu); return trace_print_lat_fmt(s, entry); @@ -588,15 +588,15 @@ int trace_print_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); - trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); + trace_seq_printf(s, "%16s-%-7d ", comm, entry->pid); if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { unsigned int tgid = trace_find_tgid(entry->pid); if (!tgid) - trace_seq_printf(s, "(-----) "); + trace_seq_printf(s, "(-------) "); else - trace_seq_printf(s, "(%5d) ", tgid); + trace_seq_printf(s, "(%7d) ", tgid); } trace_seq_printf(s, "[%03d] ", iter->cpu); @@ -636,7 +636,7 @@ int trace_print_lat_context(struct trace_iterator *iter) trace_find_cmdline(entry->pid, comm); trace_seq_printf( - s, "%16s %5d %3d %d %08x %08lx ", + s, "%16s %7d %3d %d %08x %08lx ", comm, entry->pid, iter->cpu, entry->flags, entry->preempt_count, iter->idx); } else { @@ -917,7 +917,7 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, S = task_index_to_char(field->prev_state); trace_find_cmdline(field->next_pid, comm); trace_seq_printf(&iter->seq, - " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", + " %7d:%3d:%c %s [%03d] %7d:%3d:%c %s\n", field->prev_pid, field->prev_prio, S, delim, diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index f10073e62603..f4938040c228 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -102,14 +102,14 @@ NOKPROBE_SYMBOL(trace_hardirqs_on_caller); __visible void trace_hardirqs_off_caller(unsigned long caller_addr) { + lockdep_hardirqs_off(CALLER_ADDR0); + if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, caller_addr); if (!in_nmi()) trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr); } - - lockdep_hardirqs_off(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_off_caller); NOKPROBE_SYMBOL(trace_hardirqs_off_caller); |