author		Ingo Molnar <mingo@kernel.org>		2012-08-28 18:05:55 +0200
committer	Ingo Molnar <mingo@kernel.org>		2012-08-28 18:05:55 +0200
commit		508dc4f8eece1a70c493afc6511fdf5934fef567 (patch)
tree		b852a6a5bf9977a33a87158985de02d188ba71d7 /kernel
parent		734e9a26d612f64e1c9cfd92256969b773954ae2 (diff)
parent		e3e45c01ae690e65f2650e5288b9af802e95a136 (diff)
Merge branch 'perf/urgent' into perf/core
Pick up the latest fixes because upcoming uprobes changes will rely on them.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/audit_tree.c           | 19
 -rw-r--r--  kernel/fork.c                 |  4
 -rw-r--r--  kernel/power/suspend.c        |  3
 -rw-r--r--  kernel/printk.c               |  2
 -rw-r--r--  kernel/sched/core.c           | 35
 -rw-r--r--  kernel/sched/fair.c           | 11
 -rw-r--r--  kernel/sched/rt.c             | 13
 -rw-r--r--  kernel/sched/sched.h          |  8
 -rw-r--r--  kernel/sched/stop_task.c      | 22
 -rw-r--r--  kernel/task_work.c            |  1
 -rw-r--r--  kernel/time/timekeeping.c     | 68
 -rw-r--r--  kernel/timer.c                |  9
 -rw-r--r--  kernel/trace/trace_syscalls.c |  4
 -rw-r--r--  kernel/watchdog.c             | 21
 14 files changed, 142 insertions(+), 78 deletions(-)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 3a5ca582ba1e..ed206fd88cca 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -250,7 +250,6 @@ static void untag_chunk(struct node *p)
 		spin_unlock(&hash_lock);
 		spin_unlock(&entry->lock);
 		fsnotify_destroy_mark(entry);
-		fsnotify_put_mark(entry);
 		goto out;
 	}
 
@@ -259,7 +258,7 @@ static void untag_chunk(struct node *p)
 
 	fsnotify_duplicate_mark(&new->mark, entry);
 	if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) {
-		free_chunk(new);
+		fsnotify_put_mark(&new->mark);
 		goto Fallback;
 	}
 
@@ -293,7 +292,7 @@ static void untag_chunk(struct node *p)
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
 	fsnotify_destroy_mark(entry);
-	fsnotify_put_mark(entry);
+	fsnotify_put_mark(&new->mark);	/* drop initial reference */
 	goto out;
 
 Fallback:
@@ -322,7 +321,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 	entry = &chunk->mark;
 	if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) {
-		free_chunk(chunk);
+		fsnotify_put_mark(entry);
 		return -ENOSPC;
 	}
 
@@ -347,6 +346,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 	insert_hash(chunk);
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
+	fsnotify_put_mark(entry);	/* drop initial reference */
 	return 0;
 }
 
@@ -396,7 +396,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	fsnotify_duplicate_mark(chunk_entry, old_entry);
 	if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) {
 		spin_unlock(&old_entry->lock);
-		free_chunk(chunk);
+		fsnotify_put_mark(chunk_entry);
 		fsnotify_put_mark(old_entry);
 		return -ENOSPC;
 	}
 
@@ -444,8 +444,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&chunk_entry->lock);
 	spin_unlock(&old_entry->lock);
 	fsnotify_destroy_mark(old_entry);
+	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
 	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
-	fsnotify_put_mark(old_entry); /* and kill it */
 	return 0;
 }
 
@@ -916,7 +916,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
 	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
 
 	evict_chunk(chunk);
-	fsnotify_put_mark(entry);
+
+	/*
+	 * We are guaranteed to have at least one reference to the mark from
+	 * either the inode or the caller of fsnotify_destroy_mark().
+	 */
+	BUG_ON(atomic_read(&entry->refcnt) < 1);
 }
 
 static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode,
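The audit_tree changes above all follow the same refcounting discipline: error paths release the object through the put function (fsnotify_put_mark) instead of freeing it directly, and the creator's initial reference is dropped once a longer-lived registration holds its own reference. A minimal userspace sketch of that discipline, using C11 atomics; chunk_new/chunk_get/chunk_put and the single-slot registry are invented names for illustration only:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct chunk {
        atomic_int refcnt;          /* creation gives the first reference */
    };

    static struct chunk *chunk_new(void)
    {
        struct chunk *c = malloc(sizeof(*c));
        if (c)
            atomic_init(&c->refcnt, 1);     /* initial reference */
        return c;
    }

    static void chunk_get(struct chunk *c)
    {
        atomic_fetch_add(&c->refcnt, 1);
    }

    static void chunk_put(struct chunk *c)
    {
        /* free only when the last reference is dropped */
        if (atomic_fetch_sub(&c->refcnt, 1) == 1)
            free(c);
    }

    static struct chunk *registry;      /* stands in for the inode's mark list */

    static int register_chunk(struct chunk *c)
    {
        if (registry)
            return -1;      /* registration failed; caller must put, not free */
        chunk_get(c);       /* the registry takes its own reference */
        registry = c;
        return 0;
    }

    int main(void)
    {
        struct chunk *c = chunk_new();

        if (register_chunk(c) != 0) {
            chunk_put(c);   /* error path: put, never free() directly */
            return 1;
        }
        chunk_put(c);       /* drop initial reference; registry still holds one */

        registry = NULL;    /* later, unregistering drops the last reference */
        chunk_put(c);
        return 0;
    }

This is exactly why free_chunk() was wrong in the kernel code: once fsnotify_add_mark() may have published the mark, only the refcount machinery knows when it is safe to free.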
diff --git a/kernel/fork.c b/kernel/fork.c
index 3bd2280d79f6..2c8857e12855 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -455,8 +455,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (retval)
 			goto out;
 
-		if (file && uprobe_mmap(tmp))
-			goto out;
+		if (file)
+			uprobe_mmap(tmp);
 	}
 	/* a new mm has just been created */
 	arch_dup_mmap(oldmm, mm);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 1da39ea248fd..c8b7446b27df 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -178,9 +178,6 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
 	arch_suspend_enable_irqs();
 	BUG_ON(irqs_disabled());
 
-	/* Kick the lockup detector */
-	lockup_detector_bootcpu_resume();
-
  Enable_cpus:
 	enable_nonboot_cpus();
diff --git a/kernel/printk.c b/kernel/printk.c
index 6a76ab9d4476..66a2ea37b576 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1034,6 +1034,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 			struct log *msg = log_from_idx(idx);
 
 			len += msg_print_text(msg, prev, true, NULL, 0);
+			prev = msg->flags;
 			idx = log_next(idx);
 			seq++;
 		}
@@ -1046,6 +1047,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 			struct log *msg = log_from_idx(idx);
 
 			len -= msg_print_text(msg, prev, true, NULL, 0);
+			prev = msg->flags;
 			idx = log_next(idx);
 			seq++;
 		}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 82ad284f823b..fbf1fd098dc6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 # define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+{
+	u64 temp = (__force u64) rtime;
+
+	temp *= (__force u64) utime;
+
+	if (sizeof(cputime_t) == 4)
+		temp = div_u64(temp, (__force u32) total);
+	else
+		temp = div64_u64(temp, (__force u64) total);
+
+	return (__force cputime_t) temp;
+}
+
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	cputime_t rtime, utime = p->utime, total = utime + p->stime;
@@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 */
 	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(utime, rtime, total);
+	else
 		utime = rtime;
 
 	/*
@@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	total = cputime.utime + cputime.stime;
 	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) cputime.utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(cputime.utime, rtime, total);
+	else
 		utime = rtime;
 
 	sig->prev_utime = max(sig->prev_utime, utime);
@@ -7246,6 +7252,7 @@ int in_sched_functions(unsigned long addr)
 
 #ifdef CONFIG_CGROUP_SCHED
 struct task_group root_task_group;
+LIST_HEAD(task_groups);
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
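The scale_utime() helper above exists because do_div() takes a 32-bit divisor: when cputime_t is 64 bits wide, the old code's (__force u32) cast silently truncated total before dividing. A small userspace sketch of the failure mode, using plain C division in place of the kernel's div_u64()/div64_u64() (the values are chosen so the 64-bit product does not itself overflow):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* scale utime by rtime/total using a full 64-bit divide */
    static uint64_t scale_ok(uint64_t utime, uint64_t rtime, uint64_t total)
    {
        return rtime * utime / total;
    }

    /* what the old do_div() path effectively did: divisor chopped to 32 bits */
    static uint64_t scale_truncated(uint64_t utime, uint64_t rtime, uint64_t total)
    {
        return rtime * utime / (uint32_t)total;
    }

    int main(void)
    {
        uint64_t utime = 3000000000ULL;
        uint64_t rtime = 6000000000ULL;
        uint64_t total = 6000000000ULL;     /* > UINT32_MAX: truncation bites */

        printf("correct:   %" PRIu64 "\n", scale_ok(utime, rtime, total));
        printf("truncated: %" PRIu64 "\n", scale_truncated(utime, rtime, total));
        return 0;
    }

The correct result is 3000000000; the truncated divide, with total wrapped down to 1705032704, reports roughly 10.5 billion cputime units, which is how long-running tasks ended up with wildly wrong utime/stime splits.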
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d0cc03b3e70b..c219bf8d704c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3387,6 +3387,14 @@ static int tg_load_down(struct task_group *tg, void *data)
 
 static void update_h_load(long cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long now = jiffies;
+
+	if (rq->h_load_throttle == now)
+		return;
+
+	rq->h_load_throttle = now;
+
 	rcu_read_lock();
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 	rcu_read_unlock();
@@ -4293,11 +4301,10 @@ redo:
 		env.src_rq      = busiest;
 		env.loop_max    = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
+		update_h_load(env.src_cpu);
 more_balance:
 		local_irq_save(flags);
 		double_rq_lock(this_rq, busiest);
-		if (!env.loop)
-			update_h_load(env.src_cpu);
 
 		/*
 		 * cur_ld_moved - load moved in current iteration
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 573e1ca01102..944cb68420e9 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -788,6 +788,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	const struct cpumask *span;
 
 	span = sched_rt_period_mask();
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * FIXME: isolated CPUs should really leave the root task group,
+	 * whether they are isolcpus or were isolated via cpusets, lest
+	 * the timer run on a CPU which does not service all runqueues,
+	 * potentially leaving other CPUs indefinitely throttled.  If
+	 * isolation is really required, the user will turn the throttle
+	 * off to kill the perturbations it causes anyway.  Meanwhile,
+	 * this maintains functionality for boot and/or troubleshooting.
+	 */
+	if (rt_b == &root_task_group.rt_bandwidth)
+		span = cpu_online_mask;
+#endif
 	for_each_cpu(i, span) {
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c35a1a7dd4d6..f6714d009e77 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -80,7 +80,7 @@ extern struct mutex sched_domains_mutex;
 struct cfs_rq;
 struct rt_rq;
 
-static LIST_HEAD(task_groups);
+extern struct list_head task_groups;
 
 struct cfs_bandwidth {
 #ifdef CONFIG_CFS_BANDWIDTH
@@ -374,7 +374,11 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
-#endif
+#ifdef CONFIG_SMP
+	unsigned long h_load_throttle;
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 #ifdef CONFIG_RT_GROUP_SCHED
 	struct list_head leaf_rt_rq_list;
 #endif
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 7b386e86fd23..da5eb5bed84a 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -27,8 +27,10 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
 	struct task_struct *stop = rq->stop;
 
-	if (stop && stop->on_rq)
+	if (stop && stop->on_rq) {
+		stop->se.exec_start = rq->clock_task;
 		return stop;
+	}
 
 	return NULL;
 }
@@ -52,6 +54,21 @@ static void yield_task_stop(struct rq *rq)
 
 static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 {
+	struct task_struct *curr = rq->curr;
+	u64 delta_exec;
+
+	delta_exec = rq->clock_task - curr->se.exec_start;
+	if (unlikely((s64)delta_exec < 0))
+		delta_exec = 0;
+
+	schedstat_set(curr->se.statistics.exec_max,
+			max(curr->se.statistics.exec_max, delta_exec));
+
+	curr->se.sum_exec_runtime += delta_exec;
+	account_group_exec_runtime(curr, delta_exec);
+
+	curr->se.exec_start = rq->clock_task;
+	cpuacct_charge(curr, delta_exec);
 }
 
 static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
@@ -60,6 +77,9 @@ static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
 
 static void set_curr_task_stop(struct rq *rq)
 {
+	struct task_struct *stop = rq->stop;
+
+	stop->se.exec_start = rq->clock_task;
 }
 
 static void switched_to_stop(struct rq *rq, struct task_struct *p)
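The h_load_throttle field added above is a once-per-tick memoization: walk_tg_tree() is expensive on large cgroup hierarchies, so update_h_load() records the jiffies value it last ran at and returns early when called again within the same tick. A minimal userspace sketch of the pattern, with a fake tick counter standing in for jiffies (all names here are illustrative):

    #include <stdio.h>

    static unsigned long jiffies_now;       /* stands in for the kernel's jiffies */
    static unsigned long h_load_throttle;   /* tick we last recomputed at */

    static void expensive_recompute(void)
    {
        printf("recomputing at tick %lu\n", jiffies_now);
    }

    static void update_h_load(void)
    {
        /* at most one recompute per tick, however often we are called */
        if (h_load_throttle == jiffies_now)
            return;
        h_load_throttle = jiffies_now;
        expensive_recompute();
    }

    int main(void)
    {
        jiffies_now = 1;
        update_h_load();    /* recomputes */
        update_h_load();    /* throttled: same tick */
        jiffies_now = 2;
        update_h_load();    /* recomputes again */
        return 0;
    }

Because the guard is an equality test against the stored tick, it needs no extra wraparound handling, and it is why the load-balance path above can afford to call update_h_load() unconditionally before the retry loop.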
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 91d4e1742a0c..d320d44903bd 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -75,6 +75,7 @@ void task_work_run(void)
 			p = q->next;
 			q->func(q);
 			q = p;
+			cond_resched();
 		}
 	}
 }
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2988bc819187..0c1485e42be6 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -115,6 +115,7 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts)
 {
 	tk->xtime_sec += ts->tv_sec;
 	tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift;
+	tk_normalize_xtime(tk);
 }
 
 static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
@@ -276,7 +277,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 	tk->xtime_nsec += cycle_delta * tk->mult;
 
 	/* If arch requires, add in gettimeoffset() */
-	tk->xtime_nsec += arch_gettimeoffset() << tk->shift;
+	tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift;
 
 	tk_normalize_xtime(tk);
 
@@ -427,7 +428,7 @@ int do_settimeofday(const struct timespec *tv)
 	struct timespec ts_delta, xt;
 	unsigned long flags;
 
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+	if (!timespec_valid(tv))
 		return -EINVAL;
 
 	write_seqlock_irqsave(&tk->lock, flags);
@@ -463,6 +464,8 @@ int timekeeping_inject_offset(struct timespec *ts)
 {
 	struct timekeeper *tk = &timekeeper;
 	unsigned long flags;
+	struct timespec tmp;
+	int ret = 0;
 
 	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
@@ -471,10 +474,17 @@ int timekeeping_inject_offset(struct timespec *ts)
 
 	timekeeping_forward_now(tk);
 
+	/* Make sure the proposed value is valid */
+	tmp = timespec_add(tk_xtime(tk), *ts);
+	if (!timespec_valid(&tmp)) {
+		ret = -EINVAL;
+		goto error;
+	}
+
 	tk_xtime_add(tk, ts);
 	tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
 
+error: /* even if we error out, we forwarded the time, so call update */
 	timekeeping_update(tk, true);
 	write_sequnlock_irqrestore(&tk->lock, flags);
 
@@ -482,7 +492,7 @@ int timekeeping_inject_offset(struct timespec *ts)
 	/* signal hrtimers about time change */
 	clock_was_set();
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(timekeeping_inject_offset);
 
@@ -649,7 +659,20 @@ void __init timekeeping_init(void)
 	struct timespec now, boot, tmp;
 
 	read_persistent_clock(&now);
+	if (!timespec_valid(&now)) {
+		pr_warn("WARNING: Persistent clock returned invalid value!\n"
+			"         Check your CMOS/BIOS settings.\n");
+		now.tv_sec = 0;
+		now.tv_nsec = 0;
+	}
+
 	read_boot_clock(&boot);
+	if (!timespec_valid(&boot)) {
+		pr_warn("WARNING: Boot clock returned invalid value!\n"
+			"         Check your CMOS/BIOS settings.\n");
+		boot.tv_sec = 0;
+		boot.tv_nsec = 0;
+	}
 
 	seqlock_init(&tk->lock);
@@ -923,20 +946,22 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
 		if (likely(error <= interval))
 			adj = 1;
 		else
-			adj = timekeeping_bigadjust(tk, error, &interval,
-						    &offset);
-	} else if (error < -interval) {
-		/* See comment above, this is just switched for the negative */
-		error >>= 2;
-		if (likely(error >= -interval)) {
-			adj = -1;
-			interval = -interval;
-			offset = -offset;
-		} else
-			adj = timekeeping_bigadjust(tk, error, &interval,
-						    &offset);
-	} else
-		return;
+			adj = timekeeping_bigadjust(tk, error, &interval, &offset);
+	} else {
+		if (error < -interval) {
+			/* See comment above, this is just switched for the negative */
+			error >>= 2;
+			if (likely(error >= -interval)) {
+				adj = -1;
+				interval = -interval;
+				offset = -offset;
+			} else {
+				adj = timekeeping_bigadjust(tk, error, &interval, &offset);
+			}
+		} else {
+			goto out_adjust;
+		}
+	}
 
 	if (unlikely(tk->clock->maxadj &&
 		(tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) {
@@ -999,6 +1024,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
 	tk->xtime_nsec -= offset;
 	tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
 
+out_adjust:
 	/*
 	 * It may be possible that when we entered this function, xtime_nsec
 	 * was very small. Further, if we're slightly speeding the clocksource
@@ -1126,6 +1152,10 @@ static void update_wall_time(void)
 	offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
 #endif
 
+	/* Check if there's really nothing to do */
+	if (offset < tk->cycle_interval)
+		goto out;
+
 	/*
 	 * With NO_HZ we may have to accumulate many cycle_intervals
 	 * (think "ticks") worth of time at once. To do this efficiently,
@@ -1158,9 +1188,9 @@ static void update_wall_time(void)
 	 * the vsyscall implementations are converted to use xtime_nsec
 	 * (shifted nanoseconds), this can be killed.
 	 */
-	remainder = tk->xtime_nsec & ((1 << tk->shift) - 1);
+	remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
 	tk->xtime_nsec -= remainder;
-	tk->xtime_nsec += 1 << tk->shift;
+	tk->xtime_nsec += 1ULL << tk->shift;
 	tk->ntp_error += remainder << tk->ntp_error_shift;
 
 	/*
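Several of the timekeeping fixes above are the same integer-promotion bug: expressions like arch_gettimeoffset() << tk->shift and 1 << tk->shift are evaluated in the operand's native 32-bit width and only then widened to u64 for the assignment, so the high bits are already gone. A small userspace sketch of the difference; the shift value is chosen only to make the wrap visible:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t nsec = 5;          /* e.g. an arch time offset */
        unsigned int shift = 30;    /* a plausible clocksource shift */

        /* shifted as 32-bit, then widened: high bits already lost */
        uint64_t bad  = nsec << shift;

        /* widened first, then shifted: all bits preserved */
        uint64_t good = (uint64_t)nsec << shift;

        printf("bad:  %" PRIu64 "\n", bad);     /* 1073741824 (wrapped) */
        printf("good: %" PRIu64 "\n", good);    /* 5368709120 */
        return 0;
    }

The same reasoning applies to the 1ULL << tk->shift fixes in update_wall_time(): since xtime_nsec holds shifted nanoseconds in a u64, the mask and increment must be computed at 64-bit width from the start.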
diff --git a/kernel/timer.c b/kernel/timer.c
index a61c09374eba..8c5e7b908c68 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1407,13 +1407,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds)
 
 #endif
 
-#ifndef __alpha__
-
-/*
- * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
- * should be moved into arch/i386 instead?
- */
-
 /**
  * sys_getpid - return the thread group id of the current process
  *
@@ -1469,8 +1462,6 @@ SYSCALL_DEFINE0(getegid)
 	return from_kgid_munged(current_user_ns(), current_egid());
 }
 
-#endif
-
 static void process_timeout(unsigned long __data)
 {
 	wake_up_process((struct task_struct *)__data);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 60e4d7875672..6b245f64c8dd 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -506,6 +506,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	int size;
 
 	syscall_nr = syscall_get_nr(current, regs);
+	if (syscall_nr < 0)
+		return;
 	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
 		return;
 
@@ -580,6 +582,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 	int size;
 
 	syscall_nr = syscall_get_nr(current, regs);
+	if (syscall_nr < 0)
+		return;
 	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
 		return;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 69add8a9da68..4b1dfba70f7c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -575,7 +575,7 @@ out:
 /*
  * Create/destroy watchdog threads as CPUs come and go:
  */
-static int
+static int __cpuinit
 cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	int hotcpu = (unsigned long)hcpu;
@@ -610,27 +610,10 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 }
 
-static struct notifier_block cpu_nfb = {
+static struct notifier_block __cpuinitdata cpu_nfb = {
 	.notifier_call = cpu_callback
 };
 
-#ifdef CONFIG_SUSPEND
-/*
- * On exit from suspend we force an offline->online transition on the boot CPU
- * so that the PMU state that was lost while in suspended state gets set up
- * properly for the boot CPU.  This information is required for restarting the
- * NMI watchdog.
- */
-void lockup_detector_bootcpu_resume(void)
-{
-	void *cpu = (void *)(long)smp_processor_id();
-
-	cpu_callback(&cpu_nfb, CPU_DEAD_FROZEN, cpu);
-	cpu_callback(&cpu_nfb, CPU_UP_PREPARE_FROZEN, cpu);
-	cpu_callback(&cpu_nfb, CPU_ONLINE_FROZEN, cpu);
-}
-#endif
-
 void __init lockup_detector_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
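The trace_syscalls change above guards against syscall_get_nr() returning a negative value (it can report -1 when the hook fires outside a real syscall): feeding that number to test_bit() would index before the start of the enabled-syscalls bitmap. A userspace sketch of the guard pattern; the bitmap helper here is a simplified stand-in for the kernel's test_bit(), and all names are illustrative:

    #include <limits.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define NR_SYSCALLS 512
    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    static unsigned long enabled[NR_SYSCALLS / BITS_PER_LONG];

    /* simplified stand-in for the kernel's test_bit() */
    static bool test_bit(int nr, const unsigned long *addr)
    {
        return addr[nr / BITS_PER_LONG] & (1UL << (nr % BITS_PER_LONG));
    }

    static void perf_syscall_enter(int syscall_nr)
    {
        if (syscall_nr < 0)     /* not a real syscall: bail before indexing */
            return;
        if (!test_bit(syscall_nr, enabled))
            return;
        printf("tracing syscall %d\n", syscall_nr);
    }

    int main(void)
    {
        enabled[0] = 1UL << 3;      /* pretend syscall 3 is enabled */
        perf_syscall_enter(3);      /* traced */
        perf_syscall_enter(-1);     /* safely ignored */
        return 0;
    }

Without the early return, a negative index produces an out-of-bounds read (and a negative shift count, which is undefined behavior in C), so the cheap range check has to come before any use of the value.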