diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-09-06 15:31:03 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-09-06 15:31:03 +0200 |
commit | 77dd3b3bd23111040c504be6bd873a5ad09f02df (patch) | |
tree | 119ddcdf89d6a593a8fefea530249084559eeb44 /kernel | |
parent | 916c7a855174e3b53d182b97a26b2e27a29726a1 (diff) | |
parent | 70bb08962ea9bd50797ae9f16b2493f5f7c65053 (diff) |
Merge branch 'linus' into timers/ntp
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/auditsc.c | 3 | ||||
-rw-r--r-- | kernel/exit.c | 88 | ||||
-rw-r--r-- | kernel/lockdep.c | 6 | ||||
-rw-r--r-- | kernel/lockdep_proc.c | 3 | ||||
-rw-r--r-- | kernel/module.c | 2 | ||||
-rw-r--r-- | kernel/nsproxy.c | 1 | ||||
-rw-r--r-- | kernel/pid_namespace.c | 3 | ||||
-rw-r--r-- | kernel/pm_qos_params.c | 25 | ||||
-rw-r--r-- | kernel/power/disk.c | 13 | ||||
-rw-r--r-- | kernel/power/main.c | 5 | ||||
-rw-r--r-- | kernel/power/swap.c | 1 | ||||
-rw-r--r-- | kernel/rcupdate.c | 1 | ||||
-rw-r--r-- | kernel/resource.c | 88 | ||||
-rw-r--r-- | kernel/sched.c | 59 | ||||
-rw-r--r-- | kernel/sched_clock.c | 84 | ||||
-rw-r--r-- | kernel/sched_features.h | 2 | ||||
-rw-r--r-- | kernel/sched_rt.c | 13 | ||||
-rw-r--r-- | kernel/smp.c | 10 | ||||
-rw-r--r-- | kernel/softlockup.c | 3 | ||||
-rw-r--r-- | kernel/sysctl.c | 1 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 9 | ||||
-rw-r--r-- | kernel/user_namespace.c | 1 | ||||
-rw-r--r-- | kernel/utsname.c | 1 | ||||
-rw-r--r-- | kernel/utsname_sysctl.c | 1 |
24 files changed, 253 insertions, 170 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 972f8e61d36a..59cedfb040e7 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -243,10 +243,11 @@ static inline int open_arg(int flags, int mask) static int audit_match_perm(struct audit_context *ctx, int mask) { + unsigned n; if (unlikely(!ctx)) return 0; - unsigned n = ctx->major; + n = ctx->major; switch (audit_classify_syscall(ctx->arch, n)) { case 0: /* native */ if ((mask & AUDIT_PERM_WRITE) && diff --git a/kernel/exit.c b/kernel/exit.c index 38ec40630149..16395644a98f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -112,9 +112,9 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ - sig->utime = cputime_add(sig->utime, tsk->utime); - sig->stime = cputime_add(sig->stime, tsk->stime); - sig->gtime = cputime_add(sig->gtime, tsk->gtime); + sig->utime = cputime_add(sig->utime, task_utime(tsk)); + sig->stime = cputime_add(sig->stime, task_stime(tsk)); + sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; sig->nvcsw += tsk->nvcsw; @@ -831,26 +831,50 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father) * the child reaper process (ie "init") in our pid * space. */ +static struct task_struct *find_new_reaper(struct task_struct *father) +{ + struct pid_namespace *pid_ns = task_active_pid_ns(father); + struct task_struct *thread; + + thread = father; + while_each_thread(father, thread) { + if (thread->flags & PF_EXITING) + continue; + if (unlikely(pid_ns->child_reaper == father)) + pid_ns->child_reaper = thread; + return thread; + } + + if (unlikely(pid_ns->child_reaper == father)) { + write_unlock_irq(&tasklist_lock); + if (unlikely(pid_ns == &init_pid_ns)) + panic("Attempted to kill init!"); + + zap_pid_ns_processes(pid_ns); + write_lock_irq(&tasklist_lock); + /* + * We can not clear ->child_reaper or leave it alone. + * There may by stealth EXIT_DEAD tasks on ->children, + * forget_original_parent() must move them somewhere. + */ + pid_ns->child_reaper = init_pid_ns.child_reaper; + } + + return pid_ns->child_reaper; +} + static void forget_original_parent(struct task_struct *father) { - struct task_struct *p, *n, *reaper = father; + struct task_struct *p, *n, *reaper; LIST_HEAD(ptrace_dead); write_lock_irq(&tasklist_lock); - + reaper = find_new_reaper(father); /* * First clean up ptrace if we were using it. */ ptrace_exit(father, &ptrace_dead); - do { - reaper = next_thread(reaper); - if (reaper == father) { - reaper = task_child_reaper(father); - break; - } - } while (reaper->flags & PF_EXITING); - list_for_each_entry_safe(p, n, &father->children, sibling) { p->real_parent = reaper; if (p->parent == father) { @@ -918,8 +942,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead) /* mt-exec, de_thread() is waiting for us */ if (thread_group_leader(tsk) && - tsk->signal->notify_count < 0 && - tsk->signal->group_exit_task) + tsk->signal->group_exit_task && + tsk->signal->notify_count < 0) wake_up_process(tsk->signal->group_exit_task); write_unlock_irq(&tasklist_lock); @@ -959,39 +983,6 @@ static void check_stack_usage(void) static inline void check_stack_usage(void) {} #endif -static inline void exit_child_reaper(struct task_struct *tsk) -{ - if (likely(tsk->group_leader != task_child_reaper(tsk))) - return; - - if (tsk->nsproxy->pid_ns == &init_pid_ns) - panic("Attempted to kill init!"); - - /* - * @tsk is the last thread in the 'cgroup-init' and is exiting. - * Terminate all remaining processes in the namespace and reap them - * before exiting @tsk. - * - * Note that @tsk (last thread of cgroup-init) may not necessarily - * be the child-reaper (i.e main thread of cgroup-init) of the - * namespace i.e the child_reaper may have already exited. - * - * Even after a child_reaper exits, we let it inherit orphaned children, - * because, pid_ns->child_reaper remains valid as long as there is - * at least one living sub-thread in the cgroup init. - - * This living sub-thread of the cgroup-init will be notified when - * a child inherited by the 'child-reaper' exits (do_notify_parent() - * uses __group_send_sig_info()). Further, when reaping child processes, - * do_wait() iterates over children of all living sub threads. - - * i.e even though 'child_reaper' thread is listed as the parent of the - * orphaned children, any living sub-thread in the cgroup-init can - * perform the role of the child_reaper. - */ - zap_pid_ns_processes(tsk->nsproxy->pid_ns); -} - NORET_TYPE void do_exit(long code) { struct task_struct *tsk = current; @@ -1051,7 +1042,6 @@ NORET_TYPE void do_exit(long code) } group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { - exit_child_reaper(tsk); hrtimer_cancel(&tsk->signal->real_timer); exit_itimers(tsk->signal); } diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 3bfb1877a003..dbda475b13bd 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -875,11 +875,11 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, if (!entry) return 0; - entry->class = this; - entry->distance = distance; if (!save_trace(&entry->trace)) return 0; + entry->class = this; + entry->distance = distance; /* * Since we never remove from the dependency list, the list can * be walked lockless by other CPUs, it's only allocation @@ -3029,7 +3029,7 @@ found_it: stats = get_lock_stats(hlock_class(hlock)); if (point < ARRAY_SIZE(stats->contention_point)) - stats->contention_point[i]++; + stats->contention_point[point]++; if (lock->cpu != smp_processor_id()) stats->bounces[bounce_contended + !!hlock->read]++; put_lock_stats(stats); diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 4b194d34d77f..20dbcbf9c7dd 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -472,8 +472,9 @@ static void snprint_time(char *buf, size_t bufsiz, s64 nr) { unsigned long rem; + nr += 5; /* for display rounding */ rem = do_div(nr, 1000); /* XXX: do_div_signed */ - snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, ((int)rem+5)/10); + snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10); } static void seq_time(struct seq_file *m, s64 time) diff --git a/kernel/module.c b/kernel/module.c index 08864d257eb0..9db11911e04b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1799,7 +1799,7 @@ static void *module_alloc_update_bounds(unsigned long size) /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ -static struct module *load_module(void __user *umod, +static noinline struct module *load_module(void __user *umod, unsigned long len, const char __user *uargs) { diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 21575fc46d05..1d3ef29a2583 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -14,7 +14,6 @@ */ #include <linux/module.h> -#include <linux/version.h> #include <linux/nsproxy.h> #include <linux/init_task.h> #include <linux/mnt_namespace.h> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index ea567b78d1aa..fab8ea86fac3 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -179,9 +179,6 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) rc = sys_wait4(-1, NULL, __WALL, NULL); } while (rc != -ECHILD); - - /* Child reaper for the pid namespace is going away */ - pid_ns->child_reaper = NULL; acct_exit_ns(pid_ns); return; } diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index da9c2dda6a4e..dfdec524d1b7 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -43,7 +43,7 @@ #include <linux/uaccess.h> /* - * locking rule: all changes to target_value or requirements or notifiers lists + * locking rule: all changes to requirements or notifiers lists * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock * held, taken with _irqsave. One lock to rule them all */ @@ -66,7 +66,7 @@ struct pm_qos_object { struct miscdevice pm_qos_power_miscdev; char *name; s32 default_value; - s32 target_value; + atomic_t target_value; s32 (*comparitor)(s32, s32); }; @@ -77,7 +77,7 @@ static struct pm_qos_object cpu_dma_pm_qos = { .notifiers = &cpu_dma_lat_notifier, .name = "cpu_dma_latency", .default_value = 2000 * USEC_PER_SEC, - .target_value = 2000 * USEC_PER_SEC, + .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC), .comparitor = min_compare }; @@ -87,7 +87,7 @@ static struct pm_qos_object network_lat_pm_qos = { .notifiers = &network_lat_notifier, .name = "network_latency", .default_value = 2000 * USEC_PER_SEC, - .target_value = 2000 * USEC_PER_SEC, + .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC), .comparitor = min_compare }; @@ -99,7 +99,7 @@ static struct pm_qos_object network_throughput_pm_qos = { .notifiers = &network_throughput_notifier, .name = "network_throughput", .default_value = 0, - .target_value = 0, + .target_value = ATOMIC_INIT(0), .comparitor = max_compare }; @@ -150,11 +150,11 @@ static void update_target(int target) extreme_value = pm_qos_array[target]->comparitor( extreme_value, node->value); } - if (pm_qos_array[target]->target_value != extreme_value) { + if (atomic_read(&pm_qos_array[target]->target_value) != extreme_value) { call_notifier = 1; - pm_qos_array[target]->target_value = extreme_value; + atomic_set(&pm_qos_array[target]->target_value, extreme_value); pr_debug(KERN_ERR "new target for qos %d is %d\n", target, - pm_qos_array[target]->target_value); + atomic_read(&pm_qos_array[target]->target_value)); } spin_unlock_irqrestore(&pm_qos_lock, flags); @@ -193,14 +193,7 @@ static int find_pm_qos_object_by_minor(int minor) */ int pm_qos_requirement(int pm_qos_class) { - int ret_val; - unsigned long flags; - - spin_lock_irqsave(&pm_qos_lock, flags); - ret_val = pm_qos_array[pm_qos_class]->target_value; - spin_unlock_irqrestore(&pm_qos_lock, flags); - - return ret_val; + return atomic_read(&pm_qos_array[pm_qos_class]->target_value); } EXPORT_SYMBOL_GPL(pm_qos_requirement); diff --git a/kernel/power/disk.c b/kernel/power/disk.c index f011e0870b52..bbd85c60f741 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -21,6 +21,7 @@ #include <linux/console.h> #include <linux/cpu.h> #include <linux/freezer.h> +#include <linux/ftrace.h> #include "power.h" @@ -255,7 +256,7 @@ static int create_image(int platform_mode) int hibernation_snapshot(int platform_mode) { - int error; + int error, ftrace_save; /* Free memory before shutting down devices. */ error = swsusp_shrink_memory(); @@ -267,6 +268,7 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); + ftrace_save = __ftrace_enabled_save(); error = device_suspend(PMSG_FREEZE); if (error) goto Recover_platform; @@ -296,6 +298,7 @@ int hibernation_snapshot(int platform_mode) Resume_devices: device_resume(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + __ftrace_enabled_restore(ftrace_save); resume_console(); Close: platform_end(platform_mode); @@ -366,10 +369,11 @@ static int resume_target_kernel(void) int hibernation_restore(int platform_mode) { - int error; + int error, ftrace_save; pm_prepare_console(); suspend_console(); + ftrace_save = __ftrace_enabled_save(); error = device_suspend(PMSG_QUIESCE); if (error) goto Finish; @@ -384,6 +388,7 @@ int hibernation_restore(int platform_mode) platform_restore_cleanup(platform_mode); device_resume(PMSG_RECOVER); Finish: + __ftrace_enabled_restore(ftrace_save); resume_console(); pm_restore_console(); return error; @@ -396,7 +401,7 @@ int hibernation_restore(int platform_mode) int hibernation_platform_enter(void) { - int error; + int error, ftrace_save; if (!hibernation_ops) return -ENOSYS; @@ -411,6 +416,7 @@ int hibernation_platform_enter(void) goto Close; suspend_console(); + ftrace_save = __ftrace_enabled_save(); error = device_suspend(PMSG_HIBERNATE); if (error) { if (hibernation_ops->recover) @@ -445,6 +451,7 @@ int hibernation_platform_enter(void) hibernation_ops->finish(); Resume_devices: device_resume(PMSG_RESTORE); + __ftrace_enabled_restore(ftrace_save); resume_console(); Close: hibernation_ops->end(); diff --git a/kernel/power/main.c b/kernel/power/main.c index 0b7476f5d2a6..540b16b68565 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -21,6 +21,7 @@ #include <linux/freezer.h> #include <linux/vmstat.h> #include <linux/syscalls.h> +#include <linux/ftrace.h> #include "power.h" @@ -310,7 +311,7 @@ static int suspend_enter(suspend_state_t state) */ int suspend_devices_and_enter(suspend_state_t state) { - int error; + int error, ftrace_save; if (!suspend_ops) return -ENOSYS; @@ -321,6 +322,7 @@ int suspend_devices_and_enter(suspend_state_t state) goto Close; } suspend_console(); + ftrace_save = __ftrace_enabled_save(); suspend_test_start(); error = device_suspend(PMSG_SUSPEND); if (error) { @@ -352,6 +354,7 @@ int suspend_devices_and_enter(suspend_state_t state) suspend_test_start(); device_resume(PMSG_RESUME); suspend_test_finish("resume devices"); + __ftrace_enabled_restore(ftrace_save); resume_console(); Close: if (suspend_ops->end) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index a0abf9a463f9..80ccac849e46 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -14,7 +14,6 @@ #include <linux/module.h> #include <linux/file.h> #include <linux/utsname.h> -#include <linux/version.h> #include <linux/delay.h> #include <linux/bitops.h> #include <linux/genhd.h> diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index f14f372cf6f5..467d5940f624 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -77,6 +77,7 @@ void wakeme_after_rcu(struct rcu_head *head) * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. */ +void synchronize_rcu(void); /* Makes kernel-doc tools happy */ synchronize_rcu_xxx(synchronize_rcu, call_rcu) EXPORT_SYMBOL_GPL(synchronize_rcu); diff --git a/kernel/resource.c b/kernel/resource.c index f5b518eabefe..03d796c1b2e9 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -362,35 +362,21 @@ int allocate_resource(struct resource *root, struct resource *new, EXPORT_SYMBOL(allocate_resource); -/** - * insert_resource - Inserts a resource in the resource tree - * @parent: parent of the new resource - * @new: new resource to insert - * - * Returns 0 on success, -EBUSY if the resource can't be inserted. - * - * This function is equivalent to request_resource when no conflict - * happens. If a conflict happens, and the conflicting resources - * entirely fit within the range of the new resource, then the new - * resource is inserted and the conflicting resources become children of - * the new resource. +/* + * Insert a resource into the resource tree. If successful, return NULL, + * otherwise return the conflicting resource (compare to __request_resource()) */ -int insert_resource(struct resource *parent, struct resource *new) +static struct resource * __insert_resource(struct resource *parent, struct resource *new) { - int result; struct resource *first, *next; - write_lock(&resource_lock); - for (;; parent = first) { - result = 0; first = __request_resource(parent, new); if (!first) - goto out; + return first; - result = -EBUSY; if (first == parent) - goto out; + return first; if ((first->start > new->start) || (first->end < new->end)) break; @@ -401,15 +387,13 @@ int insert_resource(struct resource *parent, struct resource *new) for (next = first; ; next = next->sibling) { /* Partial overlap? Bad, and unfixable */ if (next->start < new->start || next->end > new->end) - goto out; + return next; if (!next->sibling) break; if (next->sibling->start > new->end) break; } - result = 0; - new->parent = parent; new->sibling = next->sibling; new->child = first; @@ -426,10 +410,64 @@ int insert_resource(struct resource *parent, struct resource *new) next = next->sibling; next->sibling = new; } + return NULL; +} - out: +/** + * insert_resource - Inserts a resource in the resource tree + * @parent: parent of the new resource + * @new: new resource to insert + * + * Returns 0 on success, -EBUSY if the resource can't be inserted. + * + * This function is equivalent to request_resource when no conflict + * happens. If a conflict happens, and the conflicting resources + * entirely fit within the range of the new resource, then the new + * resource is inserted and the conflicting resources become children of + * the new resource. + */ +int insert_resource(struct resource *parent, struct resource *new) +{ + struct resource *conflict; + + write_lock(&resource_lock); + conflict = __insert_resource(parent, new); + write_unlock(&resource_lock); + return conflict ? -EBUSY : 0; +} + +/** + * insert_resource_expand_to_fit - Insert a resource into the resource tree + * @root: root resource descriptor + * @new: new resource to insert + * + * Insert a resource into the resource tree, possibly expanding it in order + * to make it encompass any conflicting resources. + */ +void insert_resource_expand_to_fit(struct resource *root, struct resource *new) +{ + if (new->parent) + return; + + write_lock(&resource_lock); + for (;;) { + struct resource *conflict; + + conflict = __insert_resource(root, new); + if (!conflict) + break; + if (conflict == root) + break; + + /* Ok, expand resource to cover the conflict, then try again .. */ + if (conflict->start < new->start) + new->start = conflict->start; + if (conflict->end > new->end) + new->end = conflict->end; + + printk("Expanded resource %s due to conflict with %s\n", new->name, conflict->name); + } write_unlock(&resource_lock); - return result; } /** diff --git a/kernel/sched.c b/kernel/sched.c index 9a1ddb84e26d..1a5f73c1fcdc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4179,6 +4179,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal) } /* + * Use precise platform statistics if available: + */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +cputime_t task_utime(struct task_struct *p) +{ + return p->utime; +} + +cputime_t task_stime(struct task_struct *p) +{ + return p->stime; +} +#else +cputime_t task_utime(struct task_struct *p) +{ + clock_t utime = cputime_to_clock_t(p->utime), + total = utime + cputime_to_clock_t(p->stime); + u64 temp; + + /* + * Use CFS's precise accounting: + */ + temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); + + if (total) { + temp *= utime; + do_div(temp, total); + } + utime = (clock_t)temp; + + p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); + return p->prev_utime; +} + +cputime_t task_stime(struct task_struct *p) +{ + clock_t stime; + + /* + * Use CFS's precise accounting. (we subtract utime from + * the total, to make sure the total observed by userspace + * grows monotonically - apps rely on that): + */ + stime = nsec_to_clock_t(p->se.sum_exec_runtime) - + cputime_to_clock_t(task_utime(p)); + + if (stime >= 0) + p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); + + return p->prev_stime; +} +#endif + +inline cputime_t task_gtime(struct task_struct *p) +{ + return p->gtime; +} + +/* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. * diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 204991a0bfa7..e8ab096ddfe3 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c @@ -12,19 +12,17 @@ * * Create a semi stable clock from a mixture of other events, including: * - gtod - * - jiffies * - sched_clock() * - explicit idle events * * We use gtod as base and the unstable clock deltas. The deltas are filtered, - * making it monotonic and keeping it within an expected window. This window - * is set up using jiffies. + * making it monotonic and keeping it within an expected window. * * Furthermore, explicit sleep and wakeup hooks allow us to account for time * that is otherwise invisible (TSC gets stopped). * * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat - * consistent between cpus (never more than 1 jiffies difference). + * consistent between cpus (never more than 2 jiffies difference). */ #include <linux/sched.h> #include <linux/percpu.h> @@ -54,7 +52,6 @@ struct sched_clock_data { */ raw_spinlock_t lock; - unsigned long tick_jiffies; u64 tick_raw; u64 tick_gtod; u64 clock; @@ -75,14 +72,12 @@ static inline struct sched_clock_data *cpu_sdc(int cpu) void sched_clock_init(void) { u64 ktime_now = ktime_to_ns(ktime_get()); - unsigned long now_jiffies = jiffies; int cpu; for_each_possible_cpu(cpu) { struct sched_clock_data *scd = cpu_sdc(cpu); scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; - scd->tick_jiffies = now_jiffies; scd->tick_raw = 0; scd->tick_gtod = ktime_now; scd->clock = ktime_now; @@ -92,46 +87,51 @@ void sched_clock_init(void) } /* + * min,max except they take wrapping into account + */ + +static inline u64 wrap_min(u64 x, u64 y) +{ + return (s64)(x - y) < 0 ? x : y; +} + +static inline u64 wrap_max(u64 x, u64 y) +{ + return (s64)(x - y) > 0 ? x : y; +} + +/* * update the percpu scd from the raw @now value * * - filter out backward motion - * - use jiffies to generate a min,max window to clip the raw values + * - use the GTOD tick value to create a window to filter crazy TSC values */ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now) { - unsigned long now_jiffies = jiffies; - long delta_jiffies = now_jiffies - scd->tick_jiffies; - u64 clock = scd->clock; - u64 min_clock, max_clock; s64 delta = now - scd->tick_raw; + u64 clock, min_clock, max_clock; WARN_ON_ONCE(!irqs_disabled()); - min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC; - if (unlikely(delta < 0)) { - clock++; - goto out; - } + if (unlikely(delta < 0)) + delta = 0; - max_clock = min_clock + TICK_NSEC; + /* + * scd->clock = clamp(scd->tick_gtod + delta, + * max(scd->tick_gtod, scd->clock), + * scd->tick_gtod + TICK_NSEC); + */ - if (unlikely(clock + delta > max_clock)) { - if (clock < max_clock) - clock = max_clock; - else - clock++; - } else { - clock += delta; - } + clock = scd->tick_gtod + delta; + min_clock = wrap_max(scd->tick_gtod, scd->clock); + max_clock = scd->tick_gtod + TICK_NSEC; - out: - if (unlikely(clock < min_clock)) - clock = min_clock; + clock = wrap_max(clock, min_clock); + clock = wrap_min(clock, max_clock); - scd->tick_jiffies = now_jiffies; scd->clock = clock; - return clock; + return scd->clock; } static void lock_double_clock(struct sched_clock_data *data1, @@ -171,7 +171,7 @@ u64 sched_clock_cpu(int cpu) * larger time as the latest time for both * runqueues. (this creates monotonic movement) */ - if (likely(remote_clock < this_clock)) { + if (likely((s64)(remote_clock - this_clock) < 0)) { clock = this_clock; scd->clock = clock; } else { @@ -207,14 +207,9 @@ void sched_clock_tick(void) now = sched_clock(); __raw_spin_lock(&scd->lock); - __update_sched_clock(scd, now); - /* - * update tick_gtod after __update_sched_clock() because that will - * already observe 1 new jiffy; adding a new tick_gtod to that would - * increase the clock 2 jiffies. - */ scd->tick_raw = now; scd->tick_gtod = now_gtod; + __update_sched_clock(scd, now); __raw_spin_unlock(&scd->lock); } @@ -232,18 +227,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event); */ void sched_clock_idle_wakeup_event(u64 delta_ns) { - struct sched_clock_data *scd = this_scd(); - - /* - * Override the previous timestamp and ignore all - * sched_clock() deltas that occured while we idled, - * and use the PM-provided delta_ns to advance the - * rq clock: - */ - __raw_spin_lock(&scd->lock); - scd->clock += delta_ns; - __raw_spin_unlock(&scd->lock); - + sched_clock_tick(); touch_softlockup_watchdog(); } EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 862b06bd560a..9353ca78154e 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -8,6 +8,6 @@ SCHED_FEAT(SYNC_WAKEUPS, 1) SCHED_FEAT(HRTICK, 1) SCHED_FEAT(DOUBLE_TICK, 0) SCHED_FEAT(ASYM_GRAN, 1) -SCHED_FEAT(LB_BIAS, 0) +SCHED_FEAT(LB_BIAS, 1) SCHED_FEAT(LB_WAKEUP_UPDATE, 1) SCHED_FEAT(ASYM_EFF_LOAD, 1) diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 998ba54b4543..552310798dad 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -199,6 +199,8 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) { + if (rt_rq->rt_nr_running) + resched_task(rq_of_rt_rq(rt_rq)->curr); } static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) @@ -438,9 +440,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) { u64 runtime = sched_rt_runtime(rt_rq); - if (runtime == RUNTIME_INF) - return 0; - if (rt_rq->rt_throttled) return rt_rq_throttled(rt_rq); @@ -491,9 +490,11 @@ static void update_curr_rt(struct rq *rq) rt_rq = rt_rq_of_se(rt_se); spin_lock(&rt_rq->rt_runtime_lock); - rt_rq->rt_time += delta_exec; - if (sched_rt_runtime_exceeded(rt_rq)) - resched_task(curr); + if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { + rt_rq->rt_time += delta_exec; + if (sched_rt_runtime_exceeded(rt_rq)) + resched_task(curr); + } spin_unlock(&rt_rq->rt_runtime_lock); } } diff --git a/kernel/smp.c b/kernel/smp.c index 782e2b93e465..f362a8553777 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -210,8 +210,10 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, { struct call_single_data d; unsigned long flags; - /* prevent preemption and reschedule on another processor */ + /* prevent preemption and reschedule on another processor, + as well as CPU removal */ int me = get_cpu(); + int err = 0; /* Can deadlock when called with interrupts disabled */ WARN_ON(irqs_disabled()); @@ -220,7 +222,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, local_irq_save(flags); func(info); local_irq_restore(flags); - } else { + } else if ((unsigned)cpu < NR_CPUS && cpu_online(cpu)) { struct call_single_data *data = NULL; if (!wait) { @@ -236,10 +238,12 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, data->func = func; data->info = info; generic_exec_single(cpu, data); + } else { + err = -ENXIO; /* CPU not online */ } put_cpu(); - return 0; + return err; } EXPORT_SYMBOL(smp_call_function_single); diff --git a/kernel/softlockup.c b/kernel/softlockup.c index b75b492fbfcf..cb838ee93a82 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -233,7 +233,8 @@ static void check_hung_uninterruptible_tasks(int this_cpu) do_each_thread(g, t) { if (!--max_count) goto unlock; - if (t->state & TASK_UNINTERRUPTIBLE) + /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ + if (t->state == TASK_UNINTERRUPTIBLE) check_hung_task(t, now); } while_each_thread(g, t); unlock: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index fe4713347275..50ec0886fa3d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -159,6 +159,7 @@ static int proc_dointvec_taint(struct ctl_table *table, int write, struct file * static struct ctl_table root_table[]; static struct ctl_table_root sysctl_table_root; static struct ctl_table_header root_table_header = { + .count = 1, .ctl_table = root_table, .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), .root = &sysctl_table_root, diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f5da526424a9..a87b0468568b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -162,6 +162,8 @@ void tick_nohz_stop_idle(int cpu) ts->idle_lastupdate = now; ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); ts->idle_active = 0; + + sched_clock_idle_wakeup_event(0); } } @@ -177,6 +179,7 @@ static ktime_t tick_nohz_start_idle(struct tick_sched *ts) } ts->idle_entrytime = now; ts->idle_active = 1; + sched_clock_idle_sleep_event(); return now; } @@ -643,17 +646,21 @@ void tick_setup_sched_timer(void) ts->nohz_mode = NOHZ_MODE_HIGHRES; #endif } +#endif /* HIGH_RES_TIMERS */ +#if defined CONFIG_NO_HZ || defined CONFIG_HIGH_RES_TIMERS void tick_cancel_sched_timer(int cpu) { struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); +# ifdef CONFIG_HIGH_RES_TIMERS if (ts->sched_timer.base) hrtimer_cancel(&ts->sched_timer); +# endif ts->nohz_mode = NOHZ_MODE_INACTIVE; } -#endif /* HIGH_RES_TIMERS */ +#endif /** * Async notification about clocksource changes diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index a9ab0596de44..532858fa5b88 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -6,7 +6,6 @@ */ #include <linux/module.h> -#include <linux/version.h> #include <linux/nsproxy.h> #include <linux/slab.h> #include <linux/user_namespace.h> diff --git a/kernel/utsname.c b/kernel/utsname.c index 64d398f12444..815237a55af8 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -12,7 +12,6 @@ #include <linux/module.h> #include <linux/uts.h> #include <linux/utsname.h> -#include <linux/version.h> #include <linux/err.h> #include <linux/slab.h> diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index fe3a56c2256d..4ab9659d269e 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -12,7 +12,6 @@ #include <linux/module.h> #include <linux/uts.h> #include <linux/utsname.h> -#include <linux/version.h> #include <linux/sysctl.h> static void *get_uts(ctl_table *table, int write) |