diff options
author | Peter Zijlstra <peterz@infradead.org> | 2020-07-08 11:38:59 +0200 |
---|---|---|
committer | Peter Zijlstra <peterz@infradead.org> | 2020-07-08 11:38:59 +0200 |
commit | faa2fd7cbad4609d06d7904c0a80cf2f8cd23678 (patch) | |
tree | 3a5bfcc8879b719aec97cb73ed6e020cde502546 /kernel/sched | |
parent | 01e377c539ca52a6c753d0fdbe93b3b8fcd66a1c (diff) | |
parent | ce3614daabea8a2d01c1dd17ae41d1ec5e5ae7db (diff) |
Merge branch 'sched/urgent'
Diffstat (limited to 'kernel/sched')
-rw-r--r-- | kernel/sched/core.c | 113 | ||||
-rw-r--r-- | kernel/sched/deadline.c | 1 | ||||
-rw-r--r-- | kernel/sched/fair.c | 2 | ||||
-rw-r--r-- | kernel/sched/idle.c | 15 | ||||
-rw-r--r-- | kernel/sched/sched.h | 2 |
5 files changed, 101 insertions, 32 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0a08d0c235d4..15c980af63db 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1313,9 +1313,6 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) void activate_task(struct rq *rq, struct task_struct *p, int flags) { - if (task_contributes_to_load(p)) - rq->nr_uninterruptible--; - enqueue_task(rq, p, flags); p->on_rq = TASK_ON_RQ_QUEUED; @@ -1325,9 +1322,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) { p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING; - if (task_contributes_to_load(p)) - rq->nr_uninterruptible++; - dequeue_task(rq, p, flags); } @@ -1629,7 +1623,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, goto out; } - if (cpumask_equal(p->cpus_ptr, new_mask)) + if (cpumask_equal(&p->cpus_mask, new_mask)) goto out; /* @@ -2228,10 +2222,10 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, lockdep_assert_held(&rq->lock); -#ifdef CONFIG_SMP if (p->sched_contributes_to_load) rq->nr_uninterruptible--; +#ifdef CONFIG_SMP if (wake_flags & WF_MIGRATED) en_flags |= ENQUEUE_MIGRATED; #endif @@ -2285,8 +2279,15 @@ void sched_ttwu_pending(void *arg) rq_lock_irqsave(rq, &rf); update_rq_clock(rq); - llist_for_each_entry_safe(p, t, llist, wake_entry) + llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { + if (WARN_ON_ONCE(p->on_cpu)) + smp_cond_load_acquire(&p->on_cpu, !VAL); + + if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) + set_task_cpu(p, cpu_of(rq)); + ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf); + } rq_unlock_irqrestore(rq, &rf); } @@ -2314,7 +2315,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); WRITE_ONCE(rq->ttwu_pending, 1); - __smp_call_single_queue(cpu, &p->wake_entry); + __smp_call_single_queue(cpu, &p->wake_entry.llist); } void wake_up_if_idle(int cpu) @@ -2361,7 +2362,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) * the soon-to-be-idle CPU as the current CPU is likely busy. * nr_running is checked to avoid unnecessary task stacking. */ - if ((wake_flags & WF_ON_RQ) && cpu_rq(cpu)->nr_running <= 1) + if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) return true; return false; @@ -2370,6 +2371,9 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) { if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { + if (WARN_ON_ONCE(cpu == smp_processor_id())) + return false; + sched_clock_cpu(cpu); /* Sync clocks across CPUs */ __ttwu_queue_wakelist(p, cpu, wake_flags); return true; @@ -2520,7 +2524,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) goto out; success = 1; - cpu = task_cpu(p); trace_sched_waking(p); p->state = TASK_RUNNING; trace_sched_wakeup(p); @@ -2542,7 +2545,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) /* We're going to change ->state: */ success = 1; - cpu = task_cpu(p); /* * Ensure we load p->on_rq _after_ p->state, otherwise it would @@ -2567,7 +2569,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). */ smp_rmb(); - if (p->on_rq && ttwu_remote(p, wake_flags)) + if (READ_ONCE(p->on_rq) && ttwu_remote(p, wake_flags)) goto unlock; if (p->in_iowait) { @@ -2576,9 +2578,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) } #ifdef CONFIG_SMP - p->sched_contributes_to_load = !!task_contributes_to_load(p); - p->state = TASK_WAKING; - /* * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be * possible to, falsely, observe p->on_cpu == 0. @@ -2597,8 +2596,20 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in * __schedule(). See the comment for smp_mb__after_spinlock(). + * + * Form a control-dep-acquire with p->on_rq == 0 above, to ensure + * schedule()'s deactivate_task() has 'happened' and p will no longer + * care about it's own p->state. See the comment in __schedule(). */ - smp_rmb(); + smp_acquire__after_ctrl_dep(); + + /* + * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq + * == 0), which means we need to do an enqueue, change p->state to + * TASK_WAKING such that we can unlock p->pi_lock before doing the + * enqueue, such as ttwu_queue_wakelist(). + */ + p->state = TASK_WAKING; /* * If the owning (remote) CPU is still in the middle of schedule() with @@ -2606,8 +2617,21 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * which potentially sends an IPI instead of spinning on p->on_cpu to * let the waker make forward progress. This is safe because IRQs are * disabled and the IPI will deliver after on_cpu is cleared. + * + * Ensure we load task_cpu(p) after p->on_cpu: + * + * set_task_cpu(p, cpu); + * STORE p->cpu = @cpu + * __schedule() (switch to task 'p') + * LOCK rq->lock + * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu) + * STORE p->on_cpu = 1 LOAD p->cpu + * + * to ensure we observe the correct CPU on which the task is currently + * scheduling. */ - if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ)) + if (smp_load_acquire(&p->on_cpu) && + ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) goto unlock; /* @@ -2627,6 +2651,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) psi_ttwu_dequeue(p); set_task_cpu(p, cpu); } +#else + cpu = task_cpu(p); #endif /* CONFIG_SMP */ ttwu_queue(p, cpu, wake_flags); @@ -2634,7 +2660,7 @@ unlock: raw_spin_unlock_irqrestore(&p->pi_lock, flags); out: if (success) - ttwu_stat(p, cpu, wake_flags); + ttwu_stat(p, task_cpu(p), wake_flags); preempt_enable(); return success; @@ -2755,7 +2781,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) #endif init_numa_balancing(clone_flags, p); #ifdef CONFIG_SMP - p->wake_entry_type = CSD_TYPE_TTWU; + p->wake_entry.u_flags = CSD_TYPE_TTWU; #endif } @@ -2931,6 +2957,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) * Silence PROVE_RCU. */ raw_spin_lock_irqsave(&p->pi_lock, flags); + rseq_migrate(p); /* * We're setting the CPU for the first time, we don't migrate, * so use __set_task_cpu(). @@ -2995,6 +3022,7 @@ void wake_up_new_task(struct task_struct *p) * as we're not fully set-up yet. */ p->recent_used_cpu = task_cpu(p); + rseq_migrate(p); __set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); #endif rq = __task_rq_lock(p, &rf); @@ -4065,6 +4093,7 @@ static void __sched notrace __schedule(bool preempt) { struct task_struct *prev, *next; unsigned long *switch_count; + unsigned long prev_state; struct rq_flags rf; struct rq *rq; int cpu; @@ -4081,12 +4110,22 @@ static void __sched notrace __schedule(bool preempt) local_irq_disable(); rcu_note_context_switch(preempt); + /* See deactivate_task() below. */ + prev_state = prev->state; + /* * Make sure that signal_pending_state()->signal_pending() below * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) - * done by the caller to avoid the race with signal_wake_up(). + * done by the caller to avoid the race with signal_wake_up(): + * + * __set_current_state(@state) signal_wake_up() + * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) + * wake_up_state(p, state) + * LOCK rq->lock LOCK p->pi_state + * smp_mb__after_spinlock() smp_mb__after_spinlock() + * if (signal_pending_state()) if (p->state & @state) * - * The membarrier system call requires a full memory barrier + * Also, the membarrier system call requires a full memory barrier * after coming from user-space, before storing to rq->curr. */ rq_lock(rq, &rf); @@ -4097,10 +4136,31 @@ static void __sched notrace __schedule(bool preempt) update_rq_clock(rq); switch_count = &prev->nivcsw; - if (!preempt && prev->state) { - if (signal_pending_state(prev->state, prev)) { + /* + * We must re-load prev->state in case ttwu_remote() changed it + * before we acquired rq->lock. + */ + if (!preempt && prev_state && prev_state == prev->state) { + if (signal_pending_state(prev_state, prev)) { prev->state = TASK_RUNNING; } else { + prev->sched_contributes_to_load = + (prev_state & TASK_UNINTERRUPTIBLE) && + !(prev_state & TASK_NOLOAD) && + !(prev->flags & PF_FROZEN); + + if (prev->sched_contributes_to_load) + rq->nr_uninterruptible++; + + /* + * __schedule() ttwu() + * prev_state = prev->state; if (READ_ONCE(p->on_rq) && ...) + * LOCK rq->lock goto out; + * smp_mb__after_spinlock(); smp_acquire__after_ctrl_dep(); + * p->on_rq = 0; p->state = TASK_WAKING; + * + * After this, schedule() must not care about p->state any more. + */ deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK); if (prev->in_iowait) { @@ -4524,7 +4584,8 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) */ if (dl_prio(prio)) { if (!dl_prio(p->normal_prio) || - (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { + (pi_task && dl_prio(pi_task->prio) && + dl_entity_preempt(&pi_task->dl, &p->dl))) { p->dl.dl_boosted = 1; queue_flag |= ENQUEUE_REPLENISH; } else diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index c9cc1d6fa363..3862a28cd05d 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -2763,6 +2763,7 @@ void __dl_clear_params(struct task_struct *p) dl_se->dl_bw = 0; dl_se->dl_density = 0; + dl_se->dl_boosted = 0; dl_se->dl_throttled = 0; dl_se->dl_yielded = 0; dl_se->dl_non_contending = 0; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b9b9f19e80c1..6fab1d17c575 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -806,7 +806,7 @@ void post_init_entity_util_avg(struct task_struct *p) } } - sa->runnable_avg = cpu_scale; + sa->runnable_avg = sa->util_avg; if (p->sched_class != &fair_sched_class) { /* diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 336d478bddc8..6bf34986f45c 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -96,6 +96,15 @@ void __cpuidle default_idle_call(void) } } +static int call_cpuidle_s2idle(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{ + if (current_clr_polling_and_test()) + return -EBUSY; + + return cpuidle_enter_s2idle(drv, dev); +} + static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, int next_state) { @@ -171,11 +180,9 @@ static void cpuidle_idle_call(void) if (idle_should_enter_s2idle()) { rcu_idle_enter(); - entered_state = cpuidle_enter_s2idle(drv, dev); - if (entered_state > 0) { - local_irq_enable(); + entered_state = call_cpuidle_s2idle(drv, dev); + if (entered_state > 0) goto exit_idle; - } rcu_idle_exit(); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 549e7e6e0a66..5aa6661ecaf1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1697,7 +1697,7 @@ static inline int task_on_rq_migrating(struct task_struct *p) #define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */ #define WF_FORK 0x02 /* Child wakeup after fork */ #define WF_MIGRATED 0x04 /* Internal use, task got migrated */ -#define WF_ON_RQ 0x08 /* Wakee is on_rq */ +#define WF_ON_CPU 0x08 /* Wakee is on_cpu */ /* * To aid in avoiding the subversion of "niceness" due to uneven distribution |