diff options
author | Peter Zijlstra <peterz@infradead.org> | 2019-11-08 11:11:52 +0100 |
---|---|---|
committer | Peter Zijlstra <peterz@infradead.org> | 2019-11-08 22:34:14 +0100 |
commit | 6e2df0581f569038719cf2bc2b3baa3fcc83cab4 (patch) | |
tree | 91a337f916b868f9a73864949698dd27762d9a8e /kernel/sched/sched.h | |
parent | e3b8b6a0d12cccf772113d6b5c1875192186fbd4 (diff) |
sched: Fix pick_next_task() vs 'change' pattern race
Commit 67692435c411 ("sched: Rework pick_next_task() slow-path")
inadvertly introduced a race because it changed a previously
unexplored dependency between dropping the rq->lock and
sched_class::put_prev_task().
The comments about dropping rq->lock, in for example
newidle_balance(), only mentions the task being current and ->on_cpu
being set. But when we look at the 'change' pattern (in for example
sched_setnuma()):
queued = task_on_rq_queued(p); /* p->on_rq == TASK_ON_RQ_QUEUED */
running = task_current(rq, p); /* rq->curr == p */
if (queued)
dequeue_task(...);
if (running)
put_prev_task(...);
/* change task properties */
if (queued)
enqueue_task(...);
if (running)
set_next_task(...);
It becomes obvious that if we do this after put_prev_task() has
already been called on @p, things go sideways. This is exactly what
the commit in question allows to happen when it does:
prev->sched_class->put_prev_task(rq, prev, rf);
if (!rq->nr_running)
newidle_balance(rq, rf);
The newidle_balance() call will drop rq->lock after we've called
put_prev_task() and that allows the above 'change' pattern to
interleave and mess up the state.
Furthermore, it turns out we lost the RT-pull when we put the last DL
task.
Fix both problems by extracting the balancing from put_prev_task() and
doing a multi-class balance() pass before put_prev_task().
Fixes: 67692435c411 ("sched: Rework pick_next_task() slow-path")
Reported-by: Quentin Perret <qperret@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Quentin Perret <qperret@google.com>
Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r-- | kernel/sched/sched.h | 30 |
1 files changed, 27 insertions, 3 deletions
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0db2c1b3361e..c8870c5bd7df 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1727,10 +1727,11 @@ struct sched_class { struct task_struct * (*pick_next_task)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); - void (*put_prev_task)(struct rq *rq, struct task_struct *p, struct rq_flags *rf); + void (*put_prev_task)(struct rq *rq, struct task_struct *p); void (*set_next_task)(struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP + int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); void (*migrate_task_rq)(struct task_struct *p, int new_cpu); @@ -1773,7 +1774,7 @@ struct sched_class { static inline void put_prev_task(struct rq *rq, struct task_struct *prev) { WARN_ON_ONCE(rq->curr != prev); - prev->sched_class->put_prev_task(rq, prev, NULL); + prev->sched_class->put_prev_task(rq, prev); } static inline void set_next_task(struct rq *rq, struct task_struct *next) @@ -1787,8 +1788,12 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next) #else #define sched_class_highest (&dl_sched_class) #endif + +#define for_class_range(class, _from, _to) \ + for (class = (_from); class != (_to); class = class->next) + #define for_each_class(class) \ - for (class = sched_class_highest; class; class = class->next) + for_class_range(class, sched_class_highest, NULL) extern const struct sched_class stop_sched_class; extern const struct sched_class dl_sched_class; @@ -1796,6 +1801,25 @@ extern const struct sched_class rt_sched_class; extern const struct sched_class fair_sched_class; extern const struct sched_class idle_sched_class; +static inline bool sched_stop_runnable(struct rq *rq) +{ + return rq->stop && task_on_rq_queued(rq->stop); +} + +static inline bool sched_dl_runnable(struct rq *rq) +{ + return rq->dl.dl_nr_running > 0; +} + +static inline bool sched_rt_runnable(struct rq *rq) +{ + return rq->rt.rt_queued > 0; +} + +static inline bool sched_fair_runnable(struct rq *rq) +{ + return rq->cfs.nr_running > 0; +} #ifdef CONFIG_SMP |