path: root/kernel/sched/fair.c
author    Catalin Marinas <catalin.marinas@arm.com>  2021-08-31 09:10:00 +0100
committer Catalin Marinas <catalin.marinas@arm.com>  2021-08-31 09:10:00 +0100
commit    65266a7c6abfa1ad915a362c41bf38576607f1f9 (patch)
tree      046d86fc88c50fcdeaddb9ab4c709ae0878b1e3f /kernel/sched/fair.c
parent    1a7f67e618d42e9870dcd9fb0c7b2682d71fd631 (diff)
parent    702f43872665e3b1cc6fdb77d238533274fc9d18 (diff)
Merge remote-tracking branch 'tip/sched/arm64' into for-next/core
* tip/sched/arm64: (785 commits)
  Documentation: arm64: describe asymmetric 32-bit support
  arm64: Remove logic to kill 32-bit tasks on 64-bit-only cores
  arm64: Hook up cmdline parameter to allow mismatched 32-bit EL0
  arm64: Advertise CPUs capable of running 32-bit applications in sysfs
  arm64: Prevent offlining first CPU with 32-bit EL0 on mismatched system
  arm64: exec: Adjust affinity for compat tasks with mismatched 32-bit EL0
  arm64: Implement task_cpu_possible_mask()
  sched: Introduce dl_task_check_affinity() to check proposed affinity
  sched: Allow task CPU affinity to be restricted on asymmetric systems
  sched: Split the guts of sched_setaffinity() into a helper function
  sched: Introduce task_struct::user_cpus_ptr to track requested affinity
  sched: Reject CPU affinity changes based on task_cpu_possible_mask()
  cpuset: Cleanup cpuset_cpus_allowed_fallback() use in select_fallback_rq()
  cpuset: Honour task_cpu_possible_mask() in guarantee_online_cpus()
  cpuset: Don't use the cpu_possible_mask as a last resort for cgroup v1
  sched: Introduce task_cpu_possible_mask() to limit fallback rq selection
  sched: Cgroup SCHED_IDLE support
  sched/topology: Skip updating masks for non-online nodes
  Linux 5.14-rc6
  lib: use PFN_PHYS() in devmem_is_allowed()
  ...
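Of the commits above, the fair.c diff below is dominated by "sched: Cgroup SCHED_IDLE support", which lets a whole cgroup be scheduled at SCHED_IDLE priority. As a hedged usage sketch (the mount point and the `background` group are placeholder paths; cpu.idle is the cgroup v2 knob wired up to sched_group_set_idle() at the bottom of this diff):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Usage sketch: mark a cgroup as SCHED_IDLE through the cpu.idle file
 * added by this series. The cgroup path is a placeholder; adjust it to
 * your v2 hierarchy.
 */
int main(void)
{
	const char *path = "/sys/fs/cgroup/background/cpu.idle";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "1", 1) != 1) {	/* "1" = idle, "0" = normal */
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}

Writing 1 makes every task in the group count as idle from its ancestors' point of view, which is what the idle_h_nr_running plumbing in the hunks below implements.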
Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--  kernel/sched/fair.c | 211
1 file changed, 179 insertions(+), 32 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 44c452072a1b..5aa3cfd15a2e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -431,6 +431,23 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
}
}
+static int tg_is_idle(struct task_group *tg)
+{
+ return tg->idle > 0;
+}
+
+static int cfs_rq_is_idle(struct cfs_rq *cfs_rq)
+{
+ return cfs_rq->idle > 0;
+}
+
+static int se_is_idle(struct sched_entity *se)
+{
+ if (entity_is_task(se))
+ return task_has_idle_policy(task_of(se));
+ return cfs_rq_is_idle(group_cfs_rq(se));
+}
+
#else /* !CONFIG_FAIR_GROUP_SCHED */
#define for_each_sched_entity(se) \
@@ -468,6 +485,21 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
{
}
+static int tg_is_idle(struct task_group *tg)
+{
+ return 0;
+}
+
+static int cfs_rq_is_idle(struct cfs_rq *cfs_rq)
+{
+ return 0;
+}
+
+static int se_is_idle(struct sched_entity *se)
+{
+ return 0;
+}
+
#endif /* CONFIG_FAIR_GROUP_SCHED */
static __always_inline
@@ -1486,7 +1518,7 @@ static inline bool is_core_idle(int cpu)
if (cpu == sibling)
continue;
- if (!idle_cpu(cpu))
+ if (!idle_cpu(sibling))
return false;
}
#endif
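The one-character fix above addresses an SMT sibling scan that never actually looked at the siblings: every iteration re-tested idle_cpu(cpu), the CPU whose core is being inspected. Reconstructed for context (only the idle_cpu() line is part of the hunk):

/*
 * Reconstructed sketch of the fixed helper: with idle_cpu(sibling) the
 * loop inspects each SMT sibling instead of re-testing the original CPU,
 * which had made the sibling scan a no-op.
 */
static inline bool is_core_idle(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	int sibling;

	for_each_cpu(sibling, cpu_smt_mask(cpu)) {
		if (cpu == sibling)
			continue;

		if (!idle_cpu(sibling))		/* was: idle_cpu(cpu) */
			return false;
	}
#endif

	return true;
}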
@@ -4841,6 +4873,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
+ if (cfs_rq_is_idle(group_cfs_rq(se)))
+ idle_task_delta = cfs_rq->h_nr_running;
+
qcfs_rq->h_nr_running -= task_delta;
qcfs_rq->idle_h_nr_running -= idle_task_delta;
@@ -4860,6 +4895,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
update_load_avg(qcfs_rq, se, 0);
se_update_runnable(se);
+ if (cfs_rq_is_idle(group_cfs_rq(se)))
+ idle_task_delta = cfs_rq->h_nr_running;
+
qcfs_rq->h_nr_running -= task_delta;
qcfs_rq->idle_h_nr_running -= idle_task_delta;
}
@@ -4904,39 +4942,45 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
task_delta = cfs_rq->h_nr_running;
idle_task_delta = cfs_rq->idle_h_nr_running;
for_each_sched_entity(se) {
+ struct cfs_rq *qcfs_rq = cfs_rq_of(se);
+
if (se->on_rq)
break;
- cfs_rq = cfs_rq_of(se);
- enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+ enqueue_entity(qcfs_rq, se, ENQUEUE_WAKEUP);
+
+ if (cfs_rq_is_idle(group_cfs_rq(se)))
+ idle_task_delta = cfs_rq->h_nr_running;
- cfs_rq->h_nr_running += task_delta;
- cfs_rq->idle_h_nr_running += idle_task_delta;
+ qcfs_rq->h_nr_running += task_delta;
+ qcfs_rq->idle_h_nr_running += idle_task_delta;
/* end evaluation on encountering a throttled cfs_rq */
- if (cfs_rq_throttled(cfs_rq))
+ if (cfs_rq_throttled(qcfs_rq))
goto unthrottle_throttle;
}
for_each_sched_entity(se) {
- cfs_rq = cfs_rq_of(se);
+ struct cfs_rq *qcfs_rq = cfs_rq_of(se);
- update_load_avg(cfs_rq, se, UPDATE_TG);
+ update_load_avg(qcfs_rq, se, UPDATE_TG);
se_update_runnable(se);
- cfs_rq->h_nr_running += task_delta;
- cfs_rq->idle_h_nr_running += idle_task_delta;
+ if (cfs_rq_is_idle(group_cfs_rq(se)))
+ idle_task_delta = cfs_rq->h_nr_running;
+ qcfs_rq->h_nr_running += task_delta;
+ qcfs_rq->idle_h_nr_running += idle_task_delta;
/* end evaluation on encountering a throttled cfs_rq */
- if (cfs_rq_throttled(cfs_rq))
+ if (cfs_rq_throttled(qcfs_rq))
goto unthrottle_throttle;
/*
* One parent has been throttled and cfs_rq removed from the
* list. Add it back to not break the leaf list.
*/
- if (throttled_hierarchy(cfs_rq))
- list_add_leaf_cfs_rq(cfs_rq);
+ if (throttled_hierarchy(qcfs_rq))
+ list_add_leaf_cfs_rq(qcfs_rq);
}
/* At this point se is NULL and we are at root level*/
@@ -4949,9 +4993,9 @@ unthrottle_throttle:
* assertion below.
*/
for_each_sched_entity(se) {
- cfs_rq = cfs_rq_of(se);
+ struct cfs_rq *qcfs_rq = cfs_rq_of(se);
- if (list_add_leaf_cfs_rq(cfs_rq))
+ if (list_add_leaf_cfs_rq(qcfs_rq))
break;
}
@@ -5574,6 +5618,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cfs_rq->h_nr_running++;
cfs_rq->idle_h_nr_running += idle_h_nr_running;
+ if (cfs_rq_is_idle(cfs_rq))
+ idle_h_nr_running = 1;
+
/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto enqueue_throttle;
@@ -5591,6 +5638,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cfs_rq->h_nr_running++;
cfs_rq->idle_h_nr_running += idle_h_nr_running;
+ if (cfs_rq_is_idle(cfs_rq))
+ idle_h_nr_running = 1;
+
/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto enqueue_throttle;
@@ -5668,6 +5718,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cfs_rq->h_nr_running--;
cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+ if (cfs_rq_is_idle(cfs_rq))
+ idle_h_nr_running = 1;
+
/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto dequeue_throttle;
@@ -5697,6 +5750,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
cfs_rq->h_nr_running--;
cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+ if (cfs_rq_is_idle(cfs_rq))
+ idle_h_nr_running = 1;
+
/* end evaluation on encountering a throttled cfs_rq */
if (cfs_rq_throttled(cfs_rq))
goto dequeue_throttle;
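The repeated three-line additions in the throttle, unthrottle, enqueue and dequeue paths above all implement one rule: idle_h_nr_running counts how many queued tasks are idle from that level's point of view, and the moment the hierarchy walk crosses into a group marked idle, everything below it counts as idle. Hence the throttle/unthrottle paths widen idle_task_delta to the full cfs_rq->h_nr_running, while the single-task enqueue/dequeue paths snap idle_h_nr_running to 1 regardless of the task's own policy. A toy standalone model of the widening (hypothetical types, not the kernel's):

/*
 * path[0] is the throttled cfs_rq, path[1..depth-1] its ancestors.
 * Once the walk leaves an idle group, the idle delta snaps to the full
 * task delta and stays there for every level above.
 */
struct level {
	int h_nr_running;	/* runnable tasks queued below this level */
	int idle_h_nr_running;	/* of those, how many count as idle here */
	int idle;		/* group marked idle via cpu.idle */
};

static void throttle_model(struct level *path[], int depth)
{
	int task_delta = path[0]->h_nr_running;
	int idle_delta = path[0]->idle_h_nr_running;
	int i;

	for (i = 1; i < depth; i++) {
		if (path[i - 1]->idle)		/* crossed an idle boundary */
			idle_delta = task_delta;

		path[i]->h_nr_running -= task_delta;
		path[i]->idle_h_nr_running -= idle_delta;
	}
}

For example, with root <- A <- B where B is idle and holds three tasks (one of them itself SCHED_IDLE), throttling B starts with an idle delta of 1, but it widens to 3 as soon as the walk leaves B, so A and root each lose 3 running and 3 idle tasks.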
@@ -6249,7 +6305,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
time = cpu_clock(this);
}
- for_each_cpu_wrap(cpu, cpus, target) {
+ for_each_cpu_wrap(cpu, cpus, target + 1) {
if (has_idle_core) {
i = select_idle_core(p, cpu, cpus, &idle_cpu);
if ((unsigned int)i < nr_cpumask_bits)
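The scan start moves from target to target + 1, presumably because the caller has already tested target before falling back to the full scan; for_each_cpu_wrap() still visits every candidate exactly once. A tiny standalone illustration of that wrap order (hypothetical 8-CPU mask):

#include <stdio.h>

/*
 * Starting the wrap-around walk at target + 1 still covers every CPU
 * once; it just no longer re-tests target first.
 */
int main(void)
{
	const int nr_cpus = 8, target = 3;
	int i;

	for (i = 0; i < nr_cpus; i++)
		printf("scan cpu %d\n", (target + 1 + i) % nr_cpus);

	return 0;
}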
@@ -6376,6 +6432,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
/* Check a recently used CPU as a potential idle candidate: */
recent_used_cpu = p->recent_used_cpu;
+ p->recent_used_cpu = prev;
if (recent_used_cpu != prev &&
recent_used_cpu != target &&
cpus_share_cache(recent_used_cpu, target) &&
@@ -6902,9 +6959,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
} else if (wake_flags & WF_TTWU) { /* XXX always ? */
/* Fast path */
new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
-
- if (want_affine)
- current->recent_used_cpu = cpu;
}
rcu_read_unlock();
@@ -7041,24 +7095,22 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
static void set_last_buddy(struct sched_entity *se)
{
- if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se))))
- return;
-
for_each_sched_entity(se) {
if (SCHED_WARN_ON(!se->on_rq))
return;
+ if (se_is_idle(se))
+ return;
cfs_rq_of(se)->last = se;
}
}
static void set_next_buddy(struct sched_entity *se)
{
- if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se))))
- return;
-
for_each_sched_entity(se) {
if (SCHED_WARN_ON(!se->on_rq))
return;
+ if (se_is_idle(se))
+ return;
cfs_rq_of(se)->next = se;
}
}
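Both buddy helpers previously bailed out only for an idle *task* at the leaf; with idle cgroups an idle entity can appear at any level, so the check moves inside the hierarchy walk and stops buddy marking at the first idle entity. The final form, reconstructed from the hunk above (set_last_buddy() is symmetric):

static void set_next_buddy(struct sched_entity *se)
{
	for_each_sched_entity(se) {
		if (SCHED_WARN_ON(!se->on_rq))
			return;
		if (se_is_idle(se))	/* idle group or idle task: no buddy */
			return;
		cfs_rq_of(se)->next = se;
	}
}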
@@ -7079,6 +7131,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
int scale = cfs_rq->nr_running >= sched_nr_latency;
int next_buddy_marked = 0;
+ int cse_is_idle, pse_is_idle;
if (unlikely(se == pse))
return;
@@ -7123,8 +7176,21 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
return;
find_matching_se(&se, &pse);
- update_curr(cfs_rq_of(se));
BUG_ON(!pse);
+
+ cse_is_idle = se_is_idle(se);
+ pse_is_idle = se_is_idle(pse);
+
+ /*
+ * Preempt an idle group in favor of a non-idle group (and don't preempt
+ * in the inverse case).
+ */
+ if (cse_is_idle && !pse_is_idle)
+ goto preempt;
+ if (cse_is_idle != pse_is_idle)
+ return;
+
+ update_curr(cfs_rq_of(se));
if (wakeup_preempt_entity(se, pse) == 1) {
/*
* Bias pick_next to pick the sched entity that is
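The new checks encode a simple asymmetric rule before the usual vruntime comparison runs. Distilled into a hypothetical standalone helper (not kernel code):

/*
 * Returns 1 to preempt, -1 to never preempt, 0 to fall through to the
 * normal wakeup_preempt_entity() vruntime comparison.
 */
static int idle_group_preempt(int cse_is_idle, int pse_is_idle)
{
	if (cse_is_idle && !pse_is_idle)
		return 1;	/* non-idle wakee preempts idle current */
	if (cse_is_idle != pse_is_idle)
		return -1;	/* idle wakee never preempts non-idle current */
	return 0;
}

Note that update_curr() also moves below these checks, so the current entity's runtime accounting is only refreshed once a real vruntime comparison is about to happen.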
@@ -10217,9 +10283,11 @@ static inline int on_null_domain(struct rq *rq)
static inline int find_new_ilb(void)
{
int ilb;
+ const struct cpumask *hk_mask;
+
+ hk_mask = housekeeping_cpumask(HK_FLAG_MISC);
- for_each_cpu_and(ilb, nohz.idle_cpus_mask,
- housekeeping_cpumask(HK_FLAG_MISC)) {
+ for_each_cpu_and(ilb, nohz.idle_cpus_mask, hk_mask) {
if (ilb == smp_processor_id())
continue;
@@ -11416,10 +11484,12 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
static DEFINE_MUTEX(shares_mutex);
-int sched_group_set_shares(struct task_group *tg, unsigned long shares)
+static int __sched_group_set_shares(struct task_group *tg, unsigned long shares)
{
int i;
+ lockdep_assert_held(&shares_mutex);
+
/*
* We can't change the weight of the root cgroup.
*/
@@ -11428,9 +11498,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
- mutex_lock(&shares_mutex);
if (tg->shares == shares)
- goto done;
+ return 0;
tg->shares = shares;
for_each_possible_cpu(i) {
@@ -11448,10 +11517,88 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
rq_unlock_irqrestore(rq, &rf);
}
-done:
+ return 0;
+}
+
+int sched_group_set_shares(struct task_group *tg, unsigned long shares)
+{
+ int ret;
+
+ mutex_lock(&shares_mutex);
+ if (tg_is_idle(tg))
+ ret = -EINVAL;
+ else
+ ret = __sched_group_set_shares(tg, shares);
+ mutex_unlock(&shares_mutex);
+
+ return ret;
+}
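The refactor above hoists shares_mutex out of the guts: __sched_group_set_shares() now asserts the mutex via lockdep while the locking moves to thin wrappers, so sched_group_set_idle() below can reuse the core while already holding the mutex, without recursive locking. The general idiom, as a hypothetical userspace sketch:

#include <pthread.h>

/*
 * The double-underscore core assumes the mutex is already held (the
 * kernel version documents this with lockdep_assert_held()), so other
 * paths that already hold it can call the core directly.
 */
struct obj { long value; };

static pthread_mutex_t obj_mutex = PTHREAD_MUTEX_INITIALIZER;

static int __obj_set_value(struct obj *o, long v)	/* caller holds obj_mutex */
{
	o->value = v;
	return 0;
}

int obj_set_value(struct obj *o, long v)
{
	int ret;

	pthread_mutex_lock(&obj_mutex);
	ret = __obj_set_value(o, v);
	pthread_mutex_unlock(&obj_mutex);

	return ret;
}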
+
+int sched_group_set_idle(struct task_group *tg, long idle)
+{
+ int i;
+
+ if (tg == &root_task_group)
+ return -EINVAL;
+
+ if (idle < 0 || idle > 1)
+ return -EINVAL;
+
+ mutex_lock(&shares_mutex);
+
+ if (tg->idle == idle) {
+ mutex_unlock(&shares_mutex);
+ return 0;
+ }
+
+ tg->idle = idle;
+
+ for_each_possible_cpu(i) {
+ struct rq *rq = cpu_rq(i);
+ struct sched_entity *se = tg->se[i];
+ struct cfs_rq *grp_cfs_rq = tg->cfs_rq[i];
+ bool was_idle = cfs_rq_is_idle(grp_cfs_rq);
+ long idle_task_delta;
+ struct rq_flags rf;
+
+ rq_lock_irqsave(rq, &rf);
+
+ grp_cfs_rq->idle = idle;
+ if (WARN_ON_ONCE(was_idle == cfs_rq_is_idle(grp_cfs_rq)))
+ goto next_cpu;
+
+ idle_task_delta = grp_cfs_rq->h_nr_running -
+ grp_cfs_rq->idle_h_nr_running;
+ if (!cfs_rq_is_idle(grp_cfs_rq))
+ idle_task_delta *= -1;
+
+ for_each_sched_entity(se) {
+ struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+ if (!se->on_rq)
+ break;
+
+ cfs_rq->idle_h_nr_running += idle_task_delta;
+
+ /* Already accounted at parent level and above. */
+ if (cfs_rq_is_idle(cfs_rq))
+ break;
+ }
+
+next_cpu:
+ rq_unlock_irqrestore(rq, &rf);
+ }
+
+ /* Idle groups have minimum weight. */
+ if (tg_is_idle(tg))
+ __sched_group_set_shares(tg, scale_load(WEIGHT_IDLEPRIO));
+ else
+ __sched_group_set_shares(tg, NICE_0_LOAD);
+
mutex_unlock(&shares_mutex);
return 0;
}
+
#else /* CONFIG_FAIR_GROUP_SCHED */
void free_fair_sched_group(struct task_group *tg) { }
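Finally, a note on the weight choice at the end of sched_group_set_idle() above: idle groups are pinned to WEIGHT_IDLEPRIO and groups returning to normal get NICE_0_LOAD. Assuming the usual values from kernel/sched/sched.h (WEIGHT_IDLEPRIO == 3, nice-0 weight == 1024), the share an idle group receives next to one always-runnable nice-0 sibling is tiny:

#include <stdio.h>

/*
 * Back-of-envelope share math: CFS distributes CPU proportionally to
 * weight, so an idle group next to one busy nice-0 sibling gets about
 * 3 / (3 + 1024), roughly 0.3% of the CPU.
 */
int main(void)
{
	const double idle_w = 3.0, nice0_w = 1024.0;

	printf("idle group share vs one nice-0 sibling: %.2f%%\n",
	       100.0 * idle_w / (idle_w + nice0_w));
	return 0;
}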