summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Zijlstra <a.p.zijlstra@chello.nl>2008-06-05 14:49:58 +0200
committerIngo Molnar <mingo@elte.hu>2008-06-10 12:17:28 +0200
commit7def2be1dc679984f4c4fb3ef19a8a081b2454ec (patch)
tree5be15957919d095cdbbc9ed3217d9dec939b234b
parente958b3600484533ff801920290468adc8135f89d (diff)
sched: fix hotplug cpus on ia64
Cliff Wickman wrote: > I built an ia64 kernel from Andrew's tree (2.6.26-rc2-mm1) > and get a very predictable hotplug cpu problem. > billberry1:/tmp/cpw # ./dis > disabled cpu 17 > enabled cpu 17 > billberry1:/tmp/cpw # ./dis > disabled cpu 17 > enabled cpu 17 > billberry1:/tmp/cpw # ./dis > > The script that disables the cpu always hangs (unkillable) > on the 3rd attempt. > > And a bit further: > The kstopmachine thread always sits on the run queue (real time) for about > 30 minutes before running. this fix solves some (but not all) issues between CPU hotplug and RT bandwidth throttling. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--kernel/sched.c15
-rw-r--r--kernel/sched_rt.c109
2 files changed, 115 insertions, 9 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 727bdef76161..e9c24a128655 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7513,21 +7513,28 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
static int update_sched_domains(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
+ int cpu = (int)(long)hcpu;
+
switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
+ disable_runtime(cpu_rq(cpu));
+ /* fall-through */
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
detach_destroy_domains(&cpu_online_map);
free_sched_domains();
return NOTIFY_OK;
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
+
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
+ enable_runtime(cpu_rq(cpu));
+ /* fall-through */
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
/*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index eaa606071d51..8ae3416e0bb4 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -286,6 +286,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
continue;
spin_lock(&iter->rt_runtime_lock);
+ if (iter->rt_runtime == RUNTIME_INF)
+ goto next;
+
diff = iter->rt_runtime - iter->rt_time;
if (diff > 0) {
do_div(diff, weight);
@@ -299,12 +302,105 @@ static int balance_runtime(struct rt_rq *rt_rq)
break;
}
}
+next:
spin_unlock(&iter->rt_runtime_lock);
}
spin_unlock(&rt_b->rt_runtime_lock);
return more;
}
+
+static void __disable_runtime(struct rq *rq)
+{
+ struct root_domain *rd = rq->rd;
+ struct rt_rq *rt_rq;
+
+ if (unlikely(!scheduler_running))
+ return;
+
+ for_each_leaf_rt_rq(rt_rq, rq) {
+ struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+ s64 want;
+ int i;
+
+ spin_lock(&rt_b->rt_runtime_lock);
+ spin_lock(&rt_rq->rt_runtime_lock);
+ if (rt_rq->rt_runtime == RUNTIME_INF ||
+ rt_rq->rt_runtime == rt_b->rt_runtime)
+ goto balanced;
+ spin_unlock(&rt_rq->rt_runtime_lock);
+
+ want = rt_b->rt_runtime - rt_rq->rt_runtime;
+
+ for_each_cpu_mask(i, rd->span) {
+ struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+ s64 diff;
+
+ if (iter == rt_rq)
+ continue;
+
+ spin_lock(&iter->rt_runtime_lock);
+ if (want > 0) {
+ diff = min_t(s64, iter->rt_runtime, want);
+ iter->rt_runtime -= diff;
+ want -= diff;
+ } else {
+ iter->rt_runtime -= want;
+ want -= want;
+ }
+ spin_unlock(&iter->rt_runtime_lock);
+
+ if (!want)
+ break;
+ }
+
+ spin_lock(&rt_rq->rt_runtime_lock);
+ BUG_ON(want);
+balanced:
+ rt_rq->rt_runtime = RUNTIME_INF;
+ spin_unlock(&rt_rq->rt_runtime_lock);
+ spin_unlock(&rt_b->rt_runtime_lock);
+ }
+}
+
+static void disable_runtime(struct rq *rq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rq->lock, flags);
+ __disable_runtime(rq);
+ spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void __enable_runtime(struct rq *rq)
+{
+ struct root_domain *rd = rq->rd;
+ struct rt_rq *rt_rq;
+
+ if (unlikely(!scheduler_running))
+ return;
+
+ for_each_leaf_rt_rq(rt_rq, rq) {
+ struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+
+ spin_lock(&rt_b->rt_runtime_lock);
+ spin_lock(&rt_rq->rt_runtime_lock);
+ rt_rq->rt_runtime = rt_b->rt_runtime;
+ rt_rq->rt_time = 0;
+ spin_unlock(&rt_rq->rt_runtime_lock);
+ spin_unlock(&rt_b->rt_runtime_lock);
+ }
+}
+
+static void enable_runtime(struct rq *rq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rq->lock, flags);
+ __enable_runtime(rq);
+ spin_unlock_irqrestore(&rq->lock, flags);
+}
+
#endif
static inline int rt_se_prio(struct sched_rt_entity *rt_se)
@@ -334,14 +430,13 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
#ifdef CONFIG_SMP
if (rt_rq->rt_time > runtime) {
- int more;
-
spin_unlock(&rt_rq->rt_runtime_lock);
- more = balance_runtime(rt_rq);
+ balance_runtime(rt_rq);
spin_lock(&rt_rq->rt_runtime_lock);
- if (more)
- runtime = sched_rt_runtime(rt_rq);
+ runtime = sched_rt_runtime(rt_rq);
+ if (runtime == RUNTIME_INF)
+ return 0;
}
#endif
@@ -1174,6 +1269,8 @@ static void rq_online_rt(struct rq *rq)
if (rq->rt.overloaded)
rt_set_overload(rq);
+ __enable_runtime(rq);
+
cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
}
@@ -1183,6 +1280,8 @@ static void rq_offline_rt(struct rq *rq)
if (rq->rt.overloaded)
rt_clear_overload(rq);
+ __disable_runtime(rq);
+
cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
}