Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar: "The biggest change affects group scheduling: we now track the runnable average on a per-task entity basis, allowing a smoother, exponential decay average based load/weight estimation instead of the previous binary on-the-runqueue/off-the-runqueue load weight method. This will inevitably disturb workloads that were in some sort of borderline balancing state or unstable equilibrium, so an eye has to be kept on regressions. For that reason the new load average is only limited to group scheduling (shares distribution) at the moment (which was also hurting the most from the prior, crude weight calculation and whose scheduling quality wins most from this change) - but we plan to extend this to regular SMP balancing as well in the future, which will simplify and speed up things a bit. Other changes involve ongoing preparatory work to extend NOHZ to the scheduler as well, eventually allowing completely irq-free user-space execution." * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits) Revert "sched/autogroup: Fix crash on reboot when autogroup is disabled" cputime: Comment cputime's adjusting code cputime: Consolidate cputime adjustment code cputime: Rename thread_group_times to thread_group_cputime_adjusted cputime: Move thread_group_cputime() to sched code vtime: Warn if irqs aren't disabled on system time accounting APIs vtime: No need to disable irqs on vtime_account() vtime: Consolidate a bit the ctx switch code vtime: Explicitly account pending user time on process tick vtime: Remove the underscore prefix invasion sched/autogroup: Fix crash on reboot when autogroup is disabled cputime: Separate irqtime accounting from generic vtime cputime: Specialize irq vtime hooks kvm: Directly account vtime to system on guest switch vtime: Make vtime_account_system() irqsafe vtime: Gather vtime declarations to their own header file sched: Describe CFS load-balancer sched: Introduce temporary FAIR_GROUP_SCHED dependency for load-tracking sched: Make __update_entity_runnable_avg() fast sched: Update_cfs_shares at period edge ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-11 18:21:38 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-11 18:21:38 -0800
commit: f57d54bab696133fae569c5f01352249c36fc74f (patch)
tree: 8ebe3c6deaf95c424c86843c3d290fbf2a9e80d2 /arch
parent: da830e589a45f0c42eef6f3cbd07275f8893f181 (diff)
parent: c1ad41f1f7270c1956da13fa8fd59d8d5929d56e (diff)
7 files changed, 29 insertions, 35 deletions
diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index 3deac956d325..7fcf7f08ab06 100644
--- a/arch/ia64/include/asm/cputime.h
+++ b/arch/ia64/include/asm/cputime.h
@@ -103,5 +103,7 @@ static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
 #define cputime64_to_clock_t(__ct)	\
 	cputime_to_clock_t((__force cputime_t)__ct)
 
+extern void arch_vtime_task_switch(struct task_struct *tsk);
+
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 #endif /* __IA64_CPUTIME_H */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index f6388216080d..b1995efbfd21 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -83,7 +83,7 @@ static struct clocksource *itc_clocksource;
 
 extern cputime_t cycle_to_cputime(u64 cyc);
 
-static void vtime_account_user(struct task_struct *tsk)
+void vtime_account_user(struct task_struct *tsk)
 {
 	cputime_t delta_utime;
 	struct thread_info *ti = task_thread_info(tsk);
@@ -100,18 +100,11 @@ static void vtime_account_user(struct task_struct *tsk)
  * accumulated times to the current process, and to prepare accounting on
  * the next process.
  */
-void vtime_task_switch(struct task_struct *prev)
+void arch_vtime_task_switch(struct task_struct *prev)
 {
 	struct thread_info *pi = task_thread_info(prev);
 	struct thread_info *ni = task_thread_info(current);
 
-	if (idle_task(smp_processor_id()) != prev)
-		vtime_account_system(prev);
-	else
-		vtime_account_idle(prev);
-
-	vtime_account_user(prev);
-
 	pi->ac_stamp = ni->ac_stamp;
 	ni->ac_stime = ni->ac_utime = 0;
 }
@@ -126,6 +119,8 @@ static cputime_t vtime_delta(struct task_struct *tsk)
 	cputime_t delta_stime;
 	__u64 now;
 
+	WARN_ON_ONCE(!irqs_disabled());
+
 	now = ia64_get_itc();
 
 	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
@@ -147,15 +142,6 @@ void vtime_account_idle(struct task_struct *tsk)
 	account_idle_time(vtime_delta(tsk));
 }
 
-/*
- * Called from the timer interrupt handler to charge accumulated user time
- * to the current process.  Must be called with interrupts disabled.
- */
-void account_process_tick(struct task_struct *p, int user_tick)
-{
-	vtime_account_user(p);
-}
-
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
 static irqreturn_t
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 487d46ff68a1..483733bd06d4 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -228,6 +228,8 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 
 #define cputime64_to_clock_t(ct)	cputime_to_clock_t((cputime_t)(ct))
 
+static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
+
 #endif /* __KERNEL__ */
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 #endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index ce4cb772dc78..b3b14352b05e 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -297,6 +297,8 @@ static u64 vtime_delta(struct task_struct *tsk,
 	u64 now, nowscaled, deltascaled;
 	u64 udelta, delta, user_scaled;
 
+	WARN_ON_ONCE(!irqs_disabled());
+
 	now = mftb();
 	nowscaled = read_spurr(now);
 	get_paca()->system_time += now - get_paca()->starttime;
@@ -355,15 +357,15 @@ void vtime_account_idle(struct task_struct *tsk)
 }
 
 /*
- * Transfer the user and system times accumulated in the paca
- * by the exception entry and exit code to the generic process
- * user and system time records.
+ * Transfer the user time accumulated in the paca
+ * by the exception entry and exit code to the generic
+ * process user time records.
  * Must be called with interrupts disabled.
- * Assumes that vtime_account() has been called recently
- * (i.e. since the last entry from usermode) so that
+ * Assumes that vtime_account_system/idle() has been called
+ * recently (i.e. since the last entry from usermode) so that
  * get_paca()->user_time_scaled is up to date.
  */
-void account_process_tick(struct task_struct *tsk, int user_tick)
+void vtime_account_user(struct task_struct *tsk)
 {
 	cputime_t utime, utimescaled;
 
@@ -375,12 +377,6 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
 	account_user_time(tsk, utime, utimescaled);
 }
 
-void vtime_task_switch(struct task_struct *prev)
-{
-	vtime_account(prev);
-	account_process_tick(prev, 0);
-}
-
 #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
 #define calc_cputime_factors()
 #endif
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 023d5ae24482..d2ff41370c0c 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -14,6 +14,7 @@
 
 
 #define __ARCH_HAS_VTIME_ACCOUNT
+#define __ARCH_HAS_VTIME_TASK_SWITCH
 
 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 790334427895..e84b8b68444a 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -112,7 +112,12 @@ void vtime_task_switch(struct task_struct *prev)
 	S390_lowcore.system_timer = ti->system_timer;
 }
 
-void account_process_tick(struct task_struct *tsk, int user_tick)
+/*
+ * In s390, accounting pending user time also implies
+ * accounting system time in order to correctly compute
+ * the stolen time accounting.
+ */
+void vtime_account_user(struct task_struct *tsk)
 {
 	if (do_account_vtime(tsk, HARDIRQ_OFFSET))
 		virt_timer_expire();
@@ -127,6 +132,8 @@ void vtime_account(struct task_struct *tsk)
 	struct thread_info *ti = task_thread_info(tsk);
 	u64 timer, system;
 
+	WARN_ON_ONCE(!irqs_disabled());
+
 	timer = S390_lowcore.last_update_timer;
 	S390_lowcore.last_update_timer = get_vtimer();
 	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
@@ -140,6 +147,10 @@ void vtime_account(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(vtime_account);
 
+void vtime_account_system(struct task_struct *tsk)
+__attribute__((alias("vtime_account")));
+EXPORT_SYMBOL_GPL(vtime_account_system);
+
 void __kprobes vtime_stop_cpu(void)
 {
 	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ecced9d18986..d91a95568002 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -608,9 +608,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		kvm_s390_deliver_pending_interrupts(vcpu);
 
 	vcpu->arch.sie_block->icptcode = 0;
-	local_irq_disable();
 	kvm_guest_enter();
-	local_irq_enable();
 	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
 		   atomic_read(&vcpu->arch.sie_block->cpuflags));
 	trace_kvm_s390_sie_enter(vcpu,
@@ -629,9 +627,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
 		   vcpu->arch.sie_block->icptcode);
 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
-	local_irq_disable();
 	kvm_guest_exit();
-	local_irq_enable();
 
 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
 	return rc;
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-11 18:21:38 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-11 18:21:38 -0800
commit	f57d54bab696133fae569c5f01352249c36fc74f (patch)
tree	8ebe3c6deaf95c424c86843c3d290fbf2a9e80d2 /arch
parent	da830e589a45f0c42eef6f3cbd07275f8893f181 (diff)
parent	c1ad41f1f7270c1956da13fa8fd59d8d5929d56e (diff)