Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar: "The main scheduler changes in this cycle were: - support Intel Turbo Boost Max Technology 3.0 (TBM3) by introducig a notion of 'better cores', which the scheduler will prefer to schedule single threaded workloads on. (Tim Chen, Srinivas Pandruvada) - enhance the handling of asymmetric capacity CPUs further (Morten Rasmussen) - improve/fix load handling when moving tasks between task groups (Vincent Guittot) - simplify and clean up the cputime code (Stanislaw Gruszka) - improve mass fork()ed task spread a.k.a. hackbench speedup (Vincent Guittot) - make struct kthread kmalloc()ed and related fixes (Oleg Nesterov) - add uaccess atomicity debugging (when using access_ok() in the wrong context), under CONFIG_DEBUG_ATOMIC_SLEEP=y (Peter Zijlstra) - implement various fixes, cleanups and other enhancements (Daniel Bristot de Oliveira, Martin Schwidefsky, Rafael J. Wysocki)" * 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (41 commits) sched/core: Use load_avg for selecting idlest group sched/core: Fix find_idlest_group() for fork kthread: Don't abuse kthread_create_on_cpu() in __kthread_create_worker() kthread: Don't use to_live_kthread() in kthread_[un]park() kthread: Don't use to_live_kthread() in kthread_stop() Revert "kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function" kthread: Make struct kthread kmalloc'ed x86/uaccess, sched/preempt: Verify access_ok() context sched/x86: Make CONFIG_SCHED_MC_PRIO=y easier to enable sched/x86: Change CONFIG_SCHED_ITMT to CONFIG_SCHED_MC_PRIO x86/sched: Use #include <linux/mutex.h> instead of #include <asm/mutex.h> cpufreq/intel_pstate: Use CPPC to get max performance acpi/bus: Set _OSC for diverse core support acpi/bus: Enable HWP CPPC objects x86/sched: Add SD_ASYM_PACKING flags to x86 ITMT CPU x86/sysctl: Add sysctl for ITMT scheduling feature x86: Enable Intel Turbo Boost Max Technology 3.0 x86/topology: Define x86's arch_update_cpu_topology sched: Extend scheduler's asym packing sched/fair: Clean up the tunable parameter definitions ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-12-12 12:15:10 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-12-12 12:15:10 -0800
commit: 92c020d08d83673ecd15a9069d4457378668da31 (patch)
tree: 3dbc5a9c1ab179f55be49e30e378cc4e650fc20e /include
parent: bca13ce4554ae9cf5083e5adf395ad2266cb571b (diff)
parent: 6b94780e45c17b83e3e75f8aaca5a328db583c74 (diff)
8 files changed, 76 insertions, 43 deletions
diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h
index fe386fc6e85e..6bb8cd45f53b 100644
--- a/include/asm-generic/cputime_jiffies.h
+++ b/include/asm-generic/cputime_jiffies.h
@@ -7,7 +7,6 @@ typedef unsigned long __nocast cputime_t;
 
 #define cputime_one_jiffy		jiffies_to_cputime(1)
 #define cputime_to_jiffies(__ct)	(__force unsigned long)(__ct)
-#define cputime_to_scaled(__ct)		(__ct)
 #define jiffies_to_cputime(__hz)	(__force cputime_t)(__hz)
 
 typedef u64 __nocast cputime64_t;
diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h
index a84e28e0c634..4e3b18e559b1 100644
--- a/include/asm-generic/cputime_nsecs.h
+++ b/include/asm-generic/cputime_nsecs.h
@@ -34,7 +34,6 @@ typedef u64 __nocast cputime64_t;
  */
 #define cputime_to_jiffies(__ct)	\
 	cputime_div(__ct, NSEC_PER_SEC / HZ)
-#define cputime_to_scaled(__ct)		(__ct)
 #define jiffies_to_cputime(__jif)	\
 	(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
 #define cputime64_to_jiffies64(__ct)	\
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 61a3d90f32b3..051023756520 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -469,6 +469,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
 #define OSC_SB_CPCV2_SUPPORT			0x00000040
 #define OSC_SB_PCLPI_SUPPORT			0x00000080
 #define OSC_SB_OSLPI_SUPPORT			0x00000100
+#define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT		0x00001000
 
 extern bool osc_sb_apei_support_acked;
 extern bool osc_pc_lpi_support_confirmed;
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 44fda64ad434..00f776816aa3 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -78,8 +78,8 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 	return kstat_cpu(cpu).irqs_sum;
 }
 
-extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
-extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
+extern void account_user_time(struct task_struct *, cputime_t);
+extern void account_system_time(struct task_struct *, int, cputime_t);
 extern void account_steal_time(cputime_t);
 extern void account_idle_time(cputime_t);
 
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index a6e82a69c363..c1c3e63d52c1 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -48,6 +48,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
 	__k;								   \
 })
 
+void free_kthread_struct(struct task_struct *k);
 void kthread_bind(struct task_struct *k, unsigned int cpu);
 void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
 int kthread_stop(struct task_struct *k);
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 75e4e30677f1..7eeceac52dea 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -65,19 +65,24 @@
 
 /*
  * Are we doing bottom half or hardware interrupt processing?
- * Are we in a softirq context? Interrupt context?
- * in_softirq - Are we currently processing softirq or have bh disabled?
- * in_serving_softirq - Are we currently processing softirq?
+ *
+ * in_irq()       - We're in (hard) IRQ context
+ * in_softirq()   - We have BH disabled, or are processing softirqs
+ * in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled
+ * in_serving_softirq() - We're in softirq context
+ * in_nmi()       - We're in NMI context
+ * in_task()	  - We're in task context
+ *
+ * Note: due to the BH disabled confusion: in_softirq(),in_interrupt() really
+ *       should not be used in new code.
  */
 #define in_irq()		(hardirq_count())
 #define in_softirq()		(softirq_count())
 #define in_interrupt()		(irq_count())
 #define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
-
-/*
- * Are we in NMI context?
- */
-#define in_nmi()	(preempt_count() & NMI_MASK)
+#define in_nmi()		(preempt_count() & NMI_MASK)
+#define in_task()		(!(preempt_count() & \
+				   (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
 
 /*
  * The preempt_count offset after preempt_disable();
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8863bdf582d5..7551d3e2ab70 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -262,20 +262,9 @@ extern char ___assert_task_state[1 - 2*!!(
 #define set_task_state(tsk, state_value)			\
 	do {							\
 		(tsk)->task_state_change = _THIS_IP_;		\
-		smp_store_mb((tsk)->state, (state_value));		\
+		smp_store_mb((tsk)->state, (state_value));	\
 	} while (0)
 
-/*
- * set_current_state() includes a barrier so that the write of current->state
- * is correctly serialised wrt the caller's subsequent test of whether to
- * actually sleep:
- *
- *	set_current_state(TASK_UNINTERRUPTIBLE);
- *	if (do_i_need_to_sleep())
- *		schedule();
- *
- * If the caller does not need such serialisation then use __set_current_state()
- */
 #define __set_current_state(state_value)			\
 	do {							\
 		current->task_state_change = _THIS_IP_;		\
@@ -284,11 +273,19 @@ extern char ___assert_task_state[1 - 2*!!(
 #define set_current_state(state_value)				\
 	do {							\
 		current->task_state_change = _THIS_IP_;		\
-		smp_store_mb(current->state, (state_value));		\
+		smp_store_mb(current->state, (state_value));	\
 	} while (0)
 
 #else
 
+/*
+ * @tsk had better be current, or you get to keep the pieces.
+ *
+ * The only reason is that computing current can be more expensive than
+ * using a pointer that's already available.
+ *
+ * Therefore, see set_current_state().
+ */
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
 #define set_task_state(tsk, state_value)		\
@@ -299,11 +296,34 @@ extern char ___assert_task_state[1 - 2*!!(
  * is correctly serialised wrt the caller's subsequent test of whether to
  * actually sleep:
  *
+ *   for (;;) {
  *	set_current_state(TASK_UNINTERRUPTIBLE);
- *	if (do_i_need_to_sleep())
- *		schedule();
+ *	if (!need_sleep)
+ *		break;
+ *
+ *	schedule();
+ *   }
+ *   __set_current_state(TASK_RUNNING);
+ *
+ * If the caller does not need such serialisation (because, for instance, the
+ * condition test and condition change and wakeup are under the same lock) then
+ * use __set_current_state().
+ *
+ * The above is typically ordered against the wakeup, which does:
+ *
+ *	need_sleep = false;
+ *	wake_up_state(p, TASK_UNINTERRUPTIBLE);
+ *
+ * Where wake_up_state() (and all other wakeup primitives) imply enough
+ * barriers to order the store of the variable against wakeup.
+ *
+ * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
+ * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
+ * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
  *
- * If the caller does not need such serialisation then use __set_current_state()
+ * This is obviously fine, since they both store the exact same value.
+ *
+ * Also see the comments of try_to_wake_up().
  */
 #define __set_current_state(state_value)		\
 	do { current->state = (state_value); } while (0)
@@ -1057,6 +1077,8 @@ static inline int cpu_numa_flags(void)
 }
 #endif
 
+extern int arch_asym_cpu_priority(int cpu);
+
 struct sched_domain_attr {
 	int relax_domain_level;
 };
@@ -1627,7 +1649,10 @@ struct task_struct {
 	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
 	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */
 
-	cputime_t utime, stime, utimescaled, stimescaled;
+	cputime_t utime, stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+	cputime_t utimescaled, stimescaled;
+#endif
 	cputime_t gtime;
 	struct prev_cputime prev_cputime;
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
@@ -2220,34 +2245,38 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask);
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern void task_cputime(struct task_struct *t,
 			 cputime_t *utime, cputime_t *stime);
-extern void task_cputime_scaled(struct task_struct *t,
-				cputime_t *utimescaled, cputime_t *stimescaled);
 extern cputime_t task_gtime(struct task_struct *t);
 #else
 static inline void task_cputime(struct task_struct *t,
 				cputime_t *utime, cputime_t *stime)
 {
-	if (utime)
-		*utime = t->utime;
-	if (stime)
-		*stime = t->stime;
+	*utime = t->utime;
+	*stime = t->stime;
 }
 
+static inline cputime_t task_gtime(struct task_struct *t)
+{
+	return t->gtime;
+}
+#endif
+
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
 static inline void task_cputime_scaled(struct task_struct *t,
 				       cputime_t *utimescaled,
 				       cputime_t *stimescaled)
 {
-	if (utimescaled)
-		*utimescaled = t->utimescaled;
-	if (stimescaled)
-		*stimescaled = t->stimescaled;
+	*utimescaled = t->utimescaled;
+	*stimescaled = t->stimescaled;
 }
-
-static inline cputime_t task_gtime(struct task_struct *t)
+#else
+static inline void task_cputime_scaled(struct task_struct *t,
+				       cputime_t *utimescaled,
+				       cputime_t *stimescaled)
 {
-	return t->gtime;
+	task_cputime(t, utimescaled, stimescaled);
 }
 #endif
+
 extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
 
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 22db1e63707e..441145351301 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -36,7 +36,6 @@ extern unsigned int sysctl_numa_balancing_scan_size;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
 extern unsigned int sysctl_sched_time_avg;
-extern unsigned int sysctl_sched_shares_window;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *length,
author	Linus Torvalds <torvalds@linux-foundation.org>	2016-12-12 12:15:10 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-12-12 12:15:10 -0800
commit	92c020d08d83673ecd15a9069d4457378668da31 (patch)
tree	3dbc5a9c1ab179f55be49e30e378cc4e650fc20e /include
parent	bca13ce4554ae9cf5083e5adf395ad2266cb571b (diff)
parent	6b94780e45c17b83e3e75f8aaca5a328db583c74 (diff)