author     Linus Torvalds <torvalds@linux-foundation.org>   2019-09-16 17:25:49 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2019-09-16 17:25:49 -0700
commit     7e67a859997aad47727aff9c5a32e160da079ce3
tree       96f53425c2834de5b3276d7598782ab6412e4d5e /include
parent     772c1d06bd402f7ee72c61a18c2db74cd74b6758
parent     563c4f85f9f0d63b712081d5b4522152cdcb8b6b
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
- MAINTAINERS: Add Mark Rutland as perf submaintainer, Juri Lelli and
Vincent Guittot as scheduler submaintainers. Add Dietmar Eggemann,
Steven Rostedt, Ben Segall and Mel Gorman as scheduler reviewers.
As perf and the scheduler are getting bigger and more complex,
document the status quo of current responsibilities and interests,
and spread the review pain^H^H^H^H fun via an increase in the Cc:
linecount generated by scripts/get_maintainer.pl. :-)
- Add another series of patches that brings the -rt (PREEMPT_RT) tree
closer to mainline: split the monolithic CONFIG_PREEMPT dependencies
into a new CONFIG_PREEMPTION category that will allow the eventual
introduction of CONFIG_PREEMPT_RT. Still a few hundred more patches
to go, though.
- Extend the CPU cgroup controller with uclamp.min and uclamp.max to
allow finer shaping of CPU bandwidth usage (a usage sketch follows
this list).
- Micro-optimize energy-aware wake-ups from O(CPUS^2) to O(CPUS) (a
schematic sketch follows this list).
- Improve the behavior of high-CPU-count, high-thread-count
applications running under cpu.cfs_quota_us constraints.
- Improve load balancing when SCHED_IDLE tasks are present.
- Improve the NUMA locality of housekeeping-CPU allocation for CPU
isolation.
- Fix deadline scheduler bandwidth calculations and logic when cpusets
rebuild the topology, or when a CPU goes offline while deadline tasks
on it are throttled.
- Convert the cpuset_mutex to a percpu_rwsem, to allow it to be used
from setscheduler() system calls without creating global
serialization. Add new synchronization between cpuset
topology-changing events and the deadline acceptance tests in
setscheduler(), which were broken before. (A read-side locking
sketch follows this list.)
- Rework the active_mm state machine to be less confusing and closer
to optimal.
- Rework (simplify) the pick_next_task() slowpath (a toy model
follows this list).
- Improve load-balancing on AMD EPYC systems.
- ... and misc cleanups, smaller fixes and improvements - please see
the Git log for more details.
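For the uclamp item above, a minimal userspace sketch of driving the new
cpu.uclamp.min / cpu.uclamp.max cgroup-v2 files. The cgroup mount point and
the group name "bursty" are hypothetical; the value format (a percentage
with up to two decimals, or "max") follows this series' documented ABI.

/* Hypothetical cgroup path; cpu.uclamp.{min,max} take "0.00".."100.00"
 * or "max". */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
        int fd = open(path, O_WRONLY);

        if (fd < 0 || write(fd, val, strlen(val)) < 0) {
                if (fd >= 0)
                        close(fd);
                return -1;
        }
        return close(fd);
}

int main(void)
{
        /* Request at least 25% of CPU capacity for this group's tasks... */
        if (write_str("/sys/fs/cgroup/bursty/cpu.uclamp.min", "25.00"))
                perror("cpu.uclamp.min");
        /* ...and clamp their apparent utilization to at most 75%. */
        if (write_str("/sys/fs/cgroup/bursty/cpu.uclamp.max", "75.00"))
                perror("cpu.uclamp.max");
        return 0;
}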
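The O(CPUS^2) -> O(CPUS) wake-up item boils down to not re-summing every
performance domain's energy for each candidate CPU. The following is a
schematic only, NOT the kernel's find_energy_efficient_cpu(); pd_energy()
and the CPU-to-domain mapping are toy stand-ins invented for illustration.

#include <limits.h>
#include <stdio.h>

#define NR_CPUS 8
#define NR_PDS  2       /* performance domains, 4 CPUs each */

/* Toy cost model: energy domain @pd draws with the waking task placed
 * on @cpu; cpu == -1 means "without the task". */
static long pd_energy(int pd, int cpu)
{
        return 100 * (pd + 1) + (cpu >= 0 ? 10 + cpu % 4 : 0);
}

static int find_cpu_linear(void)
{
        long pd_base[NR_PDS], best_delta = LONG_MAX;
        int best = -1;

        /* One pass over domains: energy without the task. */
        for (int pd = 0; pd < NR_PDS; pd++)
                pd_base[pd] = pd_energy(pd, -1);

        /* One pass over CPUs: only the candidate's own domain changes,
         * so no inner loop over all domains per candidate. */
        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                int pd = cpu / (NR_CPUS / NR_PDS);
                long delta = pd_energy(pd, cpu) - pd_base[pd];

                if (delta < best_delta) {
                        best_delta = delta;
                        best = cpu;
                }
        }
        return best;
}

int main(void)
{
        printf("best CPU: %d\n", find_cpu_linear());
        return 0;
}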
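For the cpuset_mutex conversion, a minimal sketch of the percpu_rwsem
pattern involved (kernel-internal API from <linux/percpu-rwsem.h>); the
cpuset_read_lock()/cpuset_read_unlock() helpers added in this merge wrap
exactly this idiom. The demo_* names below are illustrative, not kernel
functions.

#include <linux/percpu-rwsem.h>

static DEFINE_STATIC_PERCPU_RWSEM(demo_rwsem);

/* Hot path (e.g. setscheduler()): readers bump a per-CPU count, so
 * concurrent readers on different CPUs do not serialize on a shared
 * cache line. */
static void demo_read_side(void)
{
        percpu_down_read(&demo_rwsem);
        /* ... validate the request against the cpuset topology ... */
        percpu_up_read(&demo_rwsem);
}

/* Cold path (topology changes): the writer waits out all readers. */
static void demo_write_side(void)
{
        percpu_down_write(&demo_rwsem);
        /* ... rebuild sched domains ... */
        percpu_up_write(&demo_rwsem);
}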
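And for the pick_next_task() rework, a self-contained toy model of the
simplified slow-path shape (schematic, not the kernel's code): put the
previous task back once up front, then ask each scheduling class in
priority order and take the first task offered. All names are invented.

#include <stddef.h>
#include <stdio.h>

struct toy_task { const char *name; };

struct toy_class {
        const char *name;
        struct toy_task *(*pick)(void);
};

static struct toy_task rt_task = { "rt0" };

static struct toy_task *pick_dl(void)   { return NULL; }     /* no deadline task */
static struct toy_task *pick_rt(void)   { return &rt_task; } /* runnable RT task */
static struct toy_task *pick_fair(void) { return NULL; }

/* Highest-priority class first, mirroring the kernel's class order. */
static const struct toy_class classes[] = {
        { "dl", pick_dl }, { "rt", pick_rt }, { "fair", pick_fair },
};

static struct toy_task *pick_next(void)
{
        /* In the kernel, put_prev_task(rq, prev) happens here, once. */
        for (size_t i = 0; i < sizeof(classes) / sizeof(classes[0]); i++) {
                struct toy_task *p = classes[i].pick();

                if (p)
                        return p;
        }
        return NULL; /* the kernel's idle class never comes up empty */
}

int main(void)
{
        printf("picked: %s\n", pick_next()->name);
        return 0;
}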
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (53 commits)
sched/psi: Correct overly pessimistic size calculation
sched/fair: Speed-up energy-aware wake-ups
sched/uclamp: Always use 'enum uclamp_id' for clamp_id values
sched/uclamp: Update CPU's refcount on TG's clamp changes
sched/uclamp: Use TG's clamps to restrict TASK's clamps
sched/uclamp: Propagate system defaults to the root group
sched/uclamp: Propagate parent clamps
sched/uclamp: Extend CPU's cgroup controller
sched/topology: Improve load balancing on AMD EPYC systems
arch, ia64: Make NUMA select SMP
sched, perf: MAINTAINERS update, add submaintainers and reviewers
sched/fair: Use rq_lock/unlock in online_fair_sched_group
cpufreq: schedutil: fix equation in comment
sched: Rework pick_next_task() slow-path
sched: Allow put_prev_task() to drop rq->lock
sched/fair: Expose newidle_balance()
sched: Add task_struct pointer to sched_class::set_curr_task
sched: Rework CPU hotplug task selection
sched/{rt,deadline}: Fix set_next_task vs pick_next_task
sched: Fix kerneldoc comment for ia64_set_curr_task
...
Diffstat (limited to 'include')
 include/asm-generic/preempt.h    |  4
 include/linux/cgroup.h           |  1
 include/linux/cpuset.h           | 13
 include/linux/preempt.h          |  6
 include/linux/rcupdate.h         |  2
 include/linux/rcutree.h          |  2
 include/linux/sched.h            | 11
 include/linux/sched/deadline.h   |  8
 include/linux/sched/task.h       |  6
 include/linux/sched/topology.h   | 10
 include/linux/spinlock.h         |  2
 include/linux/spinlock_api_smp.h |  2
 include/linux/topology.h         | 14
 include/linux/torture.h          |  2

 14 files changed, 65 insertions(+), 18 deletions(-)
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index c3046c920063..d683f5e6d791 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -78,11 +78,11 @@ static __always_inline bool should_resched(int preempt_offset)
 			tif_need_resched());
 }
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 extern asmlinkage void preempt_schedule(void);
 #define __preempt_schedule() preempt_schedule()
 extern asmlinkage void preempt_schedule_notrace(void);
 #define __preempt_schedule_notrace() preempt_schedule_notrace()
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 #endif /* __ASM_PREEMPT_H */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f6b048902d6c..3ba3e6da13a6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -150,6 +150,7 @@ struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
 struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
 					struct cgroup_subsys_state **dst_cssp);
 
+void cgroup_enable_task_cg_lists(void);
 void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
 			 struct css_task_iter *it);
 struct task_struct *css_task_iter_next(struct css_task_iter *it);
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 934633a05d20..04c20de66afc 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -40,14 +40,14 @@ static inline bool cpusets_enabled(void)
 
 static inline void cpuset_inc(void)
 {
-	static_branch_inc(&cpusets_pre_enable_key);
-	static_branch_inc(&cpusets_enabled_key);
+	static_branch_inc_cpuslocked(&cpusets_pre_enable_key);
+	static_branch_inc_cpuslocked(&cpusets_enabled_key);
 }
 
 static inline void cpuset_dec(void)
 {
-	static_branch_dec(&cpusets_enabled_key);
-	static_branch_dec(&cpusets_pre_enable_key);
+	static_branch_dec_cpuslocked(&cpusets_enabled_key);
+	static_branch_dec_cpuslocked(&cpusets_pre_enable_key);
 }
 
 extern int cpuset_init(void);
@@ -55,6 +55,8 @@ extern void cpuset_init_smp(void);
 extern void cpuset_force_rebuild(void);
 extern void cpuset_update_active_cpus(void);
 extern void cpuset_wait_for_hotplug(void);
+extern void cpuset_read_lock(void);
+extern void cpuset_read_unlock(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@@ -176,6 +178,9 @@ static inline void cpuset_update_active_cpus(void)
 {
 }
 
 static inline void cpuset_wait_for_hotplug(void) { }
 
+static inline void cpuset_read_lock(void) { }
+static inline void cpuset_read_unlock(void) { }
+
 static inline void cpuset_cpus_allowed(struct task_struct *p,
 				       struct cpumask *mask)
 {
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index dd92b1a93919..bbb68dba37cc 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -182,7 +182,7 @@ do { \
 
 #define preemptible()	(preempt_count() == 0 && !irqs_disabled())
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 #define preempt_enable() \
 do { \
 	barrier(); \
@@ -203,7 +203,7 @@ do { \
 		__preempt_schedule(); \
 } while (0)
 
-#else /* !CONFIG_PREEMPT */
+#else /* !CONFIG_PREEMPTION */
 #define preempt_enable() \
 do { \
 	barrier(); \
@@ -217,7 +217,7 @@ do { \
 } while (0)
 #define preempt_check_resched() do { } while (0)
 
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 #define preempt_disable_notrace() \
 do { \
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 80d6056f5855..75a2eded7aa2 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -585,7 +585,7 @@ do { \
  *
  * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU),
  * it is illegal to block while in an RCU read-side critical section.
- * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPT
+ * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPTION
  * kernel builds, RCU read-side critical sections may be preempted,
  * but explicit blocking is illegal. Finally, in preemptible RCU
  * implementations in real-time (with -rt patchset) kernel builds, RCU
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 735601ac27d3..18b1ed9864b0 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -53,7 +53,7 @@ void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
 void rcu_end_inkernel_boot(void);
 bool rcu_is_watching(void);
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 void rcu_all_qs(void);
 #endif
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..f0edee94834a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -295,6 +295,11 @@ enum uclamp_id {
 	UCLAMP_CNT
 };
 
+#ifdef CONFIG_SMP
+extern struct root_domain def_root_domain;
+extern struct mutex sched_domains_mutex;
+#endif
+
 struct sched_info {
 #ifdef CONFIG_SCHED_INFO
 	/* Cumulative counters: */
@@ -1767,7 +1772,7 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
  * value indicates whether a reschedule was done in fact.
  * cond_resched_lock() will drop the spinlock before scheduling,
  */
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 extern int _cond_resched(void);
 #else
 static inline int _cond_resched(void) { return 0; }
@@ -1796,12 +1801,12 @@ static inline void cond_resched_rcu(void)
 
 /*
  * Does a critical section need to be broken due to another
- * task waiting?: (technically does not depend on CONFIG_PREEMPT,
+ * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
  * but a general need for low latency)
  */
 static inline int spin_needbreak(spinlock_t *lock)
 {
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	return spin_is_contended(lock);
 #else
 	return 0;
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 0cb034331cbb..1aff00b65f3c 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -24,3 +24,11 @@ static inline bool dl_time_before(u64 a, u64 b)
 {
 	return (s64)(a - b) < 0;
 }
+
+#ifdef CONFIG_SMP
+
+struct root_domain;
+extern void dl_add_task_root_domain(struct task_struct *p);
+extern void dl_clear_root_domain(struct root_domain *rd);
+
+#endif /* CONFIG_SMP */
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 0497091e40c1..3d90ed8f75f0 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -105,7 +105,11 @@ extern void sched_exec(void);
 #define sched_exec() {}
 #endif
 
-#define get_task_struct(tsk) do { refcount_inc(&(tsk)->usage); } while(0)
+static inline struct task_struct *get_task_struct(struct task_struct *t)
+{
+	refcount_inc(&t->usage);
+	return t;
+}
 
 extern void __put_task_struct(struct task_struct *t);
 
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 7863bb62d2ab..f341163fedc9 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -150,6 +150,10 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
 	return to_cpumask(sd->span);
 }
 
+extern void partition_sched_domains_locked(int ndoms_new,
+					   cpumask_var_t doms_new[],
+					   struct sched_domain_attr *dattr_new);
+
 extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 				    struct sched_domain_attr *dattr_new);
 
@@ -195,6 +199,12 @@ extern void set_sched_topology(struct sched_domain_topology_level *tl);
 struct sched_domain_attr;
 
 static inline void
+partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
+			       struct sched_domain_attr *dattr_new)
+{
+}
+
+static inline void
 partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 			struct sched_domain_attr *dattr_new)
 {
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index ed7c4d6b8235..031ce8617df8 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -214,7 +214,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 
 /*
  * Define the various spin_lock methods.  Note we define these
- * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The
+ * regardless of whether CONFIG_SMP or CONFIG_PREEMPTION are set. The
  * various methods are defined as nops in the case they are not
  * required.
  */
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 42dfab89e740..b762eaba4cdf 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -96,7 +96,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 
 /*
  * If lockdep is enabled then we use the non-preemption spin-ops
- * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
+ * even on CONFIG_PREEMPTION, because lockdep assumes that interrupts are
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
 #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 2a19d196af28..eb2fe6edd73c 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -60,6 +60,20 @@ int arch_update_cpu_topology(void);
  */
 #define RECLAIM_DISTANCE 30
 #endif
+
+/*
+ * The following tunable allows platforms to override the default node
+ * reclaim distance (RECLAIM_DISTANCE) if remote memory accesses are
+ * sufficiently fast that the default value actually hurts
+ * performance.
+ *
+ * AMD EPYC machines use this because even though the 2-hop distance
+ * is 32 (3.2x slower than a local memory access) performance actually
+ * *improves* if allowed to reclaim memory and load balance tasks
+ * between NUMA nodes 2-hops apart.
+ */
+extern int __read_mostly node_reclaim_distance;
+
 #ifndef PENALTY_FOR_NODE_WITH_CPUS
 #define PENALTY_FOR_NODE_WITH_CPUS	(1)
 #endif
diff --git a/include/linux/torture.h b/include/linux/torture.h
index a620118385bb..6241f59e2d6f 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -86,7 +86,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp);
 #define torture_stop_kthread(n, tp) \
 	_torture_stop_kthread("Stopping " #n " task", &(tp))
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 #define torture_preempt_schedule() preempt_schedule()
 #else
 #define torture_preempt_schedule()
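As an aside on the include/linux/sched/task.h hunk above: get_task_struct()
becoming an inline function that returns its argument lets callers take a
reference and assign in a single expression. A small illustration; the
surrounding caller code (the saved variable) is invented for this note,
not taken from this merge.

        /* Before: the macro returned nothing, so two statements. */
        get_task_struct(tsk);
        saved = tsk;

        /* After: take the reference and assign in one go. */
        saved = get_task_struct(tsk);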