Diffstat (limited to 'include'):

 include/asm-generic/preempt.h |  3
 include/linux/freezer.h       | 50
 include/linux/init_task.h     | 10
 include/linux/kernel.h        |  5
 include/linux/sched.h         | 87
 include/linux/wait.h          | 80
 include/net/sock.h            |  1
 include/trace/events/sched.h  |  9
 include/uapi/linux/sched.h    |  4

 9 files changed, 167 insertions(+), 82 deletions(-)
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index 1cd3f5d767a8..eb6f9e6c3075 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -23,9 +23,6 @@ static __always_inline void preempt_count_set(int pc)
 /*
  * must be macros to avoid header recursion hell
  */
-#define task_preempt_count(p) \
-	(task_thread_info(p)->preempt_count & ~PREEMPT_NEED_RESCHED)
-
 #define init_task_preempt_count(p) do { \
 	task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
 } while (0)
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 7fd81b8c4897..6b7fd9cf5ea2 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -246,15 +246,6 @@ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
  * defined in <linux/wait.h>
  */
 
-#define wait_event_freezekillable(wq, condition)		\
-({								\
-	int __retval;						\
-	freezer_do_not_count();					\
-	__retval = wait_event_killable(wq, (condition));	\
-	freezer_count();					\
-	__retval;						\
-})
-
 /* DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION */
 #define wait_event_freezekillable_unsafe(wq, condition)	\
 ({								\
@@ -265,35 +256,6 @@ static inline int freezable_schedule_hrtimeout_range(ktime_t *expires,
 	__retval;						\
 })
 
-#define wait_event_freezable(wq, condition)			\
-({								\
-	int __retval;						\
-	freezer_do_not_count();					\
-	__retval = wait_event_interruptible(wq, (condition));	\
-	freezer_count();					\
-	__retval;						\
-})
-
-#define wait_event_freezable_timeout(wq, condition, timeout)	\
-({								\
-	long __retval = timeout;				\
-	freezer_do_not_count();					\
-	__retval = wait_event_interruptible_timeout(wq, (condition),	\
-				__retval);			\
-	freezer_count();					\
-	__retval;						\
-})
-
-#define wait_event_freezable_exclusive(wq, condition)		\
-({								\
-	int __retval;						\
-	freezer_do_not_count();					\
-	__retval = wait_event_interruptible_exclusive(wq, condition);	\
-	freezer_count();					\
-	__retval;						\
-})
-
-
 #else /* !CONFIG_FREEZER */
 static inline bool frozen(struct task_struct *p) { return false; }
 static inline bool freezing(struct task_struct *p) { return false; }
@@ -331,18 +293,6 @@ static inline void set_freezable(void) {}
 #define freezable_schedule_hrtimeout_range(expires, delta, mode)	\
 	schedule_hrtimeout_range(expires, delta, mode)
 
-#define wait_event_freezable(wq, condition)			\
-		wait_event_interruptible(wq, condition)
-
-#define wait_event_freezable_timeout(wq, condition, timeout)	\
-		wait_event_interruptible_timeout(wq, condition, timeout)
-
-#define wait_event_freezable_exclusive(wq, condition)		\
-		wait_event_interruptible_exclusive(wq, condition)
-
-#define wait_event_freezekillable(wq, condition)		\
-		wait_event_killable(wq, condition)
-
 #define wait_event_freezekillable_unsafe(wq, condition)	\
 		wait_event_killable(wq, condition)
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d996aef8044f..3037fc085e8e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -166,6 +166,15 @@ extern struct task_group root_task_group;
 # define INIT_RT_MUTEXES(tsk)
 #endif
 
+#ifdef CONFIG_NUMA_BALANCING
+# define INIT_NUMA_BALANCING(tsk)	\
+	.numa_preferred_nid = -1,	\
+	.numa_group = NULL,		\
+	.numa_faults = NULL,
+#else
+# define INIT_NUMA_BALANCING(tsk)
+#endif
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -237,6 +246,7 @@ extern struct task_group root_task_group;
 	INIT_CPUSET_SEQ(tsk)		\
 	INIT_RT_MUTEXES(tsk)		\
 	INIT_VTIME(tsk)			\
+	INIT_NUMA_BALANCING(tsk)	\
 }
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3d770f5564b8..446d76a87ba1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -162,6 +162,7 @@ extern int _cond_resched(void);
 #endif
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+  void ___might_sleep(const char *file, int line, int preempt_offset);
   void __might_sleep(const char *file, int line, int preempt_offset);
 /**
  * might_sleep - annotation for functions that can sleep
@@ -175,10 +176,14 @@ extern int _cond_resched(void);
  */
 # define might_sleep() \
	do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+# define sched_annotate_sleep()	__set_current_state(TASK_RUNNING)
 #else
+  static inline void ___might_sleep(const char *file, int line,
+				    int preempt_offset) { }
   static inline void __might_sleep(const char *file, int line,
 				   int preempt_offset) { }
 # define might_sleep() do { might_resched(); } while (0)
+# define sched_annotate_sleep() do { } while (0)
 #endif
 
 #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
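The ___might_sleep()/sched_annotate_sleep() pair added above backs a new nested-sleep debug check: under CONFIG_DEBUG_ATOMIC_SLEEP, __might_sleep() can now also warn when a blocking primitive is invoked while current->state is not TASK_RUNNING, since the inner sleep would clobber the task state and could eat a wakeup. sched_annotate_sleep() marks sites where that is known to be safe because the surrounding loop re-tests the wait condition anyway. A rough sketch of such a site, in the spirit of the sk_wait_event() change further down (wq, wait, timeo, resource_lock, done and check_done() are illustrative names, not part of this patch):

	/* caller did prepare_to_wait(&wq, &wait, TASK_INTERRUPTIBLE) and loops */
	if (!done)
		timeo = schedule_timeout(timeo);
	sched_annotate_sleep();		/* the nested sleep below is intentional */
	mutex_lock(&resource_lock);	/* may sleep while !TASK_RUNNING */
	done = check_done();		/* condition is re-tested, no lost wakeup */
	mutex_unlock(&resource_lock);

cond_resched() and friends switch from __might_sleep() to ___might_sleep() in the sched.h hunks below for the same reason: they are legitimately called with a task state already set.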
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 706a9f744909..55f5ee7cc3d3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -243,6 +243,43 @@ extern char ___assert_task_state[1 - 2*!!(
 		((task->state & TASK_UNINTERRUPTIBLE) != 0 &&	\
 		 (task->flags & PF_FROZEN) == 0)
 
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+
+#define __set_task_state(tsk, state_value)		\
+	do {						\
+		(tsk)->task_state_change = _THIS_IP_;	\
+		(tsk)->state = (state_value);		\
+	} while (0)
+#define set_task_state(tsk, state_value)		\
+	do {						\
+		(tsk)->task_state_change = _THIS_IP_;	\
+		set_mb((tsk)->state, (state_value));	\
+	} while (0)
+
+/*
+ * set_current_state() includes a barrier so that the write of current->state
+ * is correctly serialised wrt the caller's subsequent test of whether to
+ * actually sleep:
+ *
+ *   set_current_state(TASK_UNINTERRUPTIBLE);
+ *   if (do_i_need_to_sleep())
+ *     schedule();
+ *
+ * If the caller does not need such serialisation then use __set_current_state()
+ */
+#define __set_current_state(state_value)		\
+	do {						\
+		current->task_state_change = _THIS_IP_;	\
+		current->state = (state_value);		\
+	} while (0)
+#define set_current_state(state_value)			\
+	do {						\
+		current->task_state_change = _THIS_IP_;	\
+		set_mb(current->state, (state_value));	\
+	} while (0)
+
+#else
+
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
 #define set_task_state(tsk, state_value)		\
@@ -259,11 +296,13 @@ extern char ___assert_task_state[1 - 2*!!(
  *
  * If the caller does not need such serialisation then use __set_current_state()
  */
-#define __set_current_state(state_value)		\
+#define __set_current_state(state_value)		\
 	do { current->state = (state_value); } while (0)
-#define set_current_state(state_value)			\
+#define set_current_state(state_value)			\
 	set_mb(current->state, (state_value))
 
+#endif
+
 /* Task command name length */
 #define TASK_COMM_LEN 16
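The barrier comment above is about the store/load ordering between the task state and the wait condition. A minimal sketch of the pairing it refers to (cond and sleeper are illustrative names):

	/* sleeper */
	set_current_state(TASK_UNINTERRUPTIBLE);	/* store state, full barrier */
	if (!cond)					/* condition loaded after the barrier */
		schedule();
	__set_current_state(TASK_RUNNING);

	/* waker */
	cond = true;					/* publish the condition first */
	wake_up_process(sleeper);			/* then wake the possibly-sleeping task */

Without the barrier in set_current_state(), the sleeper's load of cond could be reordered before its state store, letting the wakeup slip in between and leaving the task asleep. The CONFIG_DEBUG_ATOMIC_SLEEP variants additionally record the caller in task_state_change (declared in the next hunk), so a nested-sleep warning can report where the task state was last set.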
@@ -1558,28 +1597,23 @@ struct task_struct {
 	struct numa_group *numa_group;
 
 	/*
-	 * Exponential decaying average of faults on a per-node basis.
-	 * Scheduling placement decisions are made based on the these counts.
-	 * The values remain static for the duration of a PTE scan
+	 * numa_faults is an array split into four regions:
+	 * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
+	 * in this precise order.
+	 *
+	 * faults_memory: Exponential decaying average of faults on a per-node
+	 * basis. Scheduling placement decisions are made based on these
+	 * counts. The values remain static for the duration of a PTE scan.
+	 * faults_cpu: Track the nodes the process was running on when a NUMA
+	 * hinting fault was incurred.
+	 * faults_memory_buffer and faults_cpu_buffer: Record faults per node
+	 * during the current scan window. When the scan completes, the counts
+	 * in faults_memory and faults_cpu decay and these values are copied.
 	 */
-	unsigned long *numa_faults_memory;
+	unsigned long *numa_faults;
 	unsigned long total_numa_faults;
 
 	/*
-	 * numa_faults_buffer records faults per node during the current
-	 * scan window. When the scan completes, the counts in
-	 * numa_faults_memory decay and these values are copied.
-	 */
-	unsigned long *numa_faults_buffer_memory;
-
-	/*
-	 * Track the nodes the process was running on when a NUMA hinting
-	 * fault was incurred.
-	 */
-	unsigned long *numa_faults_cpu;
-	unsigned long *numa_faults_buffer_cpu;
-
-	/*
 	 * numa_faults_locality tracks if faults recorded during the last
 	 * scan window were remote/local. The task scan period is adapted
 	 * based on the locality of the faults with different weights
@@ -1661,6 +1695,9 @@ struct task_struct {
 	unsigned int	sequential_io;
 	unsigned int	sequential_io_avg;
 #endif
+#ifdef	CONFIG_DEBUG_ATOMIC_SLEEP
+	unsigned long	task_state_change;
+#endif
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -2052,6 +2089,10 @@ static inline void tsk_restore_flags(struct task_struct *task,
 	task->flags |= orig_flags & flags;
 }
 
+extern int cpuset_cpumask_can_shrink(const struct cpumask *cur,
+				     const struct cpumask *trial);
+extern int task_can_attach(struct task_struct *p,
+			   const struct cpumask *cs_cpus_allowed);
 #ifdef CONFIG_SMP
 extern void do_set_cpus_allowed(struct task_struct *p,
 				const struct cpumask *new_mask);
@@ -2760,7 +2801,7 @@ static inline int signal_pending_state(long state, struct task_struct *p)
 extern int _cond_resched(void);
 
 #define cond_resched() ({			\
-	__might_sleep(__FILE__, __LINE__, 0);	\
+	___might_sleep(__FILE__, __LINE__, 0);	\
 	_cond_resched();			\
 })
 
@@ -2773,14 +2814,14 @@ extern int __cond_resched_lock(spinlock_t *lock);
 #endif
 
 #define cond_resched_lock(lock) ({				\
-	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
+	___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
 	__cond_resched_lock(lock);				\
 })
 
 extern int __cond_resched_softirq(void);
 
 #define cond_resched_softirq() ({					\
-	__might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);	\
+	___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);	\
 	__cond_resched_softirq();					\
 })
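With the four per-node fault statistics collapsed into the single numa_faults array, the scheduler code that consumes it (in kernel/sched/, outside this include/-only diff) addresses entries by a (stat, node, private) tuple. A sketch of the indexing scheme the new comment implies, assuming NR_NUMA_HINT_FAULT_TYPES entries (private/shared) per node and nr_node_ids nodes per region:

	enum numa_faults_stats {
		NUMA_MEM = 0,	/* faults_memory */
		NUMA_CPU,	/* faults_cpu */
		NUMA_MEMBUF,	/* faults_memory_buffer */
		NUMA_CPUBUF	/* faults_cpu_buffer */
	};

	static inline int task_faults_idx(enum numa_faults_stats s, int nid, int priv)
	{
		return NR_NUMA_HINT_FAULT_TYPES * (s * nr_node_ids + nid) + priv;
	}

One allocation and one layout then cover all four regions, which is also what lets INIT_NUMA_BALANCING() above initialise a single .numa_faults pointer.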
diff --git a/include/linux/wait.h b/include/linux/wait.h
index e4a8eb9312ea..2232ed16635a 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -13,9 +13,12 @@ typedef struct __wait_queue wait_queue_t;
 typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key);
 int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key);
 
+/* __wait_queue::flags */
+#define WQ_FLAG_EXCLUSIVE	0x01
+#define WQ_FLAG_WOKEN		0x02
+
 struct __wait_queue {
 	unsigned int		flags;
-#define WQ_FLAG_EXCLUSIVE	0x01
 	void			*private;
 	wait_queue_func_t	func;
 	struct list_head	task_list;
@@ -258,11 +261,37 @@ __out:	__ret;						\
  */
 #define wait_event(wq, condition)				\
 do {								\
+	might_sleep();						\
 	if (condition)						\
 		break;						\
 	__wait_event(wq, condition);				\
 } while (0)
 
+#define __wait_event_freezable(wq, condition)			\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,	\
+		      schedule(); try_to_freeze())
+
+/**
+ * wait_event_freezable - sleep (or freeze) until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute
+ * to system load) until the @condition evaluates to true. The
+ * @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
+#define wait_event_freezable(wq, condition)			\
+({								\
+	int __ret = 0;						\
+	might_sleep();						\
+	if (!(condition))					\
+		__ret = __wait_event_freezable(wq, condition);	\
+	__ret;							\
+})
+
 #define __wait_event_timeout(wq, condition, timeout)		\
 	___wait_event(wq, ___wait_cond_timeout(condition),	\
 		      TASK_UNINTERRUPTIBLE, 0, timeout,		\
@@ -290,11 +319,30 @@ do {								\
 #define wait_event_timeout(wq, condition, timeout)		\
 ({								\
 	long __ret = timeout;					\
+	might_sleep();						\
 	if (!___wait_cond_timeout(condition))			\
 		__ret = __wait_event_timeout(wq, condition, timeout); \
 	__ret;							\
 })
 
+#define __wait_event_freezable_timeout(wq, condition, timeout)	\
+	___wait_event(wq, ___wait_cond_timeout(condition),	\
+		      TASK_INTERRUPTIBLE, 0, timeout,		\
+		      __ret = schedule_timeout(__ret); try_to_freeze())
+
+/*
+ * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
+ * increasing load and is freezable.
+ */
+#define wait_event_freezable_timeout(wq, condition, timeout)	\
+({								\
+	long __ret = timeout;					\
+	might_sleep();						\
+	if (!___wait_cond_timeout(condition))			\
+		__ret = __wait_event_freezable_timeout(wq, condition, timeout); \
+	__ret;							\
+})
+
 #define __wait_event_cmd(wq, condition, cmd1, cmd2)		\
 	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
 			    cmd1; schedule(); cmd2)
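The freezable wait macros deleted from freezer.h earlier in this diff reappear here, rebuilt on ___wait_event() with a try_to_freeze() after each schedule() instead of the old freezer_do_not_count()/freezer_count() bracketing. Typical use is unchanged; a minimal sketch of a freezable kthread (my_waitq, have_work() and do_work() are illustrative names):

	static int my_worker(void *arg)
	{
		set_freezable();	/* opt in to freezing */

		while (!kthread_should_stop()) {
			/* sleeps interruptibly; freezes inside the wait on suspend */
			wait_event_freezable(my_waitq,
					     have_work() || kthread_should_stop());
			if (have_work())
				do_work();
		}
		return 0;
	}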
@@ -315,6 +363,7 @@ do {								\
  */
 #define wait_event_cmd(wq, condition, cmd1, cmd2)		\
 do {								\
+	might_sleep();						\
 	if (condition)						\
 		break;						\
 	__wait_event_cmd(wq, condition, cmd1, cmd2);		\
@@ -342,6 +391,7 @@ do {								\
 #define wait_event_interruptible(wq, condition)		\
 ({								\
 	int __ret = 0;						\
+	might_sleep();						\
 	if (!(condition))					\
 		__ret = __wait_event_interruptible(wq, condition); \
 	__ret;							\
@@ -375,6 +425,7 @@ do {								\
 #define wait_event_interruptible_timeout(wq, condition, timeout) \
 ({								\
 	long __ret = timeout;					\
+	might_sleep();						\
 	if (!___wait_cond_timeout(condition))			\
 		__ret = __wait_event_interruptible_timeout(wq,	\
 						condition, timeout); \
@@ -425,6 +476,7 @@ do {								\
 #define wait_event_hrtimeout(wq, condition, timeout)		\
 ({								\
 	int __ret = 0;						\
+	might_sleep();						\
 	if (!(condition))					\
 		__ret = __wait_event_hrtimeout(wq, condition, timeout, \
 					       TASK_UNINTERRUPTIBLE); \
@@ -450,6 +502,7 @@ do {								\
 #define wait_event_interruptible_hrtimeout(wq, condition, timeout) \
 ({								\
 	long __ret = 0;						\
+	might_sleep();						\
 	if (!(condition))					\
 		__ret = __wait_event_hrtimeout(wq, condition, timeout, \
 					       TASK_INTERRUPTIBLE); \
@@ -463,12 +516,27 @@ do {								\
 #define wait_event_interruptible_exclusive(wq, condition)	\
 ({								\
 	int __ret = 0;						\
+	might_sleep();						\
 	if (!(condition))					\
 		__ret = __wait_event_interruptible_exclusive(wq, condition);\
 	__ret;							\
 })
 
+#define __wait_event_freezable_exclusive(wq, condition)	\
+	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,	\
+			schedule(); try_to_freeze())
+
+#define wait_event_freezable_exclusive(wq, condition)		\
+({								\
+	int __ret = 0;						\
+	might_sleep();						\
+	if (!(condition))					\
+		__ret = __wait_event_freezable_exclusive(wq, condition);\
+	__ret;							\
+})
+
+
 #define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
 ({								\
 	int __ret = 0;						\
@@ -637,6 +705,7 @@ do {								\
 #define wait_event_killable(wq, condition)			\
 ({								\
 	int __ret = 0;						\
+	might_sleep();						\
 	if (!(condition))					\
 		__ret = __wait_event_killable(wq, condition);	\
 	__ret;							\
@@ -830,6 +899,8 @@ void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
 long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
 void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
 void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
+long wait_woken(wait_queue_t *wait, unsigned mode, long timeout);
+int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 
@@ -886,6 +957,7 @@ extern int bit_wait_io_timeout(struct wait_bit_key *);
 static inline int
 wait_on_bit(void *word, int bit, unsigned mode)
 {
+	might_sleep();
 	if (!test_bit(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit(word, bit,
@@ -910,6 +982,7 @@ wait_on_bit(void *word, int bit, unsigned mode)
 static inline int
 wait_on_bit_io(void *word, int bit, unsigned mode)
 {
+	might_sleep();
 	if (!test_bit(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit(word, bit,
@@ -936,6 +1009,7 @@ wait_on_bit_io(void *word, int bit, unsigned mode)
 static inline int
 wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
 {
+	might_sleep();
 	if (!test_bit(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit(word, bit, action, mode);
@@ -963,6 +1037,7 @@ wait_on_bit_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
 static inline int
 wait_on_bit_lock(void *word, int bit, unsigned mode)
 {
+	might_sleep();
 	if (!test_and_set_bit(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode);
@@ -986,6 +1061,7 @@ wait_on_bit_lock(void *word, int bit, unsigned mode)
 static inline int
 wait_on_bit_lock_io(void *word, int bit, unsigned mode)
 {
+	might_sleep();
 	if (!test_and_set_bit(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode);
@@ -1011,6 +1087,7 @@ wait_on_bit_lock_io(void *word, int bit, unsigned mode)
 static inline int
 wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned mode)
 {
+	might_sleep();
 	if (!test_and_set_bit(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit_lock(word, bit, action, mode);
@@ -1029,6 +1106,7 @@ wait_on_bit_lock_action(void *word, int bit, wait_bit_action_f *action, unsigned
 static inline
 int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
 {
+	might_sleep();
 	if (atomic_read(val) == 0)
 		return 0;
 	return out_of_line_wait_on_atomic_t(val, action, mode);
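wait_woken() and woken_wake_function(), declared above together with WQ_FLAG_WOKEN, provide a wait-loop style that keeps all task-state handling inside the two helpers, which sidesteps the nested-sleep problem entirely: the wake side sets WQ_FLAG_WOKEN and wait_woken() consumes it, so a wakeup arriving between the condition test and the sleep is not lost. A sketch of the intended calling pattern (wq, condition and timeout are illustrative names):

	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	add_wait_queue(&wq, &wait);
	for (;;) {
		if (condition)			/* no set_current_state() needed */
			break;
		if (signal_pending(current))
			break;
		timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
		if (!timeout)			/* timed out */
			break;
	}
	remove_wait_queue(&wq, &wait);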
diff --git a/include/net/sock.h b/include/net/sock.h
index 7db3db112baa..e6f235ebf6c9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -897,6 +897,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
 		if (!__rc) {					\
 			*(__timeo) = schedule_timeout(*(__timeo)); \
 		}						\
+		sched_annotate_sleep();				\
 		lock_sock(__sk);				\
 		__rc = __condition;				\
 		__rc;						\
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 0a68d5ae584e..30fedaf3e56a 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -97,16 +97,19 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
 	long state = p->state;
 
 #ifdef CONFIG_PREEMPT
+#ifdef CONFIG_SCHED_DEBUG
+	BUG_ON(p != current);
+#endif /* CONFIG_SCHED_DEBUG */
 	/*
 	 * For all intents and purposes a preempted task is a running task.
 	 */
-	if (task_preempt_count(p) & PREEMPT_ACTIVE)
+	if (preempt_count() & PREEMPT_ACTIVE)
 		state = TASK_RUNNING | TASK_STATE_MAX;
-#endif
+#endif /* CONFIG_PREEMPT */
 
 	return state;
 }
-#endif
+#endif /* CREATE_TRACE_POINTS */
 
 /*
  * Tracepoint for task switches, performed by the scheduler:
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index b932be9f5c5b..cc89ddefa926 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -23,8 +23,8 @@
 #define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
 /* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
    and is now available for re-use. */
-#define CLONE_NEWUTS		0x04000000	/* New utsname group? */
-#define CLONE_NEWIPC		0x08000000	/* New ipcs */
+#define CLONE_NEWUTS		0x04000000	/* New utsname namespace */
+#define CLONE_NEWIPC		0x08000000	/* New ipc namespace */
 #define CLONE_NEWUSER		0x10000000	/* New user namespace */
 #define CLONE_NEWPID		0x20000000	/* New pid namespace */
 #define CLONE_NEWNET		0x40000000	/* New network namespace */
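The CLONE_NEWUTS/CLONE_NEWIPC comment fixes only bring the wording in line with the other CLONE_NEW* namespace flags. For reference, a minimal userspace sketch of what a utsname namespace provides (error handling elided; requires CAP_SYS_ADMIN):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		unshare(CLONE_NEWUTS);				/* private utsname namespace */
		sethostname("sandbox", strlen("sandbox"));	/* not visible outside it */
		return 0;
	}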