From ec12cb7f31e28854efae7dd6f9544e0a66379040 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Thu, 21 Jul 2011 09:43:30 -0700 Subject: sched: Accumulate per-cfs_rq cpu usage and charge against bandwidth Account bandwidth usage on the cfs_rq level versus the task_groups to which they belong. Whether we are tracking bandwidth on a given cfs_rq is maintained under cfs_rq->runtime_enabled. cfs_rq's which belong to a bandwidth constrained task_group have their runtime accounted via the update_curr() path, which withdraws bandwidth from the global pool as desired. Updates involving the global pool are currently protected under cfs_bandwidth->lock, local runtime is protected by rq->lock. This patch only assigns and tracks quota, no action is taken in the case that cfs_rq->runtime_used exceeds cfs_rq->runtime_assigned. Signed-off-by: Paul Turner Signed-off-by: Nikhil Rao Signed-off-by: Bharata B Rao Reviewed-by: Hidetoshi Seto Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20110721184757.179386821@google.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ac2c0578e0f..bc6f5f2e24fa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2040,6 +2040,10 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { } static inline void sched_autogroup_exit(struct signal_struct *sig) { } #endif +#ifdef CONFIG_CFS_BANDWIDTH +extern unsigned int sysctl_sched_cfs_bandwidth_slice; +#endif + #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); -- cgit v1.2.3-58-ga151 From 557ab425429a5123d37f412ce3e6d6137cb621f8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 16 Sep 2011 11:16:43 +0200 Subject: sched, tracing: Show PREEMPT_ACTIVE state in trace_sched_switch We had need to see the difference between scheduling a runnable task and a runnable task being involuntarily preempted. No app should rely on the old string output (the binary trace event record format is not changed). Signed-off-by: Peter Zijlstra Cc: Thomas Gleixner Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1316164603.10174.11.camel@twins Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index f6334782a593..959ff18b63b6 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -100,7 +100,7 @@ static inline long __trace_sched_switch_state(struct task_struct *p) * For all intents and purposes a preempted task is a running task. */ if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE) - state = TASK_RUNNING; + state = TASK_RUNNING | TASK_STATE_MAX; #endif return state; @@ -137,13 +137,14 @@ TRACE_EVENT(sched_switch, __entry->next_prio = next->prio; ), - TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d", + TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->prev_state ? - __print_flags(__entry->prev_state, "|", + __entry->prev_state & (TASK_STATE_MAX-1) ? 
+ __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, { 16, "Z" }, { 32, "X" }, { 64, "x" }, { 128, "W" }) : "R", + __entry->prev_state & TASK_STATE_MAX ? "+" : "", __entry->next_comm, __entry->next_pid, __entry->next_prio) ); -- cgit v1.2.3-58-ga151 From 1230db8e1543c0471dd165727d34647ab098cc1e Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Sep 2011 14:00:42 +0800 Subject: llist: Make some llist functions inline Because llist code will be used in performance critical scheduler code path, make llist_add() and llist_del_all() inline to avoid function calling overhead and related 'glue' overhead. Signed-off-by: Huang Ying Acked-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1315461646-1379-2-git-send-email-ying.huang@intel.com Signed-off-by: Ingo Molnar --- drivers/acpi/apei/Kconfig | 1 - include/linux/llist.h | 64 ++++++++++++++++++++++++++++++++++++++++++----- lib/Kconfig | 3 --- lib/Makefile | 4 +-- lib/llist.c | 40 ----------------------------- 5 files changed, 59 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index e3f47872ec22..f0c1ce95a0ec 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -14,7 +14,6 @@ config ACPI_APEI_GHES depends on ACPI_APEI && X86 select ACPI_HED select IRQ_WORK - select LLIST select GENERIC_ALLOCATOR help Generic Hardware Error Source provides a way to report diff --git a/include/linux/llist.h b/include/linux/llist.h index aa0c8b5b3cd0..3eccdfd66096 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -37,8 +37,28 @@ * architectures that don't have NMI-safe cmpxchg implementation, the * list can NOT be used in NMI handler. So code uses the list in NMI * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * + * Copyright 2010,2011 Intel Corp. + * Author: Huang Ying + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include +#include +#include + struct llist_head { struct llist_node *first; }; @@ -113,14 +133,46 @@ static inline void init_llist_head(struct llist_head *list) * test whether the list is empty without deleting something from the * list. 
*/ -static inline int llist_empty(const struct llist_head *head) +static inline bool llist_empty(const struct llist_head *head) { return ACCESS_ONCE(head->first) == NULL; } -void llist_add(struct llist_node *new, struct llist_head *head); -void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, - struct llist_head *head); -struct llist_node *llist_del_first(struct llist_head *head); -struct llist_node *llist_del_all(struct llist_head *head); +/** + * llist_add - add a new entry + * @new: new entry to be added + * @head: the head for your lock-less list + */ +static inline void llist_add(struct llist_node *new, struct llist_head *head) +{ + struct llist_node *entry, *old_entry; + +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + entry = head->first; + do { + old_entry = entry; + new->next = entry; + cpu_relax(); + } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); +} + +/** + * llist_del_all - delete all entries from lock-less list + * @head: the head of lock-less list to delete all entries + * + * If list is empty, return NULL, otherwise, delete all entries and + * return the pointer to the first entry. The order of entries + * deleted is from the newest to the oldest added one. + */ +static inline struct llist_node *llist_del_all(struct llist_head *head) +{ +#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG + BUG_ON(in_nmi()); +#endif + + return xchg(&head->first, NULL); +} #endif /* LLIST_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 6c695ff9caba..32f3e5ae2be5 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -276,7 +276,4 @@ config CORDIC so its calculations are in fixed point. Modules can select this when they require this function. Module will be called cordic. -config LLIST - bool - endmenu diff --git a/lib/Makefile b/lib/Makefile index 3f5bc6d903e0..a4da283f5dc0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -22,7 +22,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \ - bsearch.o find_last_bit.o find_next_bit.o + bsearch.o find_last_bit.o find_next_bit.o llist.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o @@ -115,8 +115,6 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o obj-$(CONFIG_CORDIC) += cordic.o -obj-$(CONFIG_LLIST) += llist.o - hostprogs-y := gen_crc32table clean-files := crc32table.h diff --git a/lib/llist.c b/lib/llist.c index da445724fa1f..3e3fa9139c41 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -29,28 +29,6 @@ #include -/** - * llist_add - add a new entry - * @new: new entry to be added - * @head: the head for your lock-less list - */ -void llist_add(struct llist_node *new, struct llist_head *head) -{ - struct llist_node *entry, *old_entry; - -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - - entry = head->first; - do { - old_entry = entry; - new->next = entry; - cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); -} -EXPORT_SYMBOL_GPL(llist_add); - /** * llist_add_batch - add several linked entries in batch * @new_first: first entry in batch to be added @@ -109,21 +87,3 @@ struct llist_node *llist_del_first(struct llist_head *head) return entry; } EXPORT_SYMBOL_GPL(llist_del_first); - -/** - * llist_del_all - delete all entries from lock-less list - * @head: the head of lock-less list to delete all entries - * - * If list is empty, return NULL, 
otherwise, delete all entries and - * return the pointer to the first entry. The order of entries - * deleted is from the newest to the oldest added one. - */ -struct llist_node *llist_del_all(struct llist_head *head) -{ -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - - return xchg(&head->first, NULL); -} -EXPORT_SYMBOL_GPL(llist_del_all); -- cgit v1.2.3-58-ga151 From 2c30245c65e8ebc3080b75ce65572ab8140bad0b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 4 Oct 2011 12:43:11 +0200 Subject: llist: Remove the platform-dependent NMI checks Remove the nmi() checks spread around the code. in_nmi() is not available on every architecture and it's a pretty obscure and ugly check in any case. Cc: Huang Ying Cc: Mathieu Desnoyers Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1315461646-1379-3-git-send-email-ying.huang@intel.com Signed-off-by: Ingo Molnar --- include/linux/llist.h | 12 ++---------- lib/llist.c | 12 ++---------- 2 files changed, 4 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/llist.h b/include/linux/llist.h index 3eccdfd66096..65fca1cbf514 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -35,8 +35,8 @@ * * The basic atomic operation of this list is cmpxchg on long. On * architectures that don't have NMI-safe cmpxchg implementation, the - * list can NOT be used in NMI handler. So code uses the list in NMI - * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * list can NOT be used in NMI handlers. So code that uses the list in + * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. * * Copyright 2010,2011 Intel Corp. * Author: Huang Ying @@ -147,10 +147,6 @@ static inline void llist_add(struct llist_node *new, struct llist_head *head) { struct llist_node *entry, *old_entry; -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - entry = head->first; do { old_entry = entry; @@ -169,10 +165,6 @@ static inline void llist_add(struct llist_node *new, struct llist_head *head) */ static inline struct llist_node *llist_del_all(struct llist_head *head) { -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - return xchg(&head->first, NULL); } #endif /* LLIST_H */ diff --git a/lib/llist.c b/lib/llist.c index 3e3fa9139c41..b445f2c8596a 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -3,8 +3,8 @@ * * The basic atomic operation of this list is cmpxchg on long. On * architectures that don't have NMI-safe cmpxchg implementation, the - * list can NOT be used in NMI handler. So code uses the list in NMI - * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * list can NOT be used in NMI handlers. So code that uses the list in + * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. * * Copyright 2010,2011 Intel Corp. 
* Author: Huang Ying @@ -40,10 +40,6 @@ void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, { struct llist_node *entry, *old_entry; -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - entry = head->first; do { old_entry = entry; @@ -71,10 +67,6 @@ struct llist_node *llist_del_first(struct llist_head *head) { struct llist_node *entry, *old_entry, *next; -#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG - BUG_ON(in_nmi()); -#endif - entry = head->first; do { if (entry == NULL) -- cgit v1.2.3-58-ga151 From a3127336b71f6833d1483c856dce91fe558dc3a9 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Sep 2011 14:00:44 +0800 Subject: llist: Move cpu_relax() to after the cmpxchg() If in llist_add()/etc. functions the first cmpxchg() call succeeds, it is not necessary to use cpu_relax() before the cmpxchg(). So cpu_relax() in a busy loop involving cmpxchg() should go after cmpxchg() instead of before that. This patch fixes this for all involved llist functions. Signed-off-by: Huang Ying Acked-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1315461646-1379-4-git-send-email-ying.huang@intel.com Signed-off-by: Ingo Molnar --- include/linux/llist.h | 7 +++++-- lib/llist.c | 14 ++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/llist.h b/include/linux/llist.h index 65fca1cbf514..ca91875286bf 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -148,11 +148,14 @@ static inline void llist_add(struct llist_node *new, struct llist_head *head) struct llist_node *entry, *old_entry; entry = head->first; - do { + for (;;) { old_entry = entry; new->next = entry; + entry = cmpxchg(&head->first, old_entry, new); + if (entry == old_entry) + break; cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry); + } } /** diff --git a/lib/llist.c b/lib/llist.c index b445f2c8596a..6c69f1d14c4b 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -41,11 +41,14 @@ void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_node *entry, *old_entry; entry = head->first; - do { + for (;;) { old_entry = entry; new_last->next = entry; + entry = cmpxchg(&head->first, old_entry, new_first); + if (entry == old_entry) + break; cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry); + } } EXPORT_SYMBOL_GPL(llist_add_batch); @@ -68,13 +71,16 @@ struct llist_node *llist_del_first(struct llist_head *head) struct llist_node *entry, *old_entry, *next; entry = head->first; - do { + for (;;) { if (entry == NULL) return NULL; old_entry = entry; next = entry->next; + entry = cmpxchg(&head->first, old_entry, next); + if (entry == old_entry) + break; cpu_relax(); - } while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry); + } return entry; } -- cgit v1.2.3-58-ga151 From 781f7fd916fc77a862e20063ed3aeedf173234f9 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Sep 2011 14:00:45 +0800 Subject: llist: Return whether list is empty before adding in llist_add() Extend the llist_add*() functions to return a success indicator, this allows us in the scheduler code to send an IPI if the queue was empty. ( There's no effect on existing users, because the list_add_xxx() functions are inline, thus this will be optimized out by the compiler if not used by callers. 
) Signed-off-by: Huang Ying Cc: Mathieu Desnoyers Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1315461646-1379-5-git-send-email-ying.huang@intel.com Signed-off-by: Ingo Molnar --- include/linux/llist.h | 6 +++++- lib/llist.c | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/llist.h b/include/linux/llist.h index ca91875286bf..27bbdf5ddf82 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -142,8 +142,10 @@ static inline bool llist_empty(const struct llist_head *head) * llist_add - add a new entry * @new: new entry to be added * @head: the head for your lock-less list + * + * Return whether list is empty before adding. */ -static inline void llist_add(struct llist_node *new, struct llist_head *head) +static inline bool llist_add(struct llist_node *new, struct llist_head *head) { struct llist_node *entry, *old_entry; @@ -156,6 +158,8 @@ static inline void llist_add(struct llist_node *new, struct llist_head *head) break; cpu_relax(); } + + return old_entry == NULL; } /** diff --git a/lib/llist.c b/lib/llist.c index 6c69f1d14c4b..878985c4d19d 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -34,8 +34,10 @@ * @new_first: first entry in batch to be added * @new_last: last entry in batch to be added * @head: the head for your lock-less list + * + * Return whether list is empty before adding. */ -void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, +bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, struct llist_head *head) { struct llist_node *entry, *old_entry; @@ -49,6 +51,8 @@ void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, break; cpu_relax(); } + + return old_entry == NULL; } EXPORT_SYMBOL_GPL(llist_add_batch); -- cgit v1.2.3-58-ga151 From 38aaf8090d34b623b7919d8c933f6e938c9bf44b Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 8 Sep 2011 14:00:46 +0800 Subject: irq_work: Use llist in the struct irq_work logic Use llist in irq_work instead of the lock-less linked list implementation in irq_work to avoid the code duplication. 
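[ Editor's sketch: the producer/consumer pattern irq_work adopts here, as a stand-alone user-space analogue. The names (my_node, my_head, my_push, my_pop_all) are hypothetical, and C11 atomics stand in for the kernel's cmpxchg()/xchg():

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct my_node {
		struct my_node *next;
	};

	struct my_head {
		_Atomic(struct my_node *) first;
	};

	/* Push one node; returns true if the list was empty beforehand,
	 * which is the cue irq_work uses to raise the self-interrupt. */
	static bool my_push(struct my_node *new, struct my_head *head)
	{
		struct my_node *old = atomic_load(&head->first);

		do {
			new->next = old;	/* link before publishing */
		} while (!atomic_compare_exchange_weak(&head->first, &old, new));

		return old == NULL;
	}

	/* Detach the whole list in one shot; entries come back
	 * newest-first, like llist_del_all(). */
	static struct my_node *my_pop_all(struct my_head *head)
	{
		return atomic_exchange(&head->first, NULL);
	}

Because the producer only ever pushes at the head and the consumer detaches the entire list atomically, neither side needs a lock. ]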
Signed-off-by: Huang Ying Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1315461646-1379-6-git-send-email-ying.huang@intel.com Signed-off-by: Ingo Molnar --- include/linux/irq_work.h | 15 ++++---- kernel/irq_work.c | 91 ++++++++++++++++++------------------------------ 2 files changed, 42 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 4fa09d4d0b71..6a9e8f5399e2 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -1,20 +1,23 @@ #ifndef _LINUX_IRQ_WORK_H #define _LINUX_IRQ_WORK_H +#include + struct irq_work { - struct irq_work *next; + unsigned long flags; + struct llist_node llnode; void (*func)(struct irq_work *); }; static inline -void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *)) +void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) { - entry->next = NULL; - entry->func = func; + work->flags = 0; + work->func = func; } -bool irq_work_queue(struct irq_work *entry); +bool irq_work_queue(struct irq_work *work); void irq_work_run(void); -void irq_work_sync(struct irq_work *entry); +void irq_work_sync(struct irq_work *work); #endif /* _LINUX_IRQ_WORK_H */ diff --git a/kernel/irq_work.c b/kernel/irq_work.c index c58fa7da8aef..6f0a4310defd 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -17,54 +17,34 @@ * claimed NULL, 3 -> {pending} : claimed to be enqueued * pending next, 3 -> {busy} : queued, pending callback * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed - * - * We use the lower two bits of the next pointer to keep PENDING and BUSY - * flags. */ #define IRQ_WORK_PENDING 1UL #define IRQ_WORK_BUSY 2UL #define IRQ_WORK_FLAGS 3UL -static inline bool irq_work_is_set(struct irq_work *entry, int flags) -{ - return (unsigned long)entry->next & flags; -} - -static inline struct irq_work *irq_work_next(struct irq_work *entry) -{ - unsigned long next = (unsigned long)entry->next; - next &= ~IRQ_WORK_FLAGS; - return (struct irq_work *)next; -} - -static inline struct irq_work *next_flags(struct irq_work *entry, int flags) -{ - unsigned long next = (unsigned long)entry; - next |= flags; - return (struct irq_work *)next; -} - -static DEFINE_PER_CPU(struct irq_work *, irq_work_list); +static DEFINE_PER_CPU(struct llist_head, irq_work_list); /* * Claim the entry so that no one else will poke at it. */ -static bool irq_work_claim(struct irq_work *entry) +static bool irq_work_claim(struct irq_work *work) { - struct irq_work *next, *nflags; + unsigned long flags, nflags; - do { - next = entry->next; - if ((unsigned long)next & IRQ_WORK_PENDING) + for (;;) { + flags = work->flags; + if (flags & IRQ_WORK_PENDING) return false; - nflags = next_flags(next, IRQ_WORK_FLAGS); - } while (cmpxchg(&entry->next, next, nflags) != next); + nflags = flags | IRQ_WORK_FLAGS; + if (cmpxchg(&work->flags, flags, nflags) == flags) + break; + cpu_relax(); + } return true; } - void __weak arch_irq_work_raise(void) { /* @@ -75,20 +55,15 @@ void __weak arch_irq_work_raise(void) /* * Queue the entry and raise the IPI if needed. */ -static void __irq_work_queue(struct irq_work *entry) +static void __irq_work_queue(struct irq_work *work) { - struct irq_work *next; + bool empty; preempt_disable(); - do { - next = __this_cpu_read(irq_work_list); - /* Can assign non-atomic because we keep the flags set. 
*/ - entry->next = next_flags(next, IRQ_WORK_FLAGS); - } while (this_cpu_cmpxchg(irq_work_list, next, entry) != next); - + empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); /* The list was empty, raise self-interrupt to start processing. */ - if (!irq_work_next(entry)) + if (empty) arch_irq_work_raise(); preempt_enable(); @@ -100,16 +75,16 @@ static void __irq_work_queue(struct irq_work *entry) * * Can be re-enqueued while the callback is still in progress. */ -bool irq_work_queue(struct irq_work *entry) +bool irq_work_queue(struct irq_work *work) { - if (!irq_work_claim(entry)) { + if (!irq_work_claim(work)) { /* * Already enqueued, can't do! */ return false; } - __irq_work_queue(entry); + __irq_work_queue(work); return true; } EXPORT_SYMBOL_GPL(irq_work_queue); @@ -120,34 +95,34 @@ EXPORT_SYMBOL_GPL(irq_work_queue); */ void irq_work_run(void) { - struct irq_work *list; + struct irq_work *work; + struct llist_head *this_list; + struct llist_node *llnode; - if (this_cpu_read(irq_work_list) == NULL) + this_list = &__get_cpu_var(irq_work_list); + if (llist_empty(this_list)) return; BUG_ON(!in_irq()); BUG_ON(!irqs_disabled()); - list = this_cpu_xchg(irq_work_list, NULL); - - while (list != NULL) { - struct irq_work *entry = list; + llnode = llist_del_all(this_list); + while (llnode != NULL) { + work = llist_entry(llnode, struct irq_work, llnode); - list = irq_work_next(list); + llnode = llnode->next; /* - * Clear the PENDING bit, after this point the @entry + * Clear the PENDING bit, after this point the @work * can be re-used. */ - entry->next = next_flags(NULL, IRQ_WORK_BUSY); - entry->func(entry); + work->flags = IRQ_WORK_BUSY; + work->func(work); /* * Clear the BUSY bit and return to the free state if * no-one else claimed it meanwhile. */ - (void)cmpxchg(&entry->next, - next_flags(NULL, IRQ_WORK_BUSY), - NULL); + (void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0); } } EXPORT_SYMBOL_GPL(irq_work_run); @@ -156,11 +131,11 @@ EXPORT_SYMBOL_GPL(irq_work_run); * Synchronize against the irq_work @entry, ensures the entry is not * currently in use. */ -void irq_work_sync(struct irq_work *entry) +void irq_work_sync(struct irq_work *work) { WARN_ON_ONCE(irqs_disabled()); - while (irq_work_is_set(entry, IRQ_WORK_BUSY)) + while (work->flags & IRQ_WORK_BUSY) cpu_relax(); } EXPORT_SYMBOL_GPL(irq_work_sync); -- cgit v1.2.3-58-ga151 From 924f8f5af31423529cc3940cb2ae9fee736b7517 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 Sep 2011 13:12:28 +0200 Subject: llist: Add llist_next() So we don't have to expose the struct list_node member. 
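[ Editor's sketch, continuing the hypothetical user-space analogue above: the accessor keeps callers away from the ->next member, and the drain loop mirrors the shape of irq_work_run() above. Note that the next pointer is fetched before the entry is handed to its callback, since the entry may be reused from that point on. my_next() and my_drain() are hypothetical names:

	static struct my_node *my_next(struct my_node *node)
	{
		return node->next;
	}

	static void my_drain(struct my_head *head,
			     void (*fn)(struct my_node *))
	{
		struct my_node *node = my_pop_all(head);

		while (node) {
			struct my_node *next = my_next(node);

			fn(node);	/* node may be recycled from here on */
			node = next;
		}
	}
]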
Cc: Huang Ying Cc: Andrew Morton Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1315836348.26517.41.camel@twins Signed-off-by: Ingo Molnar --- include/linux/llist.h | 5 +++++ kernel/irq_work.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/llist.h b/include/linux/llist.h index 27bbdf5ddf82..e2e96d04ee48 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -138,6 +138,11 @@ static inline bool llist_empty(const struct llist_head *head) return ACCESS_ONCE(head->first) == NULL; } +static inline struct llist_node *llist_next(struct llist_node *node) +{ + return node->next; +} + /** * llist_add - add a new entry * @new: new entry to be added diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 6f0a4310defd..0e2cde4f380b 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -110,7 +110,7 @@ void irq_work_run(void) while (llnode != NULL) { work = llist_entry(llnode, struct irq_work, llnode); - llnode = llnode->next; + llnode = llist_next(llnode); /* * Clear the PENDING bit, after this point the @work -- cgit v1.2.3-58-ga151 From fa14ff4accfb24e59d2473f3d864d6648d80563b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 Sep 2011 13:06:17 +0200 Subject: sched: Convert to struct llist Use the generic llist primitives. We had a private lockless list implementation in the scheduler in the wake-list code, now that we have a generic llist implementation that provides all required operations, switch to it. This patch is not expected to change any behavior. Signed-off-by: Peter Zijlstra Cc: Huang Ying Cc: Andrew Morton Link: http://lkml.kernel.org/r/1315836353.26517.42.camel@twins Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 ++- kernel/sched.c | 48 ++++++++++-------------------------------------- 2 files changed, 12 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9fda2888a6ab..fc3e8911818a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -90,6 +90,7 @@ struct sched_param { #include #include #include +#include #include @@ -1225,7 +1226,7 @@ struct task_struct { unsigned int ptrace; #ifdef CONFIG_SMP - struct task_struct *wake_entry; + struct llist_node wake_entry; int on_cpu; #endif int on_rq; diff --git a/kernel/sched.c b/kernel/sched.c index c5cf15e1eb57..1874c7418319 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -702,7 +702,7 @@ struct rq { #endif #ifdef CONFIG_SMP - struct task_struct *wake_list; + struct llist_head wake_list; #endif }; @@ -2698,42 +2698,26 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) } #ifdef CONFIG_SMP -static void sched_ttwu_do_pending(struct task_struct *list) +static void sched_ttwu_pending(void) { struct rq *rq = this_rq(); + struct llist_node *llist = llist_del_all(&rq->wake_list); + struct task_struct *p; raw_spin_lock(&rq->lock); - while (list) { - struct task_struct *p = list; - list = list->wake_entry; + while (llist) { + p = llist_entry(llist, struct task_struct, wake_entry); + llist = llist_next(llist); ttwu_do_activate(rq, p, 0); } raw_spin_unlock(&rq->lock); } -#ifdef CONFIG_HOTPLUG_CPU - -static void sched_ttwu_pending(void) -{ - struct rq *rq = this_rq(); - struct task_struct *list = xchg(&rq->wake_list, NULL); - - if (!list) - return; - - sched_ttwu_do_pending(list); -} - -#endif /* CONFIG_HOTPLUG_CPU */ - void scheduler_ipi(void) { - struct rq *rq = this_rq(); - struct task_struct *list = xchg(&rq->wake_list, NULL); - - if (!list) + if 
(llist_empty(&this_rq()->wake_list)) return; /* @@ -2750,25 +2734,13 @@ void scheduler_ipi(void) * somewhat pessimize the simple resched case. */ irq_enter(); - sched_ttwu_do_pending(list); + sched_ttwu_pending(); irq_exit(); } static void ttwu_queue_remote(struct task_struct *p, int cpu) { - struct rq *rq = cpu_rq(cpu); - struct task_struct *next = rq->wake_list; - - for (;;) { - struct task_struct *old = next; - - p->wake_entry = next; - next = cmpxchg(&rq->wake_list, old, p); - if (next == old) - break; - } - - if (!next) + if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) smp_send_reschedule(cpu); } -- cgit v1.2.3-58-ga151 From f0f1d32f931b705c4ee5dd374074d34edf3eae14 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 Sep 2011 15:50:49 +0200 Subject: llist: Remove cpu_relax() usage in cmpxchg loops Initial benchmarks show the cpu_relax() calls are a net loss: $ for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor ; do echo performance > $i; done $ echo 4096 32000 64 128 > /proc/sys/kernel/sem $ ./sembench -t 2048 -w 1900 -o 0 Pre: run time 30 seconds 778936 worker burns per second run time 30 seconds 912190 worker burns per second run time 30 seconds 817506 worker burns per second run time 30 seconds 830870 worker burns per second run time 30 seconds 845056 worker burns per second Post: run time 30 seconds 905920 worker burns per second run time 30 seconds 849046 worker burns per second run time 30 seconds 886286 worker burns per second run time 30 seconds 822320 worker burns per second run time 30 seconds 900283 worker burns per second So about 4% faster. (!) cpu_relax() stalls the pipeline; when used in a tight loop this has the following benefits: - allows SMT siblings to have a go; - reduces pressure on the CPU interconnect. However, cmpxchg loops are unfair and thus have unbounded completion time, so we should avoid getting into such heavily contended situations in the first place, which is the only case where the above benefits make any difference. A typical cmpxchg loop should not go round more than a handful of times at worst, so adding extra delays just slows things down. Since the llist primitives are new, there aren't any bad users yet, and we should avoid growing them. Heavily contended sites should generally be better off using ticket locks for serialization, since they provide bounded completion times (fifo-fair over the cpus). 
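[ Editor's sketch of the fairness argument: a minimal ticket lock in C11 atomics. This illustrates the principle only, not the kernel's arch-specific spinlock implementation; a waiter is served after exactly as many hand-offs as there were earlier arrivals, which is the bounded, fifo-fair completion referred to above. The names are hypothetical:

	#include <stdatomic.h>

	struct my_ticket_lock {
		atomic_uint next;	/* next ticket to hand out */
		atomic_uint owner;	/* ticket currently served */
	};				/* zero-initialise before use */

	static void my_lock(struct my_ticket_lock *lock)
	{
		/* fetch_add makes arrival order the service order */
		unsigned int me = atomic_fetch_add(&lock->next, 1);

		/* Spin until it is our turn. */
		while (atomic_load(&lock->owner) != me)
			;	/* a real lock would pause/relax here */
	}

	static void my_unlock(struct my_ticket_lock *lock)
	{
		atomic_fetch_add(&lock->owner, 1);	/* next in line */
	}

An unfair cmpxchg loop, by contrast, can in principle lose the race on every retry. ]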
Signed-off-by: Peter Zijlstra Cc: Huang Ying Cc: Andrew Morton Link: http://lkml.kernel.org/r/1315836358.26517.43.camel@twins Signed-off-by: Ingo Molnar --- include/linux/llist.h | 1 - lib/llist.c | 2 -- 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/llist.h b/include/linux/llist.h index e2e96d04ee48..837fb4ae66fb 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -161,7 +161,6 @@ static inline bool llist_add(struct llist_node *new, struct llist_head *head) entry = cmpxchg(&head->first, old_entry, new); if (entry == old_entry) break; - cpu_relax(); } return old_entry == NULL; diff --git a/lib/llist.c b/lib/llist.c index 878985c4d19d..700cff77a387 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -49,7 +49,6 @@ bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last, entry = cmpxchg(&head->first, old_entry, new_first); if (entry == old_entry) break; - cpu_relax(); } return old_entry == NULL; @@ -83,7 +82,6 @@ struct llist_node *llist_del_first(struct llist_head *head) entry = cmpxchg(&head->first, old_entry, next); if (entry == old_entry) break; - cpu_relax(); } return entry; -- cgit v1.2.3-58-ga151 From 540f41edc15473ca3b2876de72646546ae101374 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 5 Oct 2011 17:25:28 +1100 Subject: llist: Add back llist_add_batch() and llist_del_first() prototypes Commit 1230db8e1543 ("llist: Make some llist functions inline") has deleted the definitions, causing problems for (not upstream yet) code that tries to make use of them. Signed-off-by: Stephen Rothwell Acked-by: Peter Zijlstra Cc: Huang Ying Cc: David Miller Link: http://lkml.kernel.org/r/20111005172528.0d0a8afc65acef7ace22a24e@canb.auug.org.au Signed-off-by: Ingo Molnar --- include/linux/llist.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/llist.h b/include/linux/llist.h index 837fb4ae66fb..7287734e08d1 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -178,4 +178,10 @@ static inline struct llist_node *llist_del_all(struct llist_head *head) { return xchg(&head->first, NULL); } + +extern bool llist_add_batch(struct llist_node *new_first, + struct llist_node *new_last, + struct llist_head *head); +extern struct llist_node *llist_del_first(struct llist_head *head); + #endif /* LLIST_H */ -- cgit v1.2.3-58-ga151
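[ Editor's sketch, in the same hypothetical user-space analogue as above, of the batch add whose out-of-line prototype this last patch restores: a whole pre-linked chain is published with a single successful compare-and-exchange on the list head. my_push_batch() is a hypothetical name:

	static bool my_push_batch(struct my_node *new_first,
				  struct my_node *new_last,
				  struct my_head *head)
	{
		struct my_node *old = atomic_load(&head->first);

		do {
			/* splice the old list after the batch */
			new_last->next = old;
		} while (!atomic_compare_exchange_weak(&head->first,
						       &old, new_first));

		return old == NULL;	/* list was empty before the add */
	}
]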