From 34f2c0ac111aaf090c7d2432dab532640870733a Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Sun, 1 Apr 2012 16:38:46 -0400
Subject: tile: fix multiple build failures from system.h dismantle

Commit bd119c69239322caafdb64517a806037d0d0c70a

    "Disintegrate asm/system.h for Tile"

created the asm/switch_to.h file, but did not add an include
of it to all its users.

Also, commit b4816afa3986704d1404fc48e931da5135820472

        "Move the asm-generic/system.h xchg() implementation to asm-generic/cmpxchg.h"

introduced the concept of asm/cmpxchg.h but the tile arch
never got one.  Fork the cmpxchg content out of the asm/atomic.h
file to create one.

Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/atomic.h  | 50 ++--------------------------
 arch/tile/include/asm/cmpxchg.h | 73 +++++++++++++++++++++++++++++++++++++++++
 arch/tile/kernel/process.c      |  1 +
 arch/tile/kernel/stack.c        |  1 +
 4 files changed, 77 insertions(+), 48 deletions(-)
 create mode 100644 arch/tile/include/asm/cmpxchg.h

(limited to 'arch')

diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index bb696da5d7cd..f2461429a4a4 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -17,6 +17,8 @@
 #ifndef _ASM_TILE_ATOMIC_H
 #define _ASM_TILE_ATOMIC_H
 
+#include <asm/cmpxchg.h>
+
 #ifndef __ASSEMBLY__
 
 #include <linux/compiler.h>
@@ -121,54 +123,6 @@ static inline int atomic_read(const atomic_t *v)
  */
 #define atomic_add_negative(i, v)	(atomic_add_return((i), (v)) < 0)
 
-/* Nonexistent functions intended to cause link errors. */
-extern unsigned long __xchg_called_with_bad_pointer(void);
-extern unsigned long __cmpxchg_called_with_bad_pointer(void);
-
-#define xchg(ptr, x)							\
-	({								\
-		typeof(*(ptr)) __x;					\
-		switch (sizeof(*(ptr))) {				\
-		case 4:							\
-			__x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \
-				(atomic_t *)(ptr),			\
-				(u32)(typeof((x)-(x)))(x));		\
-			break;						\
-		case 8:							\
-			__x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \
-				(atomic64_t *)(ptr),			\
-				(u64)(typeof((x)-(x)))(x));		\
-			break;						\
-		default:						\
-			__xchg_called_with_bad_pointer();		\
-		}							\
-		__x;							\
-	})
-
-#define cmpxchg(ptr, o, n)						\
-	({								\
-		typeof(*(ptr)) __x;					\
-		switch (sizeof(*(ptr))) {				\
-		case 4:							\
-			__x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \
-				(atomic_t *)(ptr),			\
-				(u32)(typeof((o)-(o)))(o),		\
-				(u32)(typeof((n)-(n)))(n));		\
-			break;						\
-		case 8:							\
-			__x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \
-				(atomic64_t *)(ptr),			\
-				(u64)(typeof((o)-(o)))(o),		\
-				(u64)(typeof((n)-(n)))(n));		\
-			break;						\
-		default:						\
-			__cmpxchg_called_with_bad_pointer();		\
-		}							\
-		__x;							\
-	})
-
-#define tas(ptr) (xchg((ptr), 1))
-
 #endif /* __ASSEMBLY__ */
 
 #ifndef __tilegx__
diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h
new file mode 100644
index 000000000000..276f067e3640
--- /dev/null
+++ b/arch/tile/include/asm/cmpxchg.h
@@ -0,0 +1,73 @@
+/*
+ * cmpxchg.h -- forked from asm/atomic.h with this copyright:
+ *
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ */
+
+#ifndef _ASM_TILE_CMPXCHG_H
+#define _ASM_TILE_CMPXCHG_H
+
+#ifndef __ASSEMBLY__
+
+/* Nonexistent functions intended to cause link errors. */
+extern unsigned long __xchg_called_with_bad_pointer(void);
+extern unsigned long __cmpxchg_called_with_bad_pointer(void);
+
+#define xchg(ptr, x)							\
+	({								\
+		typeof(*(ptr)) __x;					\
+		switch (sizeof(*(ptr))) {				\
+		case 4:							\
+			__x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \
+				(atomic_t *)(ptr),			\
+				(u32)(typeof((x)-(x)))(x));		\
+			break;						\
+		case 8:							\
+			__x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \
+				(atomic64_t *)(ptr),			\
+				(u64)(typeof((x)-(x)))(x));		\
+			break;						\
+		default:						\
+			__xchg_called_with_bad_pointer();		\
+		}							\
+		__x;							\
+	})
+
+#define cmpxchg(ptr, o, n)						\
+	({								\
+		typeof(*(ptr)) __x;					\
+		switch (sizeof(*(ptr))) {				\
+		case 4:							\
+			__x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \
+				(atomic_t *)(ptr),			\
+				(u32)(typeof((o)-(o)))(o),		\
+				(u32)(typeof((n)-(n)))(n));		\
+			break;						\
+		case 8:							\
+			__x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \
+				(atomic64_t *)(ptr),			\
+				(u64)(typeof((o)-(o)))(o),		\
+				(u64)(typeof((n)-(n)))(n));		\
+			break;						\
+		default:						\
+			__cmpxchg_called_with_bad_pointer();		\
+		}							\
+		__x;							\
+	})
+
+#define tas(ptr) (xchg((ptr), 1))
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_TILE_CMPXCHG_H */
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 30caecac94dc..ee01b1c683e4 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -28,6 +28,7 @@
 #include <linux/tracehook.h>
 #include <linux/signal.h>
 #include <asm/stack.h>
+#include <asm/switch_to.h>
 #include <asm/homecache.h>
 #include <asm/syscalls.h>
 #include <asm/traps.h>
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index 37ee4d037e0b..0be6b0109ce0 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -25,6 +25,7 @@
 #include <asm/page.h>
 #include <asm/tlbflush.h>
 #include <asm/ucontext.h>
+#include <asm/switch_to.h>
 #include <asm/sigframe.h>
 #include <asm/stack.h>
 #include <arch/abi.h>
-- 
cgit v1.2.3-58-ga151


From 8d6951439ef524683057251f1231df232046b6b6 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Tue, 27 Mar 2012 13:47:57 -0400
Subject: arch/tile/Kconfig: remove pointless "!M386" test.

Looks like a cut and paste bug from the x86 version.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 11270ca22c0a..30413c1d1069 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -12,7 +12,7 @@ config TILE
 	select GENERIC_PENDING_IRQ if SMP
 	select GENERIC_IRQ_SHOW
 	select SYS_HYPERVISOR
-	select ARCH_HAVE_NMI_SAFE_CMPXCHG if !M386
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
-- 
cgit v1.2.3-58-ga151


From 3d1e8a81cc81e62d13731e48c40760e60f31b0d5 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Tue, 27 Mar 2012 13:53:30 -0400
Subject: arch/tile/Kconfig: rename tile_defconfig to tilepro_defconfig

We switched to using "tilepro" for the 32-bit stuff a while ago,
but missed this one usage.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/Kconfig | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 30413c1d1069..30393aa500ec 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -69,6 +69,9 @@ config ARCH_PHYS_ADDR_T_64BIT
 config ARCH_DMA_ADDR_T_64BIT
 	def_bool y
 
+config NEED_DMA_MAP_STATE
+	def_bool y
+
 config LOCKDEP_SUPPORT
 	def_bool y
 
@@ -118,7 +121,7 @@ config 64BIT
 
 config ARCH_DEFCONFIG
 	string
-	default "arch/tile/configs/tile_defconfig" if !TILEGX
+	default "arch/tile/configs/tilepro_defconfig" if !TILEGX
 	default "arch/tile/configs/tilegx_defconfig" if TILEGX
 
 source "init/Kconfig"
-- 
cgit v1.2.3-58-ga151


From 884197f7ea93dc2faf399060dc29cb0dbbda6937 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Tue, 27 Mar 2012 13:56:04 -0400
Subject: arch/tile/Kconfig: don't specify CONFIG_PAGE_OFFSET for 64-bit builds

It's fixed at half the VA space and there's no point in configuring it.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 30393aa500ec..96033e2d6845 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -243,6 +243,7 @@ endchoice
 
 config PAGE_OFFSET
 	hex
+	depends on !64BIT
 	default 0xF0000000 if VMSPLIT_3_75G
 	default 0xE0000000 if VMSPLIT_3_5G
 	default 0xB0000000 if VMSPLIT_2_75G
-- 
cgit v1.2.3-58-ga151


From 327e8b6b25588ab906856a4e506b28c59422f11b Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Tue, 27 Mar 2012 14:04:57 -0400
Subject: arch/tile: fix typo in <arch/spr_def.h>

We aren't yet using this definition in the kernel, but fix it up
before someone goes looking for it.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/arch/spr_def.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h
index f548efeb2de3..d6ba449b5363 100644
--- a/arch/tile/include/arch/spr_def.h
+++ b/arch/tile/include/arch/spr_def.h
@@ -60,8 +60,8 @@
 	_concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,)
 #define SPR_IPI_EVENT_RESET_K \
 	_concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,)
-#define SPR_IPI_MASK_SET_K \
-	_concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,)
+#define SPR_IPI_EVENT_SET_K \
+	_concat4(SPR_IPI_EVENT_SET_, CONFIG_KERNEL_PL,,)
 #define INT_IPI_K \
 	_concat4(INT_IPI_, CONFIG_KERNEL_PL,,)
 
-- 
cgit v1.2.3-58-ga151


From 07feea877d18453bbe4ad47fe2a365eebf56a7af Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Tue, 27 Mar 2012 14:10:03 -0400
Subject: arch/tile: revert comment for atomic64_add_unless().

It still returns whether @v was not @u, not the old value,
unlike __atomic_add_unless().

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Arun Sharma <asharma@fb.com>
---
 arch/tile/include/asm/atomic_32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index 466dc4a39a4f..54d1da826f93 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -200,7 +200,7 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
  * @u: ...unless v is equal to u.
  *
  * Atomically adds @a to @v, so long as @v was not already @u.
- * Returns the old value of @v.
+ * Returns non-zero if @v was not @u, and zero otherwise.
  */
 static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
 {
-- 
cgit v1.2.3-58-ga151


From 664c100bce0070148131f8d49518015476c03cae Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Tue, 27 Mar 2012 14:17:05 -0400
Subject: arch/tile: fix gcc 4.6 warnings in <asm/bitops_64.h>

Fix some signedness and variable usage warnings in change_bit()
and test_and_change_bit().

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/bitops_64.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h
index 58d021a9834f..60b87ee54fb8 100644
--- a/arch/tile/include/asm/bitops_64.h
+++ b/arch/tile/include/asm/bitops_64.h
@@ -38,10 +38,10 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
 
 static inline void change_bit(unsigned nr, volatile unsigned long *addr)
 {
-	unsigned long old, mask = (1UL << (nr % BITS_PER_LONG));
-	long guess, oldval;
+	unsigned long mask = (1UL << (nr % BITS_PER_LONG));
+	unsigned long guess, oldval;
 	addr += nr / BITS_PER_LONG;
-	old = *addr;
+	oldval = *addr;
 	do {
 		guess = oldval;
 		oldval = atomic64_cmpxchg((atomic64_t *)addr,
@@ -85,7 +85,7 @@ static inline int test_and_change_bit(unsigned nr,
 				      volatile unsigned long *addr)
 {
 	unsigned long mask = (1UL << (nr % BITS_PER_LONG));
-	long guess, oldval = *addr;
+	unsigned long guess, oldval;
 	addr += nr / BITS_PER_LONG;
 	oldval = *addr;
 	do {
-- 
cgit v1.2.3-58-ga151


From cbe224705e573cead57c4d4bed0c91efb564a344 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Tue, 27 Mar 2012 15:21:00 -0400
Subject: arch/tile: use 0 for IRQ_RESCHEDULE instead of 1

This avoids assigning IRQ 0 to PCI devices, because we've seen that
doesn't always work well.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/irq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h
index f80f8ceabc67..33cff9a3058b 100644
--- a/arch/tile/include/asm/irq.h
+++ b/arch/tile/include/asm/irq.h
@@ -21,7 +21,7 @@
 #define NR_IRQS 32
 
 /* IRQ numbers used for linux IPIs. */
-#define IRQ_RESCHEDULE 1
+#define IRQ_RESCHEDULE 0
 
 #define irq_canonicalize(irq)   (irq)
 
-- 
cgit v1.2.3-58-ga151


From b287f69676a34a9fc341de4d79a9c74e1959dec6 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 14:02:52 -0400
Subject: arch/tile: avoid false corrupt frame warning in early boot

With lockstat we can end up trying to get a backtrace before
"high_memory" is initialized, so don't worry about range testing
if it is zero.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index ee01b1c683e4..2d5ef617bb39 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -286,7 +286,7 @@ struct task_struct *validate_current(void)
 	static struct task_struct corrupt = { .comm = "<corrupt>" };
 	struct task_struct *tsk = current;
 	if (unlikely((unsigned long)tsk < PAGE_OFFSET ||
-		     (void *)tsk > high_memory ||
+		     (high_memory && (void *)tsk > high_memory) ||
 		     ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) {
 		pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer);
 		tsk = &corrupt;
-- 
cgit v1.2.3-58-ga151


From efb734d8ed040b053f53fd53589ed5d9c9b5cd04 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 14:05:04 -0400
Subject: arch/tile: make sure to build memcpy_user_64 without frame pointer

Add a comment explaining why this is important, and add a CFLAGS_REMOVE
clause to the Makefile to make sure it happens.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/lib/Makefile         | 1 +
 arch/tile/lib/memcpy_user_64.c | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 0c26086ecbef..985f59858234 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -7,6 +7,7 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
 	strchr_$(BITS).o strlen_$(BITS).o
 
 ifeq ($(CONFIG_TILEGX),y)
+CFLAGS_REMOVE_memcpy_user_64.o = -fno-omit-frame-pointer
 lib-y += memcpy_user_64.o
 else
 lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c
index 4763b3aff1cc..37440caa7370 100644
--- a/arch/tile/lib/memcpy_user_64.c
+++ b/arch/tile/lib/memcpy_user_64.c
@@ -14,7 +14,13 @@
  * Do memcpy(), but trap and return "n" when a load or store faults.
  *
  * Note: this idiom only works when memcpy() compiles to a leaf function.
- * If "sp" is updated during memcpy, the "jrp lr" will be incorrect.
+ * Here leaf function not only means it does not have calls, but also
+ * requires no stack operations (sp, stack frame pointer) and no
+ * use of callee-saved registers, else "jrp lr" will be incorrect since
+ * unwinding stack frame is bypassed. Since memcpy() is not complex so
+ * these conditions are satisfied here, but we need to be careful when
+ * modifying this file. This is not a clean solution but is the best
+ * one so far.
  *
  * Also note that we are capturing "n" from the containing scope here.
  */
-- 
cgit v1.2.3-58-ga151


From 5f639fdcd8c186c8128c616e94a7e7b159c968ae Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 14:06:14 -0400
Subject: arch/tile: various bugs in stack backtracer

Fix a long-standing bug in the stack backtracer where we would print
garbage to the console instead of kernel function names, if the kernel
wasn't built with symbol support (e.g. mboot).

Make sure to tag every line of userspace backtrace output if we actually
have the mmap_sem, since that way if there's no tag, we know that it's
because we couldn't trylock the semaphore.

Stop doing a TLB flush and examining page tables during backtrace.
Instead, just trust that __copy_from_user_inatomic() will properly fault
and return a failure, which it should do in all cases.

Fix a latent bug where the backtracer would directly examine a signal
context in user space, rather than copying it safely to kernel memory
first.  This meant that a race with another thread could potentially
have caused a kernel panic.

Guard against unaligned sp when trying to restart backtrace at an
interrupt or signal handler point in the kernel backtracer.

Report kernel symbolic information for the call instruction rather
than for the following instruction.  We still report the actual numeric
address corresponding to the instruction after the call, for the sake
of consistency with the normal expectations for stack backtracers.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/stack.h |   1 -
 arch/tile/kernel/stack.c      | 231 ++++++++++++++++++++----------------------
 2 files changed, 112 insertions(+), 120 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
index 4d97a2db932e..0e9d382a2d45 100644
--- a/arch/tile/include/asm/stack.h
+++ b/arch/tile/include/asm/stack.h
@@ -25,7 +25,6 @@
 struct KBacktraceIterator {
 	BacktraceIterator it;
 	struct task_struct *task;     /* task we are backtracing */
-	pte_t *pgtable;		      /* page table for user space access */
 	int end;		      /* iteration complete. */
 	int new_context;              /* new context is starting */
 	int profile;                  /* profiling, so stop on async intrpt */
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index 0be6b0109ce0..b2f44c28dda6 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -21,9 +21,10 @@
 #include <linux/stacktrace.h>
 #include <linux/uaccess.h>
 #include <linux/mmzone.h>
+#include <linux/dcache.h>
+#include <linux/fs.h>
 #include <asm/backtrace.h>
 #include <asm/page.h>
-#include <asm/tlbflush.h>
 #include <asm/ucontext.h>
 #include <asm/switch_to.h>
 #include <asm/sigframe.h>
@@ -45,72 +46,23 @@ static int in_kernel_stack(struct KBacktraceIterator *kbt, unsigned long sp)
 	return sp >= kstack_base && sp < kstack_base + THREAD_SIZE;
 }
 
-/* Is address valid for reading? */
-static int valid_address(struct KBacktraceIterator *kbt, unsigned long address)
-{
-	HV_PTE *l1_pgtable = kbt->pgtable;
-	HV_PTE *l2_pgtable;
-	unsigned long pfn;
-	HV_PTE pte;
-	struct page *page;
-
-	if (l1_pgtable == NULL)
-		return 0;	/* can't read user space in other tasks */
-
-#ifdef CONFIG_64BIT
-	/* Find the real l1_pgtable by looking in the l0_pgtable. */
-	pte = l1_pgtable[HV_L0_INDEX(address)];
-	if (!hv_pte_get_present(pte))
-		return 0;
-	pfn = hv_pte_get_pfn(pte);
-	if (pte_huge(pte)) {
-		if (!pfn_valid(pfn)) {
-			pr_err("L0 huge page has bad pfn %#lx\n", pfn);
-			return 0;
-		}
-		return hv_pte_get_present(pte) && hv_pte_get_readable(pte);
-	}
-	page = pfn_to_page(pfn);
-	BUG_ON(PageHighMem(page));  /* No HIGHMEM on 64-bit. */
-	l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn);
-#endif
-	pte = l1_pgtable[HV_L1_INDEX(address)];
-	if (!hv_pte_get_present(pte))
-		return 0;
-	pfn = hv_pte_get_pfn(pte);
-	if (pte_huge(pte)) {
-		if (!pfn_valid(pfn)) {
-			pr_err("huge page has bad pfn %#lx\n", pfn);
-			return 0;
-		}
-		return hv_pte_get_present(pte) && hv_pte_get_readable(pte);
-	}
-
-	page = pfn_to_page(pfn);
-	if (PageHighMem(page)) {
-		pr_err("L2 page table not in LOWMEM (%#llx)\n",
-		       HV_PFN_TO_CPA(pfn));
-		return 0;
-	}
-	l2_pgtable = (HV_PTE *)pfn_to_kaddr(pfn);
-	pte = l2_pgtable[HV_L2_INDEX(address)];
-	return hv_pte_get_present(pte) && hv_pte_get_readable(pte);
-}
-
 /* Callback for backtracer; basically a glorified memcpy */
 static bool read_memory_func(void *result, unsigned long address,
 			     unsigned int size, void *vkbt)
 {
 	int retval;
 	struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt;
+
+	if (address == 0)
+		return 0;
 	if (__kernel_text_address(address)) {
 		/* OK to read kernel code. */
 	} else if (address >= PAGE_OFFSET) {
 		/* We only tolerate kernel-space reads of this task's stack */
 		if (!in_kernel_stack(kbt, address))
 			return 0;
-	} else if (!valid_address(kbt, address)) {
-		return 0;	/* invalid user-space address */
+	} else if (!kbt->is_current) {
+		return 0;	/* can't read from other user address spaces */
 	}
 	pagefault_disable();
 	retval = __copy_from_user_inatomic(result,
@@ -128,6 +80,8 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
 	unsigned long sp = kbt->it.sp;
 	struct pt_regs *p;
 
+	if (sp % sizeof(long) != 0)
+		return NULL;
 	if (!in_kernel_stack(kbt, sp))
 		return NULL;
 	if (!in_kernel_stack(kbt, sp + C_ABI_SAVE_AREA_SIZE + PTREGS_SIZE-1))
@@ -170,27 +124,27 @@ static int is_sigreturn(unsigned long pc)
 }
 
 /* Return a pt_regs pointer for a valid signal handler frame */
-static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt)
+static struct pt_regs *valid_sigframe(struct KBacktraceIterator* kbt,
+				      struct rt_sigframe* kframe)
 {
 	BacktraceIterator *b = &kbt->it;
 
-	if (b->pc == VDSO_BASE) {
-		struct rt_sigframe *frame;
-		unsigned long sigframe_top =
-			b->sp + sizeof(struct rt_sigframe) - 1;
-		if (!valid_address(kbt, b->sp) ||
-		    !valid_address(kbt, sigframe_top)) {
-			if (kbt->verbose)
-				pr_err("  (odd signal: sp %#lx?)\n",
-				       (unsigned long)(b->sp));
+	if (b->pc == VDSO_BASE && b->sp < PAGE_OFFSET &&
+	    b->sp % sizeof(long) == 0) {
+		int retval;
+		pagefault_disable();
+		retval = __copy_from_user_inatomic(
+			kframe, (void __user __force *)b->sp,
+			sizeof(*kframe));
+		pagefault_enable();
+		if (retval != 0 ||
+		    (unsigned int)(kframe->info.si_signo) >= _NSIG)
 			return NULL;
-		}
-		frame = (struct rt_sigframe *)b->sp;
 		if (kbt->verbose) {
 			pr_err("  <received signal %d>\n",
-			       frame->info.si_signo);
+			       kframe->info.si_signo);
 		}
-		return (struct pt_regs *)&frame->uc.uc_mcontext;
+		return (struct pt_regs *)&kframe->uc.uc_mcontext;
 	}
 	return NULL;
 }
@@ -203,10 +157,11 @@ static int KBacktraceIterator_is_sigreturn(struct KBacktraceIterator *kbt)
 static int KBacktraceIterator_restart(struct KBacktraceIterator *kbt)
 {
 	struct pt_regs *p;
+	struct rt_sigframe kframe;
 
 	p = valid_fault_handler(kbt);
 	if (p == NULL)
-		p = valid_sigframe(kbt);
+		p = valid_sigframe(kbt, &kframe);
 	if (p == NULL)
 		return 0;
 	backtrace_init(&kbt->it, read_memory_func, kbt,
@@ -266,41 +221,19 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt,
 
 	/*
 	 * Set up callback information.  We grab the kernel stack base
-	 * so we will allow reads of that address range, and if we're
-	 * asking about the current process we grab the page table
-	 * so we can check user accesses before trying to read them.
-	 * We flush the TLB to avoid any weird skew issues.
+	 * so we will allow reads of that address range.
 	 */
-	is_current = (t == NULL);
+	is_current = (t == NULL || t == current);
 	kbt->is_current = is_current;
 	if (is_current)
 		t = validate_current();
 	kbt->task = t;
-	kbt->pgtable = NULL;
 	kbt->verbose = 0;   /* override in caller if desired */
 	kbt->profile = 0;   /* override in caller if desired */
 	kbt->end = KBT_ONGOING;
-	kbt->new_context = 0;
-	if (is_current) {
-		HV_PhysAddr pgdir_pa = hv_inquire_context().page_table;
-		if (pgdir_pa == (unsigned long)swapper_pg_dir - PAGE_OFFSET) {
-			/*
-			 * Not just an optimization: this also allows
-			 * this to work at all before va/pa mappings
-			 * are set up.
-			 */
-			kbt->pgtable = swapper_pg_dir;
-		} else {
-			struct page *page = pfn_to_page(PFN_DOWN(pgdir_pa));
-			if (!PageHighMem(page))
-				kbt->pgtable = __va(pgdir_pa);
-			else
-				pr_err("page table not in LOWMEM"
-				       " (%#llx)\n", pgdir_pa);
-		}
-		local_flush_tlb_all();
+	kbt->new_context = 1;
+	if (is_current)
 		validate_stack(regs);
-	}
 
 	if (regs == NULL) {
 		if (is_current || t->state == TASK_RUNNING) {
@@ -346,6 +279,78 @@ void KBacktraceIterator_next(struct KBacktraceIterator *kbt)
 }
 EXPORT_SYMBOL(KBacktraceIterator_next);
 
+static void describe_addr(struct KBacktraceIterator *kbt,
+			  unsigned long address,
+			  int have_mmap_sem, char *buf, size_t bufsize)
+{
+	struct vm_area_struct *vma;
+	size_t namelen, remaining;
+	unsigned long size, offset, adjust;
+	char *p, *modname;
+	const char *name;
+	int rc;
+
+	/*
+	 * Look one byte back for every caller frame (i.e. those that
+	 * aren't a new context) so we look up symbol data for the
+	 * call itself, not the following instruction, which may be on
+	 * a different line (or in a different function).
+	 */
+	adjust = !kbt->new_context;
+	address -= adjust;
+
+	if (address >= PAGE_OFFSET) {
+		/* Handle kernel symbols. */
+		BUG_ON(bufsize < KSYM_NAME_LEN);
+		name = kallsyms_lookup(address, &size, &offset,
+				       &modname, buf);
+		if (name == NULL) {
+			buf[0] = '\0';
+			return;
+		}
+		namelen = strlen(buf);
+		remaining = (bufsize - 1) - namelen;
+		p = buf + namelen;
+		rc = snprintf(p, remaining, "+%#lx/%#lx ",
+			      offset + adjust, size);
+		if (modname && rc < remaining)
+			snprintf(p + rc, remaining - rc, "[%s] ", modname);
+		buf[bufsize-1] = '\0';
+		return;
+	}
+
+	/* If we don't have the mmap_sem, we can't show any more info. */
+	buf[0] = '\0';
+	if (!have_mmap_sem)
+		return;
+
+	/* Find vma info. */
+	vma = find_vma(kbt->task->mm, address);
+	if (vma == NULL || address < vma->vm_start) {
+		snprintf(buf, bufsize, "[unmapped address] ");
+		return;
+	}
+
+	if (vma->vm_file) {
+		char *s;
+		p = d_path(&vma->vm_file->f_path, buf, bufsize);
+		if (IS_ERR(p))
+			p = "?";
+		s = strrchr(p, '/');
+		if (s)
+			p = s+1;
+	} else {
+		p = "anon";
+	}
+
+	/* Generate a string description of the vma info. */
+	namelen = strlen(p);
+	remaining = (bufsize - 1) - namelen;
+	memmove(buf, p, namelen);
+	snprintf(buf + namelen, remaining, "[%lx+%lx] ",
+		 vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
 /*
  * This method wraps the backtracer's more generic support.
  * It is only invoked from the architecture-specific code; show_stack()
@@ -354,6 +359,7 @@ EXPORT_SYMBOL(KBacktraceIterator_next);
 void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
 {
 	int i;
+	int have_mmap_sem = 0;
 
 	if (headers) {
 		/*
@@ -370,31 +376,16 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
 	kbt->verbose = 1;
 	i = 0;
 	for (; !KBacktraceIterator_end(kbt); KBacktraceIterator_next(kbt)) {
-		char *modname;
-		const char *name;
-		unsigned long address = kbt->it.pc;
-		unsigned long offset, size;
 		char namebuf[KSYM_NAME_LEN+100];
+		unsigned long address = kbt->it.pc;
 
-		if (address >= PAGE_OFFSET)
-			name = kallsyms_lookup(address, &size, &offset,
-					       &modname, namebuf);
-		else
-			name = NULL;
-
-		if (!name)
-			namebuf[0] = '\0';
-		else {
-			size_t namelen = strlen(namebuf);
-			size_t remaining = (sizeof(namebuf) - 1) - namelen;
-			char *p = namebuf + namelen;
-			int rc = snprintf(p, remaining, "+%#lx/%#lx ",
-					  offset, size);
-			if (modname && rc < remaining)
-				snprintf(p + rc, remaining - rc,
-					 "[%s] ", modname);
-			namebuf[sizeof(namebuf)-1] = '\0';
-		}
+		/* Try to acquire the mmap_sem as we pass into userspace. */
+		if (address < PAGE_OFFSET && !have_mmap_sem && kbt->task->mm)
+			have_mmap_sem =
+				down_read_trylock(&kbt->task->mm->mmap_sem);
+
+		describe_addr(kbt, address, have_mmap_sem,
+			      namebuf, sizeof(namebuf));
 
 		pr_err("  frame %d: 0x%lx %s(sp 0x%lx)\n",
 		       i++, address, namebuf, (unsigned long)(kbt->it.sp));
@@ -409,6 +400,8 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers)
 		pr_err("Stack dump stopped; next frame identical to this one\n");
 	if (headers)
 		pr_err("Stack dump complete\n");
+	if (have_mmap_sem)
+		up_read(&kbt->task->mm->mmap_sem);
 }
 EXPORT_SYMBOL(tile_show_stack);
 
-- 
cgit v1.2.3-58-ga151


From e17235382dbb05f70146e141e4b780fd069050dc Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 14:52:00 -0400
Subject: arch/tile: work around a hardware issue with the return-address stack

In certain circumstances we need to do a bunch of jump-and-link
instructions to fill the hardware return-address stack with nonzero values.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/traps.h |  6 +++++-
 arch/tile/kernel/intvec_64.S  | 12 ++++++++++++
 arch/tile/kernel/traps.c      |  6 +++++-
 3 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index 5f20f920f932..e28c3df4176a 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -64,7 +64,11 @@ void do_breakpoint(struct pt_regs *, int fault_num);
 
 
 #ifdef __tilegx__
+/* kernel/single_step.c */
 void gx_singlestep_handle(struct pt_regs *, int fault_num);
+
+/* kernel/intvec_64.S */
+void fill_ra_stack(void);
 #endif
 
-#endif /* _ASM_TILE_SYSCALLS_H */
+#endif /* _ASM_TILE_TRAPS_H */
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 79c93e10ba27..2c181c864ef7 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -1156,6 +1156,18 @@ int_unalign:
 	push_extra_callee_saves r0
 	j       do_trap
 
+/* Fill the return address stack with nonzero entries. */
+STD_ENTRY(fill_ra_stack)
+	{
+	 move	r0, lr
+	 jal	1f
+	}
+1:	jal	2f
+2:	jal	3f
+3:	jal	4f
+4:	jrp	r0
+	STD_ENDPROC(fill_ra_stack)
+
 /* Include .intrpt1 array of interrupt vectors */
 	.section ".intrpt1", "ax"
 
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 2bb6602a1ee7..32acfd9e23d0 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -289,7 +289,10 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 		address = regs->pc;
 		break;
 #ifdef __tilegx__
-	case INT_ILL_TRANS:
+	case INT_ILL_TRANS: {
+		/* Avoid a hardware erratum with the return address stack. */
+		fill_ra_stack();
+
 		signo = SIGSEGV;
 		code = SEGV_MAPERR;
 		if (reason & SPR_ILL_TRANS_REASON__I_STREAM_VA_RMASK)
@@ -297,6 +300,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 		else
 			address = 0;  /* FIXME: GX: single-step for address */
 		break;
+	}
 #endif
 	default:
 		panic("Unexpected do_trap interrupt number %d", fault_num);
-- 
cgit v1.2.3-58-ga151


From a714ffff36a581756ec3b001f47e8e5e96a9fa0e Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:23:54 -0400
Subject: arch/tile: fix up some minor trap handling issues

We now respond to MEM_ERROR traps (e.g. an atomic instruction to
non-cacheable memory) with a SIGBUS.

We also no longer generate a console crash message if a user
process die due to a SIGTRAP.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/intvec_64.S | 2 +-
 arch/tile/kernel/traps.c     | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 2c181c864ef7..005535d108c1 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -1178,7 +1178,7 @@ STD_ENTRY(fill_ra_stack)
 #define do_hardwall_trap bad_intr
 #endif
 
-	int_hand     INT_MEM_ERROR, MEM_ERROR, bad_intr
+	int_hand     INT_MEM_ERROR, MEM_ERROR, do_trap
 	int_hand     INT_SINGLE_STEP_3, SINGLE_STEP_3, bad_intr
 #if CONFIG_KERNEL_PL == 2
 	int_hand     INT_SINGLE_STEP_2, SINGLE_STEP_2, gx_singlestep_handle
diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c
index 32acfd9e23d0..73cff814ac57 100644
--- a/arch/tile/kernel/traps.c
+++ b/arch/tile/kernel/traps.c
@@ -200,7 +200,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 {
 	siginfo_t info = { 0 };
 	int signo, code;
-	unsigned long address;
+	unsigned long address = 0;
 	bundle_bits instr;
 
 	/* Re-enable interrupts. */
@@ -223,6 +223,10 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 	}
 
 	switch (fault_num) {
+	case INT_MEM_ERROR:
+		signo = SIGBUS;
+		code = BUS_OBJERR;
+		break;
 	case INT_ILL:
 		if (copy_from_user(&instr, (void __user *)regs->pc,
 				   sizeof(instr))) {
@@ -312,7 +316,8 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num,
 	info.si_addr = (void __user *)address;
 	if (signo == SIGILL)
 		info.si_trapno = fault_num;
-	trace_unhandled_signal("trap", regs, address, signo);
+	if (signo != SIGTRAP)
+		trace_unhandled_signal("trap", regs, address, signo);
 	force_sig_info(signo, &info, current);
 }
 
-- 
cgit v1.2.3-58-ga151


From 51bcdf8879f7920946c90087e6160680812a44bd Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:29:28 -0400
Subject: arch/tile: fix a couple of comments that needed updating

Not associated with any code changes, so I'm just lumping these
comment changes into a commit by themselves.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/setup.c | 11 +++++++++--
 arch/tile/mm/fault.c     |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 5f85d8b34dbb..023e2e1cf7f8 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -913,6 +913,13 @@ void __cpuinit setup_cpu(int boot)
 
 #ifdef CONFIG_BLK_DEV_INITRD
 
+/*
+ * Note that the kernel can potentially support other compression
+ * techniques than gz, though we don't do so by default.  If we ever
+ * decide to do so we can either look for other filename extensions,
+ * or just allow a file with this name to be compressed with an
+ * arbitrary compressor (somewhat counterintuitively).
+ */
 static int __initdata set_initramfs_file;
 static char __initdata initramfs_file[128] = "initramfs.cpio.gz";
 
@@ -928,9 +935,9 @@ static int __init setup_initramfs_file(char *str)
 early_param("initramfs_file", setup_initramfs_file);
 
 /*
- * We look for an additional "initramfs.cpio.gz" file in the hvfs.
+ * We look for an "initramfs.cpio.gz" file in the hvfs.
  * If there is one, we allocate some memory for it and it will be
- * unpacked to the initramfs after any built-in initramfs_data.
+ * unpacked to the initramfs.
  */
 static void __init load_hv_initrd(void)
 {
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index cba30e9547b4..cac17c4f2ecf 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -130,7 +130,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
 }
 
 /*
- * Handle a fault on the vmalloc or module mapping area
+ * Handle a fault on the vmalloc area.
  */
 static inline int vmalloc_fault(pgd_t *pgd, unsigned long address)
 {
-- 
cgit v1.2.3-58-ga151


From 6731aa9eae3e94b094a44d2aea02387a39202fbc Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:30:19 -0400
Subject: arch/tile/Makefile: use KCFLAGS when figuring out the libgcc path.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/tile/Makefile b/arch/tile/Makefile
index 17acce70569b..5e4d3b98d711 100644
--- a/arch/tile/Makefile
+++ b/arch/tile/Makefile
@@ -30,7 +30,8 @@ ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"")
 KBUILD_CFLAGS   += $(CONFIG_DEBUG_EXTRA_FLAGS)
 endif
 
-LIBGCC_PATH     := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
+LIBGCC_PATH     := \
+  $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
 
 # Provide the path to use for "make defconfig".
 KBUILD_DEFCONFIG := $(ARCH)_defconfig
-- 
cgit v1.2.3-58-ga151


From 48292738d06d7b46d861652ef59bd03be931c2c9 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:34:52 -0400
Subject: arch/tile: don't wait for migrating PTEs in an NMI handler

Doing so raises the possibility of self-deadlock if we are waiting
for a backtrace for an oprofile or perf interrupt while we are
in the middle of migrating our own stack page.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/mm/fault.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index cac17c4f2ecf..a1da473c8555 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -203,9 +203,14 @@ static pgd_t *get_current_pgd(void)
  * interrupt or a critical region, and must do as little as possible.
  * Similarly, we can't use atomic ops here, since we may be handling a
  * fault caused by an atomic op access.
+ *
+ * If we find a migrating PTE while we're in an NMI context, and we're
+ * at a PC that has a registered exception handler, we don't wait,
+ * since this thread may (e.g.) have been interrupted while migrating
+ * its own stack, which would then cause us to self-deadlock.
  */
 static int handle_migrating_pte(pgd_t *pgd, int fault_num,
-				unsigned long address,
+				unsigned long address, unsigned long pc,
 				int is_kernel_mode, int write)
 {
 	pud_t *pud;
@@ -227,6 +232,8 @@ static int handle_migrating_pte(pgd_t *pgd, int fault_num,
 		pte_offset_kernel(pmd, address);
 	pteval = *pte;
 	if (pte_migrating(pteval)) {
+		if (in_nmi() && search_exception_tables(pc))
+			return 0;
 		wait_for_migration(pte);
 		return 1;
 	}
@@ -300,7 +307,7 @@ static int handle_page_fault(struct pt_regs *regs,
 	 * rather than trying to patch up the existing PTE.
 	 */
 	pgd = get_current_pgd();
-	if (handle_migrating_pte(pgd, fault_num, address,
+	if (handle_migrating_pte(pgd, fault_num, address, regs->pc,
 				 is_kernel_mode, write))
 		return 1;
 
@@ -665,7 +672,7 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
 	 */
 	if (fault_num == INT_DTLB_ACCESS)
 		write = 1;
-	if (handle_migrating_pte(pgd, fault_num, address, 1, write))
+	if (handle_migrating_pte(pgd, fault_num, address, pc, 1, write))
 		return state;
 
 	/* Return zero so that we continue on with normal fault handling. */
-- 
cgit v1.2.3-58-ga151


From 12400f1f22ad7651efa1d3d06dd8b6b178d19ea3 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:36:53 -0400
Subject: arch/tile: don't set the homecache of a PTE unless appropriate

We make sure not to try to set the home for an MMIO PTE (on tilegx)
or a PTE that isn't referencing memory managed by Linux.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/mm/pgtable.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 87303693a072..6b9a109c3e31 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -469,10 +469,18 @@ void __set_pte(pte_t *ptep, pte_t pte)
 
 void set_pte(pte_t *ptep, pte_t pte)
 {
-	struct page *page = pfn_to_page(pte_pfn(pte));
-
-	/* Update the home of a PTE if necessary */
-	pte = pte_set_home(pte, page_home(page));
+	if (pte_present(pte) &&
+	    (!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) {
+		/* The PTE actually references physical memory. */
+		unsigned long pfn = pte_pfn(pte);
+		if (pfn_valid(pfn)) {
+			/* Update the home of the PTE from the struct page. */
+			pte = pte_set_home(pte, page_home(pfn_to_page(pfn)));
+		} else if (hv_pte_get_mode(pte) == 0) {
+			/* remap_pfn_range(), etc, must supply PTE mode. */
+			panic("set_pte(): out-of-range PFN and mode 0\n");
+		}
+	}
 
 	__set_pte(ptep, pte);
 }
-- 
cgit v1.2.3-58-ga151


From b230ff2d5c53bb79e1121554cd3c827e5c1b70fd Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:40:50 -0400
Subject: arch/tile: don't enable irqs unconditionally in page fault handler

If we took a page fault while we had interrupts disabled, we
shouldn't enable them in the page fault handler.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/mm/fault.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index a1da473c8555..22e58f51ed23 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -342,9 +342,12 @@ static int handle_page_fault(struct pt_regs *regs,
 	/*
 	 * If we're trying to touch user-space addresses, we must
 	 * be either at PL0, or else with interrupts enabled in the
-	 * kernel, so either way we can re-enable interrupts here.
+	 * kernel, so either way we can re-enable interrupts here
+	 * unless we are doing atomic access to user space with
+	 * interrupts disabled.
 	 */
-	local_irq_enable();
+	if (!(regs->flags & PT_FLAGS_DISABLE_IRQ))
+		local_irq_enable();
 
 	mm = tsk->mm;
 
-- 
cgit v1.2.3-58-ga151


From 7a7039ee71811222310b431aee246eb78dd0d401 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:42:27 -0400
Subject: arch/tile: fix bug in loading kernels larger than 16 MB

Previously we only handled kernels up to a single huge page in size.
Now we create additional PTEs appropriately.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/mm/init.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 830c4908ea76..8400d3fb9e0a 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -557,6 +557,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 
 	address = MEM_SV_INTRPT;
 	pmd = get_pmd(pgtables, address);
+	pfn = 0;  /* code starts at PA 0 */
 	if (ktext_small) {
 		/* Allocate an L2 PTE for the kernel text */
 		int cpu = 0;
@@ -579,10 +580,15 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 		}
 
 		BUG_ON(address != (unsigned long)_stext);
-		pfn = 0;  /* code starts at PA 0 */
-		pte = alloc_pte();
-		for (pte_ofs = 0; address < (unsigned long)_einittext;
-		     pfn++, pte_ofs++, address += PAGE_SIZE) {
+		pte = NULL;
+		for (; address < (unsigned long)_einittext;
+		     pfn++, address += PAGE_SIZE) {
+			pte_ofs = pte_index(address);
+			if (pte_ofs == 0) {
+				if (pte)
+					assign_pte(pmd++, pte);
+				pte = alloc_pte();
+			}
 			if (!ktext_local) {
 				prot = set_remote_cache_cpu(prot, cpu);
 				cpu = cpumask_next(cpu, &ktext_mask);
@@ -591,7 +597,8 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 			}
 			pte[pte_ofs] = pfn_pte(pfn, prot);
 		}
-		assign_pte(pmd, pte);
+		if (pte)
+			assign_pte(pmd, pte);
 	} else {
 		pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC);
 		pteval = pte_mkhuge(pteval);
@@ -614,7 +621,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
 		else
 			pteval = hv_pte_set_mode(pteval,
 						 HV_PTE_MODE_CACHE_NO_L3);
-		*(pte_t *)pmd = pteval;
+		for (; address < (unsigned long)_einittext;
+		     pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE)
+			*(pte_t *)(pmd++) = pfn_pte(pfn, pteval);
 	}
 
 	/* Set swapper_pgprot here so it is flushed to memory right away. */
-- 
cgit v1.2.3-58-ga151


From 444eef1ba40546690a77b2af4cba7d4561e7bba5 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:43:20 -0400
Subject: arch/tile: fix bug in delay_backoff()

We were carefully computing a value to use for the number of loops
to spin for, and then ignoring it.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/lib/spinlock_common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/tile/lib/spinlock_common.h b/arch/tile/lib/spinlock_common.h
index c10109809132..6ac37509faca 100644
--- a/arch/tile/lib/spinlock_common.h
+++ b/arch/tile/lib/spinlock_common.h
@@ -60,5 +60,5 @@ static void delay_backoff(int iterations)
 	loops += __insn_crc32_32(stack_pointer, get_cycles_low()) &
 		(loops - 1);
 
-	relax(1 << exponent);
+	relax(loops);
 }
-- 
cgit v1.2.3-58-ga151


From 5f220704127ae70db519fabbda4ece649eadac7f Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:44:10 -0400
Subject: arch/tile: don't leak kernel memory when we unload modules

We were failing to track the memory when we allocated it.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/module.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/tile/kernel/module.c b/arch/tile/kernel/module.c
index b90ab9925674..98d476920106 100644
--- a/arch/tile/kernel/module.c
+++ b/arch/tile/kernel/module.c
@@ -67,6 +67,8 @@ void *module_alloc(unsigned long size)
 	area = __get_vm_area(size, VM_ALLOC, MEM_MODULE_START, MEM_MODULE_END);
 	if (!area)
 		goto error;
+	area->nr_pages = npages;
+	area->pages = pages;
 
 	if (map_vm_area(area, prot_rwx, &pages)) {
 		vunmap(area->addr);
-- 
cgit v1.2.3-58-ga151


From 719ea79e330c5e1a17fb7e4cf352a81e4c84cff5 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:50:08 -0400
Subject: arch/tile: fix up locking in pgtable.c slightly

We should be holding the init_mm.page_table_lock in shatter_huge_page()
since we are modifying the kernel page tables.  Then, only if we are
walking the other root page tables to update them, do we want to take
the pgd_lock.

Add a comment about taking the pgd_lock that we always do it with
interrupts disabled and therefore are not at risk from the tlbflush
IPI deadlock as is seen on x86.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/mm/pgtable.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 6b9a109c3e31..2410aa899b3e 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -177,14 +177,10 @@ void shatter_huge_page(unsigned long addr)
 	if (!pmd_huge_page(*pmd))
 		return;
 
-	/*
-	 * Grab the pgd_lock, since we may need it to walk the pgd_list,
-	 * and since we need some kind of lock here to avoid races.
-	 */
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock_irqsave(&init_mm.page_table_lock, flags);
 	if (!pmd_huge_page(*pmd)) {
 		/* Lost the race to convert the huge page. */
-		spin_unlock_irqrestore(&pgd_lock, flags);
+		spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
 		return;
 	}
 
@@ -194,6 +190,7 @@ void shatter_huge_page(unsigned long addr)
 
 #ifdef __PAGETABLE_PMD_FOLDED
 	/* Walk every pgd on the system and update the pmd there. */
+	spin_lock(&pgd_lock);
 	list_for_each(pos, &pgd_list) {
 		pmd_t *copy_pmd;
 		pgd = list_to_pgd(pos) + pgd_index(addr);
@@ -201,6 +198,7 @@ void shatter_huge_page(unsigned long addr)
 		copy_pmd = pmd_offset(pud, addr);
 		__set_pmd(copy_pmd, *pmd);
 	}
+	spin_unlock(&pgd_lock);
 #endif
 
 	/* Tell every cpu to notice the change. */
@@ -208,7 +206,7 @@ void shatter_huge_page(unsigned long addr)
 		     cpu_possible_mask, NULL, 0);
 
 	/* Hold the lock until the TLB flush is finished to avoid races. */
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock_irqrestore(&init_mm.page_table_lock, flags);
 }
 
 /*
@@ -217,9 +215,13 @@ void shatter_huge_page(unsigned long addr)
  * against pageattr.c; it is the unique case in which a valid change
  * of kernel pagetables can't be lazily synchronized by vmalloc faults.
  * vmalloc faults work because attached pagetables are never freed.
- * The locking scheme was chosen on the basis of manfred's
- * recommendations and having no core impact whatsoever.
- * -- wli
+ *
+ * The lock is always taken with interrupts disabled, unlike on x86
+ * and other platforms, because we need to take the lock in
+ * shatter_huge_page(), which may be called from an interrupt context.
+ * We are not at risk from the tlbflush IPI deadlock that was seen on
+ * x86, since we use the flush_remote() API to have the hypervisor do
+ * the TLB flushes regardless of irq disabling.
  */
 DEFINE_SPINLOCK(pgd_lock);
 LIST_HEAD(pgd_list);
-- 
cgit v1.2.3-58-ga151


From bfffe79bc29a9c4c817d5f51590961220e26db1a Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:56:18 -0400
Subject: arch/tile: use proper memparse() for "maxmem" options

This is more standard and avoids having to remember what units
the options actually take.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/setup.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 023e2e1cf7f8..f3598e7a47fa 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -103,13 +103,11 @@ unsigned long __initdata pci_reserve_end_pfn = -1U;
 
 static int __init setup_maxmem(char *str)
 {
-	long maxmem_mb;
-	if (str == NULL || strict_strtol(str, 0, &maxmem_mb) != 0 ||
-	    maxmem_mb == 0)
+	unsigned long long maxmem;
+	if (str == NULL || (maxmem = memparse(str, NULL)) == 0)
 		return -EINVAL;
 
-	maxmem_pfn = (maxmem_mb >> (HPAGE_SHIFT - 20)) <<
-		(HPAGE_SHIFT - PAGE_SHIFT);
+	maxmem_pfn = (maxmem >> HPAGE_SHIFT) << (HPAGE_SHIFT - PAGE_SHIFT);
 	pr_info("Forcing RAM used to no more than %dMB\n",
 	       maxmem_pfn >> (20 - PAGE_SHIFT));
 	return 0;
@@ -119,14 +117,15 @@ early_param("maxmem", setup_maxmem);
 static int __init setup_maxnodemem(char *str)
 {
 	char *endp;
-	long maxnodemem_mb, node;
+	unsigned long long maxnodemem;
+	long node;
 
 	node = str ? simple_strtoul(str, &endp, 0) : INT_MAX;
-	if (node >= MAX_NUMNODES || *endp != ':' ||
-	    strict_strtol(endp+1, 0, &maxnodemem_mb) != 0)
+	if (node >= MAX_NUMNODES || *endp != ':')
 		return -EINVAL;
 
-	maxnodemem_pfn[node] = (maxnodemem_mb >> (HPAGE_SHIFT - 20)) <<
+	maxnodemem = memparse(endp+1, NULL);
+	maxnodemem_pfn[node] = (maxnodemem >> HPAGE_SHIFT) <<
 		(HPAGE_SHIFT - PAGE_SHIFT);
 	pr_info("Forcing RAM used on node %ld to no more than %dMB\n",
 	       node, maxnodemem_pfn[node] >> (20 - PAGE_SHIFT));
-- 
cgit v1.2.3-58-ga151


From 8c92ba6c327ee5089dec1e92eaa82927bee63d6d Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:57:18 -0400
Subject: arch/tile: add "nop" after "nap" to help GX idle power draw

This avoids the hardware istream prefetcher doing unnecessary work.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/entry.S | 2 ++
 arch/tile/kernel/smp.c   | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index 431e9ae60488..ec91568df880 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -85,6 +85,7 @@ STD_ENTRY(cpu_idle_on_new_stack)
 /* Loop forever on a nap during SMP boot. */
 STD_ENTRY(smp_nap)
 	nap
+	nop       /* avoid provoking the icache prefetch with a jump */
 	j smp_nap /* we are not architecturally guaranteed not to exit nap */
 	jrp lr    /* clue in the backtracer */
 	STD_ENDPROC(smp_nap)
@@ -105,5 +106,6 @@ STD_ENTRY(_cpu_idle)
 	.global _cpu_idle_nap
 _cpu_idle_nap:
 	nap
+	nop       /* avoid provoking the icache prefetch with a jump */
 	jrp lr
 	STD_ENDPROC(_cpu_idle)
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index a44e103c5a63..7b6df8c27709 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -103,7 +103,7 @@ static void smp_stop_cpu_interrupt(void)
 	set_cpu_online(smp_processor_id(), 0);
 	arch_local_irq_disable_all();
 	for (;;)
-		asm("nap");
+		asm("nap; nop");
 }
 
 /* This function calls the 'stop' function on all other CPUs in the system. */
-- 
cgit v1.2.3-58-ga151


From cb210ee3a81afab7c64777635cc18899a2bdd9a5 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 15:59:11 -0400
Subject: arch/tile: implement panic_smp_self_stop()

This allows the later-panicking tiles to wait in a lower power state
until they get interrupted with an smp_send_stop().

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/smp.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index 7b6df8c27709..91da0f721958 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -113,6 +113,12 @@ void smp_send_stop(void)
 	send_IPI_allbutself(MSG_TAG_STOP_CPU);
 }
 
+/* On panic, just wait; we may get an smp_send_stop() later on. */
+void panic_smp_self_stop(void)
+{
+	while (1)
+		asm("nap; nop");
+}
 
 /*
  * Dispatch code called from hv_message_intr() for HV_MSG_TILE hv messages.
-- 
cgit v1.2.3-58-ga151


From 2858f856021340f3730fa8639dd520a2e4331f7f Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 16:11:09 -0400
Subject: arch/tile: fix single-stepping over swint1 instructions on tilegx

If we are single-stepping and make a syscall, we call ptrace_notify()
explicitly on the return path back to user space, since we are returning
to a pc value set artificially to the next instruction, and otherwise
we won't register that we stepped over the syscall instruction (swint1).

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/intvec_64.S | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 005535d108c1..fdff17c70cc8 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -22,6 +22,7 @@
 #include <asm/irqflags.h>
 #include <asm/asm-offsets.h>
 #include <asm/types.h>
+#include <asm/signal.h>
 #include <hv/hypervisor.h>
 #include <arch/abi.h>
 #include <arch/interrupts.h>
@@ -1039,11 +1040,25 @@ handle_syscall:
 
 	/* Do syscall trace again, if requested. */
 	ld	r30, r31
-	andi    r30, r30, _TIF_SYSCALL_TRACE
-	beqzt	r30, 1f
+	andi    r0, r30, _TIF_SYSCALL_TRACE
+	{
+	 andi    r0, r30, _TIF_SINGLESTEP
+	 beqzt   r0, 1f
+	}
 	jal	do_syscall_trace
 	FEEDBACK_REENTER(handle_syscall)
-1:	j       .Lresume_userspace   /* jump into middle of interrupt_return */
+	andi    r0, r30, _TIF_SINGLESTEP
+
+1:	beqzt	r0, 2f
+
+	/* Single stepping -- notify ptrace. */
+	{
+	 movei   r0, SIGTRAP
+	 jal     ptrace_notify
+	}
+	FEEDBACK_REENTER(handle_syscall)
+
+2:	j       .Lresume_userspace   /* jump into middle of interrupt_return */
 
 .Lcompat_syscall:
 	/*
-- 
cgit v1.2.3-58-ga151


From 918cbd38aef83de3a2516299bcb230caf59462a0 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 16:14:40 -0400
Subject: arch/tile: fix pointer cast in cacheflush.c

Pragmatically it couldn't be wrong to cast pointers to long to compare
them (since all kernel addresses are in the top half of VA space),
but it's more correct to cast to unsigned long.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/lib/cacheflush.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
index 8928aace7a64..6af2b97a6886 100644
--- a/arch/tile/lib/cacheflush.c
+++ b/arch/tile/lib/cacheflush.c
@@ -109,7 +109,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
 
 	/* Figure out how far back we need to go. */
 	base = p - (step_size * (load_count - 2));
-	if ((long)base < (long)buffer)
+	if ((unsigned long)base < (unsigned long)buffer)
 		base = buffer;
 
 	/*
-- 
cgit v1.2.3-58-ga151


From e81510e0c3800dc730e0c544e3949f7bde368c2b Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Thu, 29 Mar 2012 16:19:45 -0400
Subject: arch/tile: export the page_home() function.

This avois a bug in modules trying to use the function.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/mm/homecache.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 1cc6ae477c98..499f73770b05 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -394,6 +394,7 @@ int page_home(struct page *page)
 		return pte_to_home(*virt_to_pte(NULL, kva));
 	}
 }
+EXPORT_SYMBOL(page_home);
 
 void homecache_change_page_home(struct page *page, int order, int home)
 {
-- 
cgit v1.2.3-58-ga151


From b14f21906774be181627412fed5b6b5fae2b53a2 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 30 Mar 2012 15:29:40 -0400
Subject: arch/tile: stop mentioning the "kvm" subdirectory

It causes "make clean" to fail, for example.  Once we have KVM support
complete, we'll reinstate the subdir reference.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/Makefile | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/Makefile b/arch/tile/Makefile
index 5e4d3b98d711..9520bc5a4b7f 100644
--- a/arch/tile/Makefile
+++ b/arch/tile/Makefile
@@ -54,8 +54,6 @@ libs-y		+= $(LIBGCC_PATH)
 # See arch/tile/Kbuild for content of core part of the kernel
 core-y		+= arch/tile/
 
-core-$(CONFIG_KVM) += arch/tile/kvm/
-
 ifdef TILERA_ROOT
 INSTALL_PATH ?= $(TILERA_ROOT)/tile/boot
 endif
-- 
cgit v1.2.3-58-ga151


From ab306cae660e524edbeb8889e4e23d3c97717b9c Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 30 Mar 2012 15:46:29 -0400
Subject: arch/tile: use atomic exchange in arch_write_unlock()

This idiom is used elsewhere when we do an unlock by writing a zero,
but I missed it here.  Using an atomic operation avoids waiting
on the write buffer for the unlocking write to be sent to the home cache.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/include/asm/spinlock_64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
index 72be5904e020..5f8b6a095fd8 100644
--- a/arch/tile/include/asm/spinlock_64.h
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -137,7 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
 	__insn_mf();
-	rw->lock = 0;
+	__insn_exch4(&rw->lock, 0);  /* Avoid waiting in the write buffer. */
 }
 
 static inline int arch_read_trylock(arch_rwlock_t *rw)
-- 
cgit v1.2.3-58-ga151


From 54229ff359250ce7292dbeb59f157a2d3b67e30c Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 30 Mar 2012 15:47:38 -0400
Subject: arch/tile: fix finv_buffer_remote() for tilegx

There were some correctness issues with this code that are now fixed
with this change.  The change is likely less performant than it could
be, but it should no longer be vulnerable to any races with memory
operations on the memory network while invalidating a range of memory.
This code is run infrequently so performance isn't critical, but
correctness definitely is.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/lib/cacheflush.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
index 6af2b97a6886..db4fb89e12d8 100644
--- a/arch/tile/lib/cacheflush.c
+++ b/arch/tile/lib/cacheflush.c
@@ -39,7 +39,21 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
 {
 	char *p, *base;
 	size_t step_size, load_count;
+
+	/*
+	 * On TILEPro the striping granularity is a fixed 8KB; on
+	 * TILE-Gx it is configurable, and we rely on the fact that
+	 * the hypervisor always configures maximum striping, so that
+	 * bits 9 and 10 of the PA are part of the stripe function, so
+	 * every 512 bytes we hit a striping boundary.
+	 *
+	 */
+#ifdef __tilegx__
+	const unsigned long STRIPE_WIDTH = 512;
+#else
 	const unsigned long STRIPE_WIDTH = 8192;
+#endif
+
 #ifdef __tilegx__
 	/*
 	 * On TILE-Gx, we must disable the dstream prefetcher before doing
@@ -74,7 +88,7 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
 	 * memory, that one load would be sufficient, but since we may
 	 * be, we also need to back up to the last load issued to
 	 * another memory controller, which would be the point where
-	 * we crossed an 8KB boundary (the granularity of striping
+	 * we crossed a "striping" boundary (the granularity of striping
 	 * across memory controllers).  Keep backing up and doing this
 	 * until we are before the beginning of the buffer, or have
 	 * hit all the controllers.
@@ -88,12 +102,22 @@ void finv_buffer_remote(void *buffer, size_t size, int hfh)
 	 * every cache line on a full memory stripe on each
 	 * controller" that we simply do that, to simplify the logic.
 	 *
-	 * FIXME: See bug 9535 for some issues with this code.
+	 * On TILE-Gx the hash-for-home function is much more complex,
+	 * with the upshot being we can't readily guarantee we have
+	 * hit both entries in the 128-entry AMT that were hit by any
+	 * load in the entire range, so we just re-load them all.
+	 * With larger buffers, we may want to consider using a hypervisor
+	 * trap to issue loads directly to each hash-for-home tile for
+	 * each controller (doing it from Linux would trash the TLB).
 	 */
 	if (hfh) {
 		step_size = L2_CACHE_BYTES;
+#ifdef __tilegx__
+		load_count = (size + L2_CACHE_BYTES - 1) / L2_CACHE_BYTES;
+#else
 		load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
 			      (1 << CHIP_LOG_NUM_MSHIMS());
+#endif
 	} else {
 		step_size = STRIPE_WIDTH;
 		load_count = (1 << CHIP_LOG_NUM_MSHIMS());
-- 
cgit v1.2.3-58-ga151


From cdd8e16feba87a3fc2bb1885d36f895a2a3288bf Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 30 Mar 2012 16:24:41 -0400
Subject: arch/tile: return SIGBUS for addresses that are unaligned AND invalid

Previously we were returning SIGSEGV in this case.  It seems cleaner
to return SIGBUS since the hardware figures out alignment traps
before TLB violations, so SIGBUS is the "more correct" signal.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/single_step.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index bc1eb586e24d..9efbc1391b3c 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -153,6 +153,25 @@ static tile_bundle_bits rewrite_load_store_unaligned(
 	if (((unsigned long)addr % size) == 0)
 		return bundle;
 
+	/*
+	 * Return SIGBUS with the unaligned address, if requested.
+	 * Note that we return SIGBUS even for completely invalid addresses
+	 * as long as they are in fact unaligned; this matches what the
+	 * tilepro hardware would be doing, if it could provide us with the
+	 * actual bad address in an SPR, which it doesn't.
+	 */
+	if (unaligned_fixup == 0) {
+		siginfo_t info = {
+			.si_signo = SIGBUS,
+			.si_code = BUS_ADRALN,
+			.si_addr = addr
+		};
+		trace_unhandled_signal("unaligned trap", regs,
+				       (unsigned long)addr, SIGBUS);
+		force_sig_info(info.si_signo, &info, current);
+		return (tilepro_bundle_bits) 0;
+	}
+
 #ifndef __LITTLE_ENDIAN
 # error We assume little-endian representation with copy_xx_user size 2 here
 #endif
@@ -192,18 +211,6 @@ static tile_bundle_bits rewrite_load_store_unaligned(
 		return (tile_bundle_bits) 0;
 	}
 
-	if (unaligned_fixup == 0) {
-		siginfo_t info = {
-			.si_signo = SIGBUS,
-			.si_code = BUS_ADRALN,
-			.si_addr = addr
-		};
-		trace_unhandled_signal("unaligned trap", regs,
-				       (unsigned long)addr, SIGBUS);
-		force_sig_info(info.si_signo, &info, current);
-		return (tile_bundle_bits) 0;
-	}
-
 	if (unaligned_printk || unaligned_fixup_count == 0) {
 		pr_info("Process %d/%s: PC %#lx: Fixup of"
 			" unaligned %s at %#lx.\n",
-- 
cgit v1.2.3-58-ga151


From b1760c847ff9d04fba7cdbef005a0ad805311c6d Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 30 Mar 2012 16:27:20 -0400
Subject: arch/tile: remove bogus performance optimization

We were re-homing the initial task's kernel stack on the boot cpu,
but in fact it's better to let it stay globally homed, since that
task isn't bound to the boot cpu anyway.  This is more of a general
cleanup than an actual performance optimization, but it removes
code, which is a good thing. :-)

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/mm/init.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 8400d3fb9e0a..6a9d20ddc34f 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -254,11 +254,6 @@ static pgprot_t __init init_pgprot(ulong address)
 		return construct_pgprot(PAGE_KERNEL_RO, PAGE_HOME_IMMUTABLE);
 	}
 
-	/* As a performance optimization, keep the boot init stack here. */
-	if (address >= (ulong)&init_thread_union &&
-	    address < (ulong)&init_thread_union + THREAD_SIZE)
-		return construct_pgprot(PAGE_KERNEL, smp_processor_id());
-
 #ifndef __tilegx__
 #if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
 	/* Force the atomic_locks[] array page to be hash-for-home. */
-- 
cgit v1.2.3-58-ga151


From e1d5c0195075abaa45cd04ca397dbeaa0d18c490 Mon Sep 17 00:00:00 2001
From: Chris Metcalf <cmetcalf@tilera.com>
Date: Fri, 30 Mar 2012 16:29:06 -0400
Subject: arch/tile: avoid accidentally unmasking NMI-type interrupt
 accidentally

The return path as we reload registers and core state requires that r30
hold a boolean indicating whether we are returning from an NMI, but in a
couple of cases we weren't setting this properly, with the result that we
could accidentally unmask the NMI interrupt(s), which could cause confusion.
Now we set r30 in every place where we jump into the interrupt return path.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
---
 arch/tile/kernel/intvec_32.S | 24 ++++++++++++++++++++----
 arch/tile/kernel/intvec_64.S | 19 ++++++++++++++++---
 2 files changed, 36 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index aecc8ed5f39b..5d56a1ef5ba5 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -799,6 +799,10 @@ handle_interrupt:
  * This routine takes a boolean in r30 indicating if this is an NMI.
  * If so, we also expect a boolean in r31 indicating whether to
  * re-enable the oprofile interrupts.
+ *
+ * Note that .Lresume_userspace is jumped to directly in several
+ * places, and we need to make sure r30 is set correctly in those
+ * callers as well.
  */
 STD_ENTRY(interrupt_return)
 	/* If we're resuming to kernel space, don't check thread flags. */
@@ -1237,7 +1241,10 @@ handle_syscall:
 	bzt     r30, 1f
 	jal	do_syscall_trace
 	FEEDBACK_REENTER(handle_syscall)
-1:	j       .Lresume_userspace   /* jump into middle of interrupt_return */
+1:	{
+	 movei  r30, 0               /* not an NMI */
+	 j      .Lresume_userspace   /* jump into middle of interrupt_return */
+	}
 
 .Linvalid_syscall:
 	/* Report an invalid syscall back to the user program */
@@ -1246,7 +1253,10 @@ handle_syscall:
 	 movei  r28, -ENOSYS
 	}
 	sw      r29, r28
-	j       .Lresume_userspace   /* jump into middle of interrupt_return */
+	{
+	 movei  r30, 0               /* not an NMI */
+	 j      .Lresume_userspace   /* jump into middle of interrupt_return */
+	}
 	STD_ENDPROC(handle_syscall)
 
 	/* Return the address for oprofile to suppress in backtraces. */
@@ -1262,7 +1272,10 @@ STD_ENTRY(ret_from_fork)
 	jal     sim_notify_fork
 	jal     schedule_tail
 	FEEDBACK_REENTER(ret_from_fork)
-	j       .Lresume_userspace   /* jump into middle of interrupt_return */
+	{
+	 movei  r30, 0               /* not an NMI */
+	 j      .Lresume_userspace   /* jump into middle of interrupt_return */
+	}
 	STD_ENDPROC(ret_from_fork)
 
 	/*
@@ -1376,7 +1389,10 @@ handle_ill:
 
 	jal     send_sigtrap    /* issue a SIGTRAP */
 	FEEDBACK_REENTER(handle_ill)
-	j       .Lresume_userspace   /* jump into middle of interrupt_return */
+	{
+	 movei  r30, 0               /* not an NMI */
+	 j      .Lresume_userspace   /* jump into middle of interrupt_return */
+	}
 
 .Ldispatch_normal_ill:
 	{
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index fdff17c70cc8..49d9d6621682 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -606,6 +606,10 @@ handle_interrupt:
  * This routine takes a boolean in r30 indicating if this is an NMI.
  * If so, we also expect a boolean in r31 indicating whether to
  * re-enable the oprofile interrupts.
+ *
+ * Note that .Lresume_userspace is jumped to directly in several
+ * places, and we need to make sure r30 is set correctly in those
+ * callers as well.
  */
 STD_ENTRY(interrupt_return)
 	/* If we're resuming to kernel space, don't check thread flags. */
@@ -1058,7 +1062,10 @@ handle_syscall:
 	}
 	FEEDBACK_REENTER(handle_syscall)
 
-2:	j       .Lresume_userspace   /* jump into middle of interrupt_return */
+2:	{
+	 movei  r30, 0               /* not an NMI */
+	 j      .Lresume_userspace   /* jump into middle of interrupt_return */
+	}
 
 .Lcompat_syscall:
 	/*
@@ -1092,7 +1099,10 @@ handle_syscall:
 	 movei  r28, -ENOSYS
 	}
 	st      r29, r28
-	j       .Lresume_userspace   /* jump into middle of interrupt_return */
+	{
+	 movei  r30, 0               /* not an NMI */
+	 j      .Lresume_userspace   /* jump into middle of interrupt_return */
+	}
 	STD_ENDPROC(handle_syscall)
 
 	/* Return the address for oprofile to suppress in backtraces. */
@@ -1108,7 +1118,10 @@ STD_ENTRY(ret_from_fork)
 	jal     sim_notify_fork
 	jal     schedule_tail
 	FEEDBACK_REENTER(ret_from_fork)
-	j       .Lresume_userspace
+	{
+	 movei  r30, 0               /* not an NMI */
+	 j      .Lresume_userspace   /* jump into middle of interrupt_return */
+	}
 	STD_ENDPROC(ret_from_fork)
 
 /* Various stub interrupt handlers and syscall handlers */
-- 
cgit v1.2.3-58-ga151