From efa773fe913ff188c554ccaac577d0a1bb1dc2f4 Mon Sep 17 00:00:00 2001
From: Alim Akhtar <alim.akhtar@samsung.com>
Date: Mon, 24 Aug 2015 18:01:15 +0530
Subject: arm64: defconfig: Enable samsung serial and mmc

This patch update defconfig, adds samsung serial and
Synopsys Designware MMC configs related to exynos SoC

Signed-off-by: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/configs/defconfig | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 34d71dd86781..c8ccda16e079 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -109,6 +109,10 @@ CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_8250_MT6577=y
 CONFIG_SERIAL_AMBA_PL011=y
 CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_SERIAL_SAMSUNG=y
+CONFIG_SERIAL_SAMSUNG_UARTS_4=y
+CONFIG_SERIAL_SAMSUNG_UARTS=4
+CONFIG_SERIAL_SAMSUNG_CONSOLE=y
 CONFIG_SERIAL_MSM=y
 CONFIG_SERIAL_MSM_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
@@ -145,6 +149,10 @@ CONFIG_MMC_ARMMMCI=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_MMC_SPI=y
+CONFIG_MMC_DW=y
+CONFIG_MMC_DW_IDMAC=y
+CONFIG_MMC_DW_PLTFM=y
+CONFIG_MMC_DW_EXYNOS=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_SYSCON=y
-- 
cgit v1.2.3-58-ga151


From e5c88e3f2fb35dca5f3e46d65095bf5d008595b7 Mon Sep 17 00:00:00 2001
From: Feng Kan <fkan@apm.com>
Date: Wed, 23 Sep 2015 11:55:38 -0700
Subject: arm64: Change memcpy in kernel to use the copy template file

This converts the memcpy.S to use the copy template file. The copy
template file was based originally on the memcpy.S

Signed-off-by: Feng Kan <fkan@apm.com>
Signed-off-by: Balamurugan Shanmugam <bshanmugam@apm.com>
[catalin.marinas@arm.com: removed tmp3(w) .req statements as they are not used]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/lib/copy_template.S | 193 +++++++++++++++++++++++++++++++++++++++++
 arch/arm64/lib/memcpy.S        | 179 ++++++--------------------------------
 2 files changed, 219 insertions(+), 153 deletions(-)
 create mode 100644 arch/arm64/lib/copy_template.S

(limited to 'arch')

diff --git a/arch/arm64/lib/copy_template.S b/arch/arm64/lib/copy_template.S
new file mode 100644
index 000000000000..410fbdb8163f
--- /dev/null
+++ b/arch/arm64/lib/copy_template.S
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2013 Linaro.
+ *
+ * This code is based on glibc cortex strings work originally authored by Linaro
+ * and re-licensed under GPLv2 for the Linux kernel. The original code can
+ * be found @
+ *
+ * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
+ * files/head:/src/aarch64/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/*
+ * Copy a buffer from src to dest (alignment handled by the hardware)
+ *
+ * Parameters:
+ *	x0 - dest
+ *	x1 - src
+ *	x2 - n
+ * Returns:
+ *	x0 - dest
+ */
+dstin	.req	x0
+src	.req	x1
+count	.req	x2
+tmp1	.req	x3
+tmp1w	.req	w3
+tmp2	.req	x4
+tmp2w	.req	w4
+dst	.req	x6
+
+A_l	.req	x7
+A_h	.req	x8
+B_l	.req	x9
+B_h	.req	x10
+C_l	.req	x11
+C_h	.req	x12
+D_l	.req	x13
+D_h	.req	x14
+
+	mov	dst, dstin
+	cmp	count, #16
+	/*When memory length is less than 16, the accessed are not aligned.*/
+	b.lo	.Ltiny15
+
+	neg	tmp2, src
+	ands	tmp2, tmp2, #15/* Bytes to reach alignment. */
+	b.eq	.LSrcAligned
+	sub	count, count, tmp2
+	/*
+	* Copy the leading memory data from src to dst in an increasing
+	* address order.By this way,the risk of overwritting the source
+	* memory data is eliminated when the distance between src and
+	* dst is less than 16. The memory accesses here are alignment.
+	*/
+	tbz	tmp2, #0, 1f
+	ldrb1	tmp1w, src, #1
+	strb1	tmp1w, dst, #1
+1:
+	tbz	tmp2, #1, 2f
+	ldrh1	tmp1w, src, #2
+	strh1	tmp1w, dst, #2
+2:
+	tbz	tmp2, #2, 3f
+	ldr1	tmp1w, src, #4
+	str1	tmp1w, dst, #4
+3:
+	tbz	tmp2, #3, .LSrcAligned
+	ldr1	tmp1, src, #8
+	str1	tmp1, dst, #8
+
+.LSrcAligned:
+	cmp	count, #64
+	b.ge	.Lcpy_over64
+	/*
+	* Deal with small copies quickly by dropping straight into the
+	* exit block.
+	*/
+.Ltail63:
+	/*
+	* Copy up to 48 bytes of data. At this point we only need the
+	* bottom 6 bits of count to be accurate.
+	*/
+	ands	tmp1, count, #0x30
+	b.eq	.Ltiny15
+	cmp	tmp1w, #0x20
+	b.eq	1f
+	b.lt	2f
+	ldp1	A_l, A_h, src, #16
+	stp1	A_l, A_h, dst, #16
+1:
+	ldp1	A_l, A_h, src, #16
+	stp1	A_l, A_h, dst, #16
+2:
+	ldp1	A_l, A_h, src, #16
+	stp1	A_l, A_h, dst, #16
+.Ltiny15:
+	/*
+	* Prefer to break one ldp/stp into several load/store to access
+	* memory in an increasing address order,rather than to load/store 16
+	* bytes from (src-16) to (dst-16) and to backward the src to aligned
+	* address,which way is used in original cortex memcpy. If keeping
+	* the original memcpy process here, memmove need to satisfy the
+	* precondition that src address is at least 16 bytes bigger than dst
+	* address,otherwise some source data will be overwritten when memove
+	* call memcpy directly. To make memmove simpler and decouple the
+	* memcpy's dependency on memmove, withdrew the original process.
+	*/
+	tbz	count, #3, 1f
+	ldr1	tmp1, src, #8
+	str1	tmp1, dst, #8
+1:
+	tbz	count, #2, 2f
+	ldr1	tmp1w, src, #4
+	str1	tmp1w, dst, #4
+2:
+	tbz	count, #1, 3f
+	ldrh1	tmp1w, src, #2
+	strh1	tmp1w, dst, #2
+3:
+	tbz	count, #0, .Lexitfunc
+	ldrb1	tmp1w, src, #1
+	strb1	tmp1w, dst, #1
+
+	b	.Lexitfunc
+
+.Lcpy_over64:
+	subs	count, count, #128
+	b.ge	.Lcpy_body_large
+	/*
+	* Less than 128 bytes to copy, so handle 64 here and then jump
+	* to the tail.
+	*/
+	ldp1	A_l, A_h, src, #16
+	stp1	A_l, A_h, dst, #16
+	ldp1	B_l, B_h, src, #16
+	ldp1	C_l, C_h, src, #16
+	stp1	B_l, B_h, dst, #16
+	stp1	C_l, C_h, dst, #16
+	ldp1	D_l, D_h, src, #16
+	stp1	D_l, D_h, dst, #16
+
+	tst	count, #0x3f
+	b.ne	.Ltail63
+	b	.Lexitfunc
+
+	/*
+	* Critical loop.  Start at a new cache line boundary.  Assuming
+	* 64 bytes per line this ensures the entire loop is in one line.
+	*/
+	.p2align	L1_CACHE_SHIFT
+.Lcpy_body_large:
+	/* pre-get 64 bytes data. */
+	ldp1	A_l, A_h, src, #16
+	ldp1	B_l, B_h, src, #16
+	ldp1	C_l, C_h, src, #16
+	ldp1	D_l, D_h, src, #16
+1:
+	/*
+	* interlace the load of next 64 bytes data block with store of the last
+	* loaded 64 bytes data.
+	*/
+	stp1	A_l, A_h, dst, #16
+	ldp1	A_l, A_h, src, #16
+	stp1	B_l, B_h, dst, #16
+	ldp1	B_l, B_h, src, #16
+	stp1	C_l, C_h, dst, #16
+	ldp1	C_l, C_h, src, #16
+	stp1	D_l, D_h, dst, #16
+	ldp1	D_l, D_h, src, #16
+	subs	count, count, #64
+	b.ge	1b
+	stp1	A_l, A_h, dst, #16
+	stp1	B_l, B_h, dst, #16
+	stp1	C_l, C_h, dst, #16
+	stp1	D_l, D_h, dst, #16
+
+	tst	count, #0x3f
+	b.ne	.Ltail63
+.Lexitfunc:
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 8a9a96d3ddae..173a1aace9bb 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -36,166 +36,39 @@
  * Returns:
  *	x0 - dest
  */
-dstin	.req	x0
-src	.req	x1
-count	.req	x2
-tmp1	.req	x3
-tmp1w	.req	w3
-tmp2	.req	x4
-tmp2w	.req	w4
-tmp3	.req	x5
-tmp3w	.req	w5
-dst	.req	x6
+	.macro ldrb1 ptr, regB, val
+	ldrb  \ptr, [\regB], \val
+	.endm
 
-A_l	.req	x7
-A_h	.req	x8
-B_l	.req	x9
-B_h	.req	x10
-C_l	.req	x11
-C_h	.req	x12
-D_l	.req	x13
-D_h	.req	x14
+	.macro strb1 ptr, regB, val
+	strb \ptr, [\regB], \val
+	.endm
 
-ENTRY(memcpy)
-	mov	dst, dstin
-	cmp	count, #16
-	/*When memory length is less than 16, the accessed are not aligned.*/
-	b.lo	.Ltiny15
+	.macro ldrh1 ptr, regB, val
+	ldrh  \ptr, [\regB], \val
+	.endm
 
-	neg	tmp2, src
-	ands	tmp2, tmp2, #15/* Bytes to reach alignment. */
-	b.eq	.LSrcAligned
-	sub	count, count, tmp2
-	/*
-	* Copy the leading memory data from src to dst in an increasing
-	* address order.By this way,the risk of overwritting the source
-	* memory data is eliminated when the distance between src and
-	* dst is less than 16. The memory accesses here are alignment.
-	*/
-	tbz	tmp2, #0, 1f
-	ldrb	tmp1w, [src], #1
-	strb	tmp1w, [dst], #1
-1:
-	tbz	tmp2, #1, 2f
-	ldrh	tmp1w, [src], #2
-	strh	tmp1w, [dst], #2
-2:
-	tbz	tmp2, #2, 3f
-	ldr	tmp1w, [src], #4
-	str	tmp1w, [dst], #4
-3:
-	tbz	tmp2, #3, .LSrcAligned
-	ldr	tmp1, [src],#8
-	str	tmp1, [dst],#8
+	.macro strh1 ptr, regB, val
+	strh \ptr, [\regB], \val
+	.endm
 
-.LSrcAligned:
-	cmp	count, #64
-	b.ge	.Lcpy_over64
-	/*
-	* Deal with small copies quickly by dropping straight into the
-	* exit block.
-	*/
-.Ltail63:
-	/*
-	* Copy up to 48 bytes of data. At this point we only need the
-	* bottom 6 bits of count to be accurate.
-	*/
-	ands	tmp1, count, #0x30
-	b.eq	.Ltiny15
-	cmp	tmp1w, #0x20
-	b.eq	1f
-	b.lt	2f
-	ldp	A_l, A_h, [src], #16
-	stp	A_l, A_h, [dst], #16
-1:
-	ldp	A_l, A_h, [src], #16
-	stp	A_l, A_h, [dst], #16
-2:
-	ldp	A_l, A_h, [src], #16
-	stp	A_l, A_h, [dst], #16
-.Ltiny15:
-	/*
-	* Prefer to break one ldp/stp into several load/store to access
-	* memory in an increasing address order,rather than to load/store 16
-	* bytes from (src-16) to (dst-16) and to backward the src to aligned
-	* address,which way is used in original cortex memcpy. If keeping
-	* the original memcpy process here, memmove need to satisfy the
-	* precondition that src address is at least 16 bytes bigger than dst
-	* address,otherwise some source data will be overwritten when memove
-	* call memcpy directly. To make memmove simpler and decouple the
-	* memcpy's dependency on memmove, withdrew the original process.
-	*/
-	tbz	count, #3, 1f
-	ldr	tmp1, [src], #8
-	str	tmp1, [dst], #8
-1:
-	tbz	count, #2, 2f
-	ldr	tmp1w, [src], #4
-	str	tmp1w, [dst], #4
-2:
-	tbz	count, #1, 3f
-	ldrh	tmp1w, [src], #2
-	strh	tmp1w, [dst], #2
-3:
-	tbz	count, #0, .Lexitfunc
-	ldrb	tmp1w, [src]
-	strb	tmp1w, [dst]
+	.macro ldr1 ptr, regB, val
+	ldr \ptr, [\regB], \val
+	.endm
 
-.Lexitfunc:
-	ret
+	.macro str1 ptr, regB, val
+	str \ptr, [\regB], \val
+	.endm
 
-.Lcpy_over64:
-	subs	count, count, #128
-	b.ge	.Lcpy_body_large
-	/*
-	* Less than 128 bytes to copy, so handle 64 here and then jump
-	* to the tail.
-	*/
-	ldp	A_l, A_h, [src],#16
-	stp	A_l, A_h, [dst],#16
-	ldp	B_l, B_h, [src],#16
-	ldp	C_l, C_h, [src],#16
-	stp	B_l, B_h, [dst],#16
-	stp	C_l, C_h, [dst],#16
-	ldp	D_l, D_h, [src],#16
-	stp	D_l, D_h, [dst],#16
+	.macro ldp1 ptr, regB, regC, val
+	ldp \ptr, \regB, [\regC], \val
+	.endm
 
-	tst	count, #0x3f
-	b.ne	.Ltail63
-	ret
+	.macro stp1 ptr, regB, regC, val
+	stp \ptr, \regB, [\regC], \val
+	.endm
 
-	/*
-	* Critical loop.  Start at a new cache line boundary.  Assuming
-	* 64 bytes per line this ensures the entire loop is in one line.
-	*/
-	.p2align	L1_CACHE_SHIFT
-.Lcpy_body_large:
-	/* pre-get 64 bytes data. */
-	ldp	A_l, A_h, [src],#16
-	ldp	B_l, B_h, [src],#16
-	ldp	C_l, C_h, [src],#16
-	ldp	D_l, D_h, [src],#16
-1:
-	/*
-	* interlace the load of next 64 bytes data block with store of the last
-	* loaded 64 bytes data.
-	*/
-	stp	A_l, A_h, [dst],#16
-	ldp	A_l, A_h, [src],#16
-	stp	B_l, B_h, [dst],#16
-	ldp	B_l, B_h, [src],#16
-	stp	C_l, C_h, [dst],#16
-	ldp	C_l, C_h, [src],#16
-	stp	D_l, D_h, [dst],#16
-	ldp	D_l, D_h, [src],#16
-	subs	count, count, #64
-	b.ge	1b
-	stp	A_l, A_h, [dst],#16
-	stp	B_l, B_h, [dst],#16
-	stp	C_l, C_h, [dst],#16
-	stp	D_l, D_h, [dst],#16
-
-	tst	count, #0x3f
-	b.ne	.Ltail63
+ENTRY(memcpy)
+#include "copy_template.S"
 	ret
 ENDPROC(memcpy)
-- 
cgit v1.2.3-58-ga151


From 404268828c74ba06b1f21584b26edafb381c9d7d Mon Sep 17 00:00:00 2001
From: Feng Kan <fkan@apm.com>
Date: Wed, 23 Sep 2015 11:55:39 -0700
Subject: arm64: copy_to-from-in_user optimization using copy template

This patch optimize copy_to-from-in_user for arm 64bit architecture. The
copy template is used as template file for all the copy*.S files. Minor
change was made to it to accommodate the copy to/from/in user files.

Signed-off-by: Feng Kan <fkan@apm.com>
Signed-off-by: Balamurugan Shanmugam <bshanmugam@apm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/lib/copy_from_user.S | 78 +++++++++++++++++++++++------------------
 arch/arm64/lib/copy_in_user.S   | 67 ++++++++++++++++++++---------------
 arch/arm64/lib/copy_to_user.S   | 67 ++++++++++++++++++++---------------
 3 files changed, 120 insertions(+), 92 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 1be9ef27be97..4699cd74f87e 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -18,6 +18,7 @@
 
 #include <asm/alternative.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 #include <asm/cpufeature.h>
 #include <asm/sysreg.h>
 
@@ -31,49 +32,58 @@
  * Returns:
  *	x0 - bytes not copied
  */
+
+	.macro ldrb1 ptr, regB, val
+	USER(9998f, ldrb  \ptr, [\regB], \val)
+	.endm
+
+	.macro strb1 ptr, regB, val
+	strb \ptr, [\regB], \val
+	.endm
+
+	.macro ldrh1 ptr, regB, val
+	USER(9998f, ldrh  \ptr, [\regB], \val)
+	.endm
+
+	.macro strh1 ptr, regB, val
+	strh \ptr, [\regB], \val
+	.endm
+
+	.macro ldr1 ptr, regB, val
+	USER(9998f, ldr \ptr, [\regB], \val)
+	.endm
+
+	.macro str1 ptr, regB, val
+	str \ptr, [\regB], \val
+	.endm
+
+	.macro ldp1 ptr, regB, regC, val
+	USER(9998f, ldp \ptr, \regB, [\regC], \val)
+	.endm
+
+	.macro stp1 ptr, regB, regC, val
+	stp \ptr, \regB, [\regC], \val
+	.endm
+
+end	.req	x5
 ENTRY(__copy_from_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
-	add	x5, x1, x2			// upper user buffer boundary
-	subs	x2, x2, #16
-	b.mi	1f
-0:
-USER(9f, ldp	x3, x4, [x1], #16)
-	subs	x2, x2, #16
-	stp	x3, x4, [x0], #16
-	b.pl	0b
-1:	adds	x2, x2, #8
-	b.mi	2f
-USER(9f, ldr	x3, [x1], #8	)
-	sub	x2, x2, #8
-	str	x3, [x0], #8
-2:	adds	x2, x2, #4
-	b.mi	3f
-USER(9f, ldr	w3, [x1], #4	)
-	sub	x2, x2, #4
-	str	w3, [x0], #4
-3:	adds	x2, x2, #2
-	b.mi	4f
-USER(9f, ldrh	w3, [x1], #2	)
-	sub	x2, x2, #2
-	strh	w3, [x0], #2
-4:	adds	x2, x2, #1
-	b.mi	5f
-USER(9f, ldrb	w3, [x1]	)
-	strb	w3, [x0]
-5:	mov	x0, #0
+	add	end, x0, x2
+#include "copy_template.S"
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
+	mov	x0, #0				// Nothing to copy
 	ret
 ENDPROC(__copy_from_user)
 
 	.section .fixup,"ax"
 	.align	2
-9:	sub	x2, x5, x1
-	mov	x3, x2
-10:	strb	wzr, [x0], #1			// zero remaining buffer space
-	subs	x3, x3, #1
-	b.ne	10b
-	mov	x0, x2				// bytes not copied
+9998:
+	sub	x0, end, dst
+9999:
+	strb	wzr, [dst], #1			// zero remaining buffer space
+	cmp	dst, end
+	b.lo	9999b
 	ret
 	.previous
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 1b94661e22b3..81c8fc93c100 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -20,6 +20,7 @@
 
 #include <asm/alternative.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 #include <asm/cpufeature.h>
 #include <asm/sysreg.h>
 
@@ -33,44 +34,52 @@
  * Returns:
  *	x0 - bytes not copied
  */
+	.macro ldrb1 ptr, regB, val
+	USER(9998f, ldrb  \ptr, [\regB], \val)
+	.endm
+
+	.macro strb1 ptr, regB, val
+	USER(9998f, strb \ptr, [\regB], \val)
+	.endm
+
+	.macro ldrh1 ptr, regB, val
+	USER(9998f, ldrh  \ptr, [\regB], \val)
+	.endm
+
+	.macro strh1 ptr, regB, val
+	USER(9998f, strh \ptr, [\regB], \val)
+	.endm
+
+	.macro ldr1 ptr, regB, val
+	USER(9998f, ldr \ptr, [\regB], \val)
+	.endm
+
+	.macro str1 ptr, regB, val
+	USER(9998f, str \ptr, [\regB], \val)
+	.endm
+
+	.macro ldp1 ptr, regB, regC, val
+	USER(9998f, ldp \ptr, \regB, [\regC], \val)
+	.endm
+
+	.macro stp1 ptr, regB, regC, val
+	USER(9998f, stp \ptr, \regB, [\regC], \val)
+	.endm
+
+end	.req	x5
 ENTRY(__copy_in_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
-	add	x5, x0, x2			// upper user buffer boundary
-	subs	x2, x2, #16
-	b.mi	1f
-0:
-USER(9f, ldp	x3, x4, [x1], #16)
-	subs	x2, x2, #16
-USER(9f, stp	x3, x4, [x0], #16)
-	b.pl	0b
-1:	adds	x2, x2, #8
-	b.mi	2f
-USER(9f, ldr	x3, [x1], #8	)
-	sub	x2, x2, #8
-USER(9f, str	x3, [x0], #8	)
-2:	adds	x2, x2, #4
-	b.mi	3f
-USER(9f, ldr	w3, [x1], #4	)
-	sub	x2, x2, #4
-USER(9f, str	w3, [x0], #4	)
-3:	adds	x2, x2, #2
-	b.mi	4f
-USER(9f, ldrh	w3, [x1], #2	)
-	sub	x2, x2, #2
-USER(9f, strh	w3, [x0], #2	)
-4:	adds	x2, x2, #1
-	b.mi	5f
-USER(9f, ldrb	w3, [x1]	)
-USER(9f, strb	w3, [x0]	)
-5:	mov	x0, #0
+	add	end, x0, x2
+#include "copy_template.S"
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
+	mov	x0, #0
 	ret
 ENDPROC(__copy_in_user)
 
 	.section .fixup,"ax"
 	.align	2
-9:	sub	x0, x5, x0			// bytes not copied
+9998:	sub	x0, end, dst			// bytes not copied
 	ret
 	.previous
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index a257b47e2dc4..7512bbbc07ac 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -18,6 +18,7 @@
 
 #include <asm/alternative.h>
 #include <asm/assembler.h>
+#include <asm/cache.h>
 #include <asm/cpufeature.h>
 #include <asm/sysreg.h>
 
@@ -31,44 +32,52 @@
  * Returns:
  *	x0 - bytes not copied
  */
+	.macro ldrb1 ptr, regB, val
+	ldrb  \ptr, [\regB], \val
+	.endm
+
+	.macro strb1 ptr, regB, val
+	USER(9998f, strb \ptr, [\regB], \val)
+	.endm
+
+	.macro ldrh1 ptr, regB, val
+	ldrh  \ptr, [\regB], \val
+	.endm
+
+	.macro strh1 ptr, regB, val
+	USER(9998f, strh \ptr, [\regB], \val)
+	.endm
+
+	.macro ldr1 ptr, regB, val
+	ldr \ptr, [\regB], \val
+	.endm
+
+	.macro str1 ptr, regB, val
+	USER(9998f, str \ptr, [\regB], \val)
+	.endm
+
+	.macro ldp1 ptr, regB, regC, val
+	ldp \ptr, \regB, [\regC], \val
+	.endm
+
+	.macro stp1 ptr, regB, regC, val
+	USER(9998f, stp \ptr, \regB, [\regC], \val)
+	.endm
+
+end	.req	x5
 ENTRY(__copy_to_user)
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
-	add	x5, x0, x2			// upper user buffer boundary
-	subs	x2, x2, #16
-	b.mi	1f
-0:
-	ldp	x3, x4, [x1], #16
-	subs	x2, x2, #16
-USER(9f, stp	x3, x4, [x0], #16)
-	b.pl	0b
-1:	adds	x2, x2, #8
-	b.mi	2f
-	ldr	x3, [x1], #8
-	sub	x2, x2, #8
-USER(9f, str	x3, [x0], #8	)
-2:	adds	x2, x2, #4
-	b.mi	3f
-	ldr	w3, [x1], #4
-	sub	x2, x2, #4
-USER(9f, str	w3, [x0], #4	)
-3:	adds	x2, x2, #2
-	b.mi	4f
-	ldrh	w3, [x1], #2
-	sub	x2, x2, #2
-USER(9f, strh	w3, [x0], #2	)
-4:	adds	x2, x2, #1
-	b.mi	5f
-	ldrb	w3, [x1]
-USER(9f, strb	w3, [x0]	)
-5:	mov	x0, #0
+	add	end, x0, x2
+#include "copy_template.S"
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
+	mov	x0, #0
 	ret
 ENDPROC(__copy_to_user)
 
 	.section .fixup,"ax"
 	.align	2
-9:	sub	x0, x5, x0			// bytes not copied
+9998:	sub	x0, end, dst			// bytes not copied
 	ret
 	.previous
-- 
cgit v1.2.3-58-ga151


From 127db024a7baee9874014dac33628253f438b4da Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Date: Thu, 17 Sep 2015 12:38:07 +0300
Subject: arm64: introduce VA_START macro - the first kernel virtual address.

In order to not use lengthy (UL(0xffffffffffffffff) << VA_BITS) everywhere,
replace it with VA_START.

Signed-off-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/memory.h  | 2 ++
 arch/arm64/include/asm/pgtable.h | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 6b4c3ad75a2a..11ccf6c09533 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -42,12 +42,14 @@
  * PAGE_OFFSET - the virtual address of the start of the kernel image (top
  *		 (VA_BITS - 1))
  * VA_BITS - the maximum number of bits for virtual addresses.
+ * VA_START - the first kernel virtual address.
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
  * The module space lives between the addresses given by TASK_SIZE
  * and PAGE_OFFSET - it must be within 128MB of the kernel text.
  */
 #define VA_BITS			(CONFIG_ARM64_VA_BITS)
+#define VA_START		(UL(0xffffffffffffffff) << VA_BITS)
 #define PAGE_OFFSET		(UL(0xffffffffffffffff) << (VA_BITS - 1))
 #define MODULES_END		(PAGE_OFFSET)
 #define MODULES_VADDR		(MODULES_END - SZ_64M)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 26b066690593..581d1406ba28 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -41,7 +41,7 @@
  *	fixed mappings and modules
  */
 #define VMEMMAP_SIZE		ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
-#define VMALLOC_START		(UL(0xffffffffffffffff) << VA_BITS)
+#define VMALLOC_START		(VA_START)
 #define VMALLOC_END		(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
 
 #define vmemmap			((struct page *)(VMALLOC_END + SZ_64K))
-- 
cgit v1.2.3-58-ga151


From c51e97d89e526368eb697f87cd4d391b9e19f369 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 6 Oct 2015 18:46:21 +0100
Subject: arm64: mm: remove unused cpu_set_idmap_tcr_t0sz function

With commit b08d4640a3dc ("arm64: remove dead code"),
cpu_set_idmap_tcr_t0sz is no longer called and can therefore be removed
from the kernel.

This patch removes the function and effectively inlines the helper
function __cpu_set_tcr_t0sz into cpu_set_default_tcr_t0sz.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/mmu_context.h | 35 ++++++++++++-----------------------
 1 file changed, 12 insertions(+), 23 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 8ec41e5f56f0..549b89554ce8 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -77,34 +77,23 @@ static inline bool __cpu_uses_extended_idmap(void)
 		unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
 }
 
-static inline void __cpu_set_tcr_t0sz(u64 t0sz)
-{
-	unsigned long tcr;
-
-	if (__cpu_uses_extended_idmap())
-		asm volatile (
-		"	mrs	%0, tcr_el1	;"
-		"	bfi	%0, %1, %2, %3	;"
-		"	msr	tcr_el1, %0	;"
-		"	isb"
-		: "=&r" (tcr)
-		: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
-}
-
-/*
- * Set TCR.T0SZ to the value appropriate for activating the identity map.
- */
-static inline void cpu_set_idmap_tcr_t0sz(void)
-{
-	__cpu_set_tcr_t0sz(idmap_t0sz);
-}
-
 /*
  * Set TCR.T0SZ to its default value (based on VA_BITS)
  */
 static inline void cpu_set_default_tcr_t0sz(void)
 {
-	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
+	unsigned long tcr;
+
+	if (!__cpu_uses_extended_idmap())
+		return;
+
+	asm volatile (
+	"	mrs	%0, tcr_el1	;"
+	"	bfi	%0, %1, %2, %3	;"
+	"	msr	tcr_el1, %0	;"
+	"	isb"
+	: "=&r" (tcr)
+	: "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
 }
 
 static inline void switch_new_context(struct mm_struct *mm)
-- 
cgit v1.2.3-58-ga151


From fa7aae8a4257e6be7051420dac1f150c1eef721b Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 6 Oct 2015 18:46:22 +0100
Subject: arm64: proc: de-scope TLBI operation during cold boot

When cold-booting a CPU, we must invalidate any junk entries from the
local TLB prior to enabling the MMU. This doesn't require broadcasting
within the inner-shareable domain, so de-scope the operation to apply
only to the local CPU.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/proc.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index e4ee7bd8830a..bbde13d77da5 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -146,8 +146,8 @@ ENDPROC(cpu_do_switch_mm)
  *	value of the SCTLR_EL1 register.
  */
 ENTRY(__cpu_setup)
-	tlbi	vmalle1is			// invalidate I + D TLBs
-	dsb	ish
+	tlbi	vmalle1				// Invalidate local TLB
+	dsb	nsh
 
 	mov	x0, #3 << 20
 	msr	cpacr_el1, x0			// Enable FP/ASIMD
-- 
cgit v1.2.3-58-ga151


From 8e63d38876691756f9bc6930850f1fb77809be1b Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 6 Oct 2015 18:46:23 +0100
Subject: arm64: flush: use local TLB and I-cache invalidation

There are a number of places where a single CPU is running with a
private page-table and we need to perform maintenance on the TLB and
I-cache in order to ensure correctness, but do not require the operation
to be broadcast to other CPUs.

This patch adds local variants of tlb_flush_all and __flush_icache_all
to support these use-cases and updates the callers respectively.
__local_flush_icache_all also implies an isb, since it is intended to be
used synchronously.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: David Daney <david.daney@cavium.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cacheflush.h | 7 +++++++
 arch/arm64/include/asm/tlbflush.h   | 8 ++++++++
 arch/arm64/kernel/efi.c             | 4 ++--
 arch/arm64/kernel/smp.c             | 2 +-
 arch/arm64/kernel/suspend.c         | 2 +-
 arch/arm64/mm/context.c             | 4 ++--
 arch/arm64/mm/mmu.c                 | 2 +-
 7 files changed, 22 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index c75b8d027eb1..54efedaf331f 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -115,6 +115,13 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
+static inline void __local_flush_icache_all(void)
+{
+	asm("ic iallu");
+	dsb(nsh);
+	isb();
+}
+
 static inline void __flush_icache_all(void)
 {
 	asm("ic	ialluis");
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 7bd2da021658..96f944e75dc4 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -63,6 +63,14 @@
  *		only require the D-TLB to be invalidated.
  *		- kaddr - Kernel virtual memory address
  */
+static inline void local_flush_tlb_all(void)
+{
+	dsb(nshst);
+	asm("tlbi	vmalle1");
+	dsb(nsh);
+	isb();
+}
+
 static inline void flush_tlb_all(void)
 {
 	dsb(ishst);
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 13671a9cf016..4d12926ea40d 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -344,9 +344,9 @@ static void efi_set_pgd(struct mm_struct *mm)
 	else
 		cpu_switch_mm(mm->pgd, mm);
 
-	flush_tlb_all();
+	local_flush_tlb_all();
 	if (icache_is_aivivt())
-		__flush_icache_all();
+		__local_flush_icache_all();
 }
 
 void efi_virtmap_load(void)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dbdaacddd9a5..fdd4d4dbd64f 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -152,7 +152,7 @@ asmlinkage void secondary_start_kernel(void)
 	 * point to zero page to avoid speculatively fetching new entries.
 	 */
 	cpu_set_reserved_ttbr0();
-	flush_tlb_all();
+	local_flush_tlb_all();
 	cpu_set_default_tcr_t0sz();
 
 	preempt_disable();
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 8297d502217e..3c5e4e6dcf68 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -90,7 +90,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 		else
 			cpu_switch_mm(mm->pgd, mm);
 
-		flush_tlb_all();
+		local_flush_tlb_all();
 
 		/*
 		 * Restore per-cpu offset before any kernel
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index d70ff14dbdbd..48b53fb381af 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -48,9 +48,9 @@ static void flush_context(void)
 {
 	/* set the reserved TTBR0 before flushing the TLB */
 	cpu_set_reserved_ttbr0();
-	flush_tlb_all();
+	local_flush_tlb_all();
 	if (icache_is_aivivt())
-		__flush_icache_all();
+		__local_flush_icache_all();
 }
 
 static void set_mm_context(struct mm_struct *mm, unsigned int asid)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9211b8527f25..71a310478c9e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -456,7 +456,7 @@ void __init paging_init(void)
 	 * point to zero page to avoid speculatively fetching new entries.
 	 */
 	cpu_set_reserved_ttbr0();
-	flush_tlb_all();
+	local_flush_tlb_all();
 	cpu_set_default_tcr_t0sz();
 }
 
-- 
cgit v1.2.3-58-ga151


From 5aec715d7d3122f77cabaa7578d9d25a0c1ed20e Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 6 Oct 2015 18:46:24 +0100
Subject: arm64: mm: rewrite ASID allocator and MM context-switching code

Our current switch_mm implementation suffers from a number of problems:

  (1) The ASID allocator relies on IPIs to synchronise the CPUs on a
      rollover event

  (2) Because of (1), we cannot allocate ASIDs with interrupts disabled
      and therefore make use of a TIF_SWITCH_MM flag to postpone the
      actual switch to finish_arch_post_lock_switch

  (3) We run context switch with a reserved (invalid) TTBR0 value, even
      though the ASID and pgd are updated atomically

  (4) We take a global spinlock (cpu_asid_lock) during context-switch

  (5) We use h/w broadcast TLB operations when they are not required
      (e.g. in flush_context)

This patch addresses these problems by rewriting the ASID algorithm to
match the bitmap-based arch/arm/ implementation more closely. This in
turn allows us to remove much of the complications surrounding switch_mm,
including the ugly thread flag.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/mmu.h         |  15 +--
 arch/arm64/include/asm/mmu_context.h |  76 ++---------
 arch/arm64/include/asm/thread_info.h |   1 -
 arch/arm64/kernel/asm-offsets.c      |   2 +-
 arch/arm64/kernel/efi.c              |   1 -
 arch/arm64/mm/context.c              | 238 +++++++++++++++++++++--------------
 arch/arm64/mm/proc.S                 |   2 +-
 7 files changed, 166 insertions(+), 169 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 030208767185..990124a67eeb 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -17,15 +17,16 @@
 #define __ASM_MMU_H
 
 typedef struct {
-	unsigned int id;
-	raw_spinlock_t id_lock;
-	void *vdso;
+	atomic64_t	id;
+	void		*vdso;
 } mm_context_t;
 
-#define INIT_MM_CONTEXT(name) \
-	.context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock),
-
-#define ASID(mm)	((mm)->context.id & 0xffff)
+/*
+ * This macro is only used by the TLBI code, which cannot race with an
+ * ASID change and therefore doesn't need to reload the counter using
+ * atomic64_read.
+ */
+#define ASID(mm)	((mm)->context.id.counter & 0xffff)
 
 extern void paging_init(void);
 extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 549b89554ce8..f4c74a951b6c 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -28,13 +28,6 @@
 #include <asm/cputype.h>
 #include <asm/pgtable.h>
 
-#define MAX_ASID_BITS	16
-
-extern unsigned int cpu_last_asid;
-
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm);
-void __new_context(struct mm_struct *mm);
-
 #ifdef CONFIG_PID_IN_CONTEXTIDR
 static inline void contextidr_thread_switch(struct task_struct *next)
 {
@@ -96,66 +89,19 @@ static inline void cpu_set_default_tcr_t0sz(void)
 	: "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
 }
 
-static inline void switch_new_context(struct mm_struct *mm)
-{
-	unsigned long flags;
-
-	__new_context(mm);
-
-	local_irq_save(flags);
-	cpu_switch_mm(mm->pgd, mm);
-	local_irq_restore(flags);
-}
-
-static inline void check_and_switch_context(struct mm_struct *mm,
-					    struct task_struct *tsk)
-{
-	/*
-	 * Required during context switch to avoid speculative page table
-	 * walking with the wrong TTBR.
-	 */
-	cpu_set_reserved_ttbr0();
-
-	if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS))
-		/*
-		 * The ASID is from the current generation, just switch to the
-		 * new pgd. This condition is only true for calls from
-		 * context_switch() and interrupts are already disabled.
-		 */
-		cpu_switch_mm(mm->pgd, mm);
-	else if (irqs_disabled())
-		/*
-		 * Defer the new ASID allocation until after the context
-		 * switch critical region since __new_context() cannot be
-		 * called with interrupts disabled.
-		 */
-		set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
-	else
-		/*
-		 * That is a direct call to switch_mm() or activate_mm() with
-		 * interrupts enabled and a new context.
-		 */
-		switch_new_context(mm);
-}
-
-#define init_new_context(tsk,mm)	(__init_new_context(tsk,mm),0)
+/*
+ * It would be nice to return ASIDs back to the allocator, but unfortunately
+ * that introduces a race with a generation rollover where we could erroneously
+ * free an ASID allocated in a future generation. We could workaround this by
+ * freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap),
+ * but we'd then need to make sure that we didn't dirty any TLBs afterwards.
+ * Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you
+ * take CPU migration into account.
+ */
 #define destroy_context(mm)		do { } while(0)
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu);
 
-#define finish_arch_post_lock_switch \
-	finish_arch_post_lock_switch
-static inline void finish_arch_post_lock_switch(void)
-{
-	if (test_and_clear_thread_flag(TIF_SWITCH_MM)) {
-		struct mm_struct *mm = current->mm;
-		unsigned long flags;
-
-		__new_context(mm);
-
-		local_irq_save(flags);
-		cpu_switch_mm(mm->pgd, mm);
-		local_irq_restore(flags);
-	}
-}
+#define init_new_context(tsk,mm)	({ atomic64_set(&mm->context.id, 0); 0; })
 
 /*
  * This is called when "tsk" is about to enter lazy TLB mode.
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index dcd06d18a42a..555c6dec5ef2 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -111,7 +111,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_RESTORE_SIGMASK	20
 #define TIF_SINGLESTEP		21
 #define TIF_32BIT		22	/* 32bit process */
-#define TIF_SWITCH_MM		23	/* deferred switch_mm */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 8d89cf8dae55..25de8b244961 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -60,7 +60,7 @@ int main(void)
   DEFINE(S_SYSCALLNO,		offsetof(struct pt_regs, syscallno));
   DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
   BLANK();
-  DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id));
+  DEFINE(MM_CONTEXT_ID,		offsetof(struct mm_struct, context.id.counter));
   BLANK();
   DEFINE(VMA_VM_MM,		offsetof(struct vm_area_struct, vm_mm));
   DEFINE(VMA_VM_FLAGS,		offsetof(struct vm_area_struct, vm_flags));
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index 4d12926ea40d..a48d1f477b2e 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -48,7 +48,6 @@ static struct mm_struct efi_mm = {
 	.mmap_sem		= __RWSEM_INITIALIZER(efi_mm.mmap_sem),
 	.page_table_lock	= __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
 	.mmlist			= LIST_HEAD_INIT(efi_mm.mmlist),
-	INIT_MM_CONTEXT(efi_mm)
 };
 
 static int uefi_debug __initdata;
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 48b53fb381af..e902229b1a3d 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -17,135 +17,187 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/init.h>
+#include <linux/bitops.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/percpu.h>
 
+#include <asm/cpufeature.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
-#include <asm/cachetype.h>
 
-#define asid_bits(reg) \
-	(((read_cpuid(ID_AA64MMFR0_EL1) & 0xf0) >> 2) + 8)
+static u32 asid_bits;
+static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
 
-#define ASID_FIRST_VERSION	(1 << MAX_ASID_BITS)
+static atomic64_t asid_generation;
+static unsigned long *asid_map;
 
-static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
-unsigned int cpu_last_asid = ASID_FIRST_VERSION;
+static DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(u64, reserved_asids);
+static cpumask_t tlb_flush_pending;
 
-/*
- * We fork()ed a process, and we need a new context for the child to run in.
- */
-void __init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+#define ASID_MASK		(~GENMASK(asid_bits - 1, 0))
+#define ASID_FIRST_VERSION	(1UL << asid_bits)
+#define NUM_USER_ASIDS		ASID_FIRST_VERSION
+
+static void flush_context(unsigned int cpu)
 {
-	mm->context.id = 0;
-	raw_spin_lock_init(&mm->context.id_lock);
+	int i;
+	u64 asid;
+
+	/* Update the list of reserved ASIDs and the ASID bitmap. */
+	bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+
+	/*
+	 * Ensure the generation bump is observed before we xchg the
+	 * active_asids.
+	 */
+	smp_wmb();
+
+	for_each_possible_cpu(i) {
+		asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
+		/*
+		 * If this CPU has already been through a
+		 * rollover, but hasn't run another task in
+		 * the meantime, we must preserve its reserved
+		 * ASID, as this is the only trace we have of
+		 * the process it is still running.
+		 */
+		if (asid == 0)
+			asid = per_cpu(reserved_asids, i);
+		__set_bit(asid & ~ASID_MASK, asid_map);
+		per_cpu(reserved_asids, i) = asid;
+	}
+
+	/* Queue a TLB invalidate and flush the I-cache if necessary. */
+	cpumask_setall(&tlb_flush_pending);
+
+	if (icache_is_aivivt())
+		__flush_icache_all();
 }
 
-static void flush_context(void)
+static int is_reserved_asid(u64 asid)
 {
-	/* set the reserved TTBR0 before flushing the TLB */
-	cpu_set_reserved_ttbr0();
-	local_flush_tlb_all();
-	if (icache_is_aivivt())
-		__local_flush_icache_all();
+	int cpu;
+	for_each_possible_cpu(cpu)
+		if (per_cpu(reserved_asids, cpu) == asid)
+			return 1;
+	return 0;
 }
 
-static void set_mm_context(struct mm_struct *mm, unsigned int asid)
+static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 {
-	unsigned long flags;
+	static u32 cur_idx = 1;
+	u64 asid = atomic64_read(&mm->context.id);
+	u64 generation = atomic64_read(&asid_generation);
 
-	/*
-	 * Locking needed for multi-threaded applications where the same
-	 * mm->context.id could be set from different CPUs during the
-	 * broadcast. This function is also called via IPI so the
-	 * mm->context.id_lock has to be IRQ-safe.
-	 */
-	raw_spin_lock_irqsave(&mm->context.id_lock, flags);
-	if (likely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
+	if (asid != 0) {
 		/*
-		 * Old version of ASID found. Set the new one and reset
-		 * mm_cpumask(mm).
+		 * If our current ASID was active during a rollover, we
+		 * can continue to use it and this was just a false alarm.
 		 */
-		mm->context.id = asid;
-		cpumask_clear(mm_cpumask(mm));
+		if (is_reserved_asid(asid))
+			return generation | (asid & ~ASID_MASK);
+
+		/*
+		 * We had a valid ASID in a previous life, so try to re-use
+		 * it if possible.
+		 */
+		asid &= ~ASID_MASK;
+		if (!__test_and_set_bit(asid, asid_map))
+			goto bump_gen;
 	}
-	raw_spin_unlock_irqrestore(&mm->context.id_lock, flags);
 
 	/*
-	 * Set the mm_cpumask(mm) bit for the current CPU.
+	 * Allocate a free ASID. If we can't find one, take a note of the
+	 * currently active ASIDs and mark the TLBs as requiring flushes.
+	 * We always count from ASID #1, as we use ASID #0 when setting a
+	 * reserved TTBR0 for the init_mm.
 	 */
-	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+	if (asid != NUM_USER_ASIDS)
+		goto set_asid;
+
+	/* We're out of ASIDs, so increment the global generation count */
+	generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION,
+						 &asid_generation);
+	flush_context(cpu);
+
+	/* We have at least 1 ASID per CPU, so this will always succeed */
+	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+
+set_asid:
+	__set_bit(asid, asid_map);
+	cur_idx = asid;
+
+bump_gen:
+	asid |= generation;
+	cpumask_clear(mm_cpumask(mm));
+	return asid;
 }
 
-/*
- * Reset the ASID on the current CPU. This function call is broadcast from the
- * CPU handling the ASID rollover and holding cpu_asid_lock.
- */
-static void reset_context(void *info)
+void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 {
-	unsigned int asid;
-	unsigned int cpu = smp_processor_id();
-	struct mm_struct *mm = current->active_mm;
+	unsigned long flags;
+	u64 asid;
+
+	asid = atomic64_read(&mm->context.id);
 
 	/*
-	 * current->active_mm could be init_mm for the idle thread immediately
-	 * after secondary CPU boot or hotplug. TTBR0_EL1 is already set to
-	 * the reserved value, so no need to reset any context.
+	 * The memory ordering here is subtle. We rely on the control
+	 * dependency between the generation read and the update of
+	 * active_asids to ensure that we are synchronised with a
+	 * parallel rollover (i.e. this pairs with the smp_wmb() in
+	 * flush_context).
 	 */
-	if (mm == &init_mm)
-		return;
+	if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
+	    && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
+		goto switch_mm_fastpath;
+
+	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+	/* Check that our ASID belongs to the current generation. */
+	asid = atomic64_read(&mm->context.id);
+	if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) {
+		asid = new_context(mm, cpu);
+		atomic64_set(&mm->context.id, asid);
+	}
 
-	smp_rmb();
-	asid = cpu_last_asid + cpu;
+	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
+		local_flush_tlb_all();
 
-	flush_context();
-	set_mm_context(mm, asid);
+	atomic64_set(&per_cpu(active_asids, cpu), asid);
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
+	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
 
-	/* set the new ASID */
+switch_mm_fastpath:
 	cpu_switch_mm(mm->pgd, mm);
 }
 
-void __new_context(struct mm_struct *mm)
+static int asids_init(void)
 {
-	unsigned int asid;
-	unsigned int bits = asid_bits();
-
-	raw_spin_lock(&cpu_asid_lock);
-	/*
-	 * Check the ASID again, in case the change was broadcast from another
-	 * CPU before we acquired the lock.
-	 */
-	if (!unlikely((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) {
-		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-		raw_spin_unlock(&cpu_asid_lock);
-		return;
-	}
-	/*
-	 * At this point, it is guaranteed that the current mm (with an old
-	 * ASID) isn't active on any other CPU since the ASIDs are changed
-	 * simultaneously via IPI.
-	 */
-	asid = ++cpu_last_asid;
-
-	/*
-	 * If we've used up all our ASIDs, we need to start a new version and
-	 * flush the TLB.
-	 */
-	if (unlikely((asid & ((1 << bits) - 1)) == 0)) {
-		/* increment the ASID version */
-		cpu_last_asid += (1 << MAX_ASID_BITS) - (1 << bits);
-		if (cpu_last_asid == 0)
-			cpu_last_asid = ASID_FIRST_VERSION;
-		asid = cpu_last_asid + smp_processor_id();
-		flush_context();
-		smp_wmb();
-		smp_call_function(reset_context, NULL, 1);
-		cpu_last_asid += NR_CPUS - 1;
+	int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4);
+
+	switch (fld) {
+	default:
+		pr_warn("Unknown ASID size (%d); assuming 8-bit\n", fld);
+		/* Fallthrough */
+	case 0:
+		asid_bits = 8;
+		break;
+	case 2:
+		asid_bits = 16;
 	}
 
-	set_mm_context(mm, asid);
-	raw_spin_unlock(&cpu_asid_lock);
+	/* If we end up with more CPUs than ASIDs, expect things to crash */
+	WARN_ON(NUM_USER_ASIDS < num_possible_cpus());
+	atomic64_set(&asid_generation, ASID_FIRST_VERSION);
+	asid_map = kzalloc(BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(*asid_map),
+			   GFP_KERNEL);
+	if (!asid_map)
+		panic("Failed to allocate bitmap for %lu ASIDs\n",
+		      NUM_USER_ASIDS);
+
+	pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
+	return 0;
 }
+early_initcall(asids_init);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index bbde13d77da5..91cb2eaac256 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -130,7 +130,7 @@ ENDPROC(cpu_do_resume)
  *	- pgd_phys - physical address of new TTB
  */
 ENTRY(cpu_do_switch_mm)
-	mmid	w1, x1				// get mm->context.id
+	mmid	x1, x1				// get mm->context.id
 	bfi	x0, x1, #48, #16		// set the ASID
 	msr	ttbr0_el1, x0			// set TTBR0
 	isb
-- 
cgit v1.2.3-58-ga151


From f3e002c24e1f3b66f6e392ecd6928b5d04672c54 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 6 Oct 2015 18:46:25 +0100
Subject: arm64: tlbflush: remove redundant ASID casts to (unsigned long)

The ASID macro returns a 64-bit (long long) value, so there is no need
to cast to (unsigned long) before shifting prior to a TLBI operation.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/tlbflush.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 96f944e75dc4..93e9f964805c 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -81,7 +81,7 @@ static inline void flush_tlb_all(void)
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
-	unsigned long asid = (unsigned long)ASID(mm) << 48;
+	unsigned long asid = ASID(mm) << 48;
 
 	dsb(ishst);
 	asm("tlbi	aside1is, %0" : : "r" (asid));
@@ -91,8 +91,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 static inline void flush_tlb_page(struct vm_area_struct *vma,
 				  unsigned long uaddr)
 {
-	unsigned long addr = uaddr >> 12 |
-		((unsigned long)ASID(vma->vm_mm) << 48);
+	unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48);
 
 	dsb(ishst);
 	asm("tlbi	vale1is, %0" : : "r" (addr));
@@ -109,7 +108,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end,
 				     bool last_level)
 {
-	unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
+	unsigned long asid = ASID(vma->vm_mm) << 48;
 	unsigned long addr;
 
 	if ((end - start) > MAX_TLB_RANGE) {
@@ -162,7 +161,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
 static inline void __flush_tlb_pgtable(struct mm_struct *mm,
 				       unsigned long uaddr)
 {
-	unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48);
+	unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
 
 	dsb(ishst);
 	asm("tlbi	vae1is, %0" : : "r" (addr));
-- 
cgit v1.2.3-58-ga151


From 5a7862e83000ccfd36db927c6f060458fe271157 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 6 Oct 2015 18:46:26 +0100
Subject: arm64: tlbflush: avoid flushing when fullmm == 1

The TLB gather code sets fullmm=1 when tearing down the entire address
space for an mm_struct on exit or execve. Given that the ASID allocator
will never re-allocate a dirty ASID, this flushing is not needed and can
simply be avoided in the flushing code.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/tlb.h | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index d6e6b6660380..ffdaea7954bb 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -37,17 +37,21 @@ static inline void __tlb_remove_table(void *_table)
 
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
-	if (tlb->fullmm) {
-		flush_tlb_mm(tlb->mm);
-	} else {
-		struct vm_area_struct vma = { .vm_mm = tlb->mm, };
-		/*
-		 * The intermediate page table levels are already handled by
-		 * the __(pte|pmd|pud)_free_tlb() functions, so last level
-		 * TLBI is sufficient here.
-		 */
-		__flush_tlb_range(&vma, tlb->start, tlb->end, true);
-	}
+	struct vm_area_struct vma = { .vm_mm = tlb->mm, };
+
+	/*
+	 * The ASID allocator will either invalidate the ASID or mark
+	 * it as used.
+	 */
+	if (tlb->fullmm)
+		return;
+
+	/*
+	 * The intermediate page table levels are already handled by
+	 * the __(pte|pmd|pud)_free_tlb() functions, so last level
+	 * TLBI is sufficient here.
+	 */
+	__flush_tlb_range(&vma, tlb->start, tlb->end, true);
 }
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
-- 
cgit v1.2.3-58-ga151


From c2775b2ee5caca19f661ee2ab5af92462596db71 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 6 Oct 2015 18:46:27 +0100
Subject: arm64: switch_mm: simplify mm and CPU checks

switch_mm performs some checks to try and avoid entering the ASID
allocator:

  (1) If we're switching to the init_mm (no user mappings), then simply
      set a reserved TTBR0 value with no page table (the zero page)

  (2) If prev == next *and* the mm_cpumask indicates that we've run on
      this CPU before, then we can skip the allocator.

However, there is plenty of redundancy here. With the new ASID allocator,
if prev == next, then we know that our ASID is valid and do not need to
worry about re-allocation. Consequently, we can drop the mm_cpumask check
in (2) and move the prev == next check before the init_mm check, since
if prev == next == init_mm then there's nothing to do.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/mmu_context.h | 6 ++++--
 arch/arm64/mm/context.c              | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index f4c74a951b6c..c0e87898ba96 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -129,6 +129,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 {
 	unsigned int cpu = smp_processor_id();
 
+	if (prev == next)
+		return;
+
 	/*
 	 * init_mm.pgd does not contain any user mappings and it is always
 	 * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
@@ -138,8 +141,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		return;
 	}
 
-	if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
-		check_and_switch_context(next, tsk);
+	check_and_switch_context(next, cpu);
 }
 
 #define deactivate_mm(tsk,mm)	do { } while (0)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e902229b1a3d..4b9ec4484e3f 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -166,10 +166,10 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 		local_flush_tlb_all();
 
 	atomic64_set(&per_cpu(active_asids, cpu), asid);
-	cpumask_set_cpu(cpu, mm_cpumask(mm));
 	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
 
 switch_mm_fastpath:
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
 	cpu_switch_mm(mm->pgd, mm);
 }
 
-- 
cgit v1.2.3-58-ga151


From 38d96287504a2478eb538bfecfa1fddd743bb6b2 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 6 Oct 2015 18:46:28 +0100
Subject: arm64: mm: kill mm_cpumask usage

mm_cpumask isn't actually used for anything on arm64, so remove all the
code trying to keep it up-to-date.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/smp.c | 7 -------
 arch/arm64/mm/context.c | 2 --
 2 files changed, 9 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index fdd4d4dbd64f..03b0aa28ea61 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -142,7 +142,6 @@ asmlinkage void secondary_start_kernel(void)
 	 */
 	atomic_inc(&mm->mm_count);
 	current->active_mm = mm;
-	cpumask_set_cpu(cpu, mm_cpumask(mm));
 
 	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
 	printk("CPU%u: Booted secondary processor\n", cpu);
@@ -233,12 +232,6 @@ int __cpu_disable(void)
 	 * OK - migrate IRQs away from this CPU
 	 */
 	migrate_irqs();
-
-	/*
-	 * Remove this CPU from the vm mask set of all processes.
-	 */
-	clear_tasks_mm_cpumask(cpu);
-
 	return 0;
 }
 
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 4b9ec4484e3f..f636a2639f03 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -132,7 +132,6 @@ set_asid:
 
 bump_gen:
 	asid |= generation;
-	cpumask_clear(mm_cpumask(mm));
 	return asid;
 }
 
@@ -169,7 +168,6 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
 
 switch_mm_fastpath:
-	cpumask_set_cpu(cpu, mm_cpumask(mm));
 	cpu_switch_mm(mm->pgd, mm);
 }
 
-- 
cgit v1.2.3-58-ga151


From 28c6fbc3b446caf5f8d1f2d7b79e09e743158a4d Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 6 Oct 2015 18:46:29 +0100
Subject: arm64: tlb: remove redundant barrier from __flush_tlb_pgtable

__flush_tlb_pgtable is used to invalidate intermediate page table
entries after they have been cleared and are about to be freed. Since
pXd_clear imply memory barriers, we don't need the extra one here.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/tlbflush.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 93e9f964805c..b460ae28e346 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -163,7 +163,6 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm,
 {
 	unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
 
-	dsb(ishst);
 	asm("tlbi	vae1is, %0" : : "r" (addr));
 	dsb(ish);
 }
-- 
cgit v1.2.3-58-ga151


From 120798d2e7d1ac87365fe5ea91b074bb42ca1eff Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 6 Oct 2015 18:46:30 +0100
Subject: arm64: mm: remove dsb from update_mmu_cache

update_mmu_cache() consists of a dsb(ishst) instruction so that new user
mappings are guaranteed to be visible to the page table walker on
exception return.

In reality this can be a very expensive operation which is rarely needed.
Removing this barrier shows a modest improvement in hackbench scores and
, in the worst case, we re-take the user fault and establish that there
was nothing to do.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/pgtable.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 581d1406ba28..5043a84e724e 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -646,10 +646,10 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 				    unsigned long addr, pte_t *ptep)
 {
 	/*
-	 * set_pte() does not have a DSB for user mappings, so make sure that
-	 * the page table write is visible.
+	 * We don't do anything here, so there's a very small chance of
+	 * us retaking a user fault which we just fixed up. The alternative
+	 * is doing a dsb(ishst), but that penalises the fastpath.
 	 */
-	dsb(ishst);
 }
 
 #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
-- 
cgit v1.2.3-58-ga151


From 8f48c0629049fdebb6e783803325bff19176d3fd Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 7 Oct 2015 11:37:36 +0100
Subject: arm64: hw_breakpoint: use target state to determine ABI behaviour

The arm64 hw_breakpoint interface is slightly less flexible than its
32-bit counterpart, thanks to some changes in the architecture rendering
unaligned watchpoint addresses obselete for AArch64.

However, in a multi-arch environment (i.e. debugging a 32-bit target
with a 64-bit GDB under a 64-bit kernel), we need to provide a feature
compatible interface to GDB in order for debugging to function correctly.

This patch adds a new helper, is_compat_bp,  to our hw_breakpoint
implementation which changes the interface behaviour based on the
architecture of the debug target as opposed to the debugger itself.
This allows debugged to function as expected for multi-arch
configurations without relying on deprecated architectural behaviours
when debugging native applications.

Cc: Yao Qi <yao.qi@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/hw_breakpoint.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index bba85c8f8037..46465d9fbc4d 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -163,6 +163,20 @@ enum hw_breakpoint_ops {
 	HW_BREAKPOINT_RESTORE
 };
 
+static int is_compat_bp(struct perf_event *bp)
+{
+	struct task_struct *tsk = bp->hw.target;
+
+	/*
+	 * tsk can be NULL for per-cpu (non-ptrace) breakpoints.
+	 * In this case, use the native interface, since we don't have
+	 * the notion of a "compat CPU" and could end up relying on
+	 * deprecated behaviour if we use unaligned watchpoints in
+	 * AArch64 state.
+	 */
+	return tsk && is_compat_thread(task_thread_info(tsk));
+}
+
 /**
  * hw_breakpoint_slot_setup - Find and setup a perf slot according to
  *			      operations
@@ -420,7 +434,7 @@ static int arch_build_bp_info(struct perf_event *bp)
 	 * Watchpoints can be of length 1, 2, 4 or 8 bytes.
 	 */
 	if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
-		if (is_compat_task()) {
+		if (is_compat_bp(bp)) {
 			if (info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
 			    info->ctrl.len != ARM_BREAKPOINT_LEN_4)
 				return -EINVAL;
@@ -477,7 +491,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	 * AArch32 tasks expect some simple alignment fixups, so emulate
 	 * that here.
 	 */
-	if (is_compat_task()) {
+	if (is_compat_bp(bp)) {
 		if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
 			alignment_mask = 0x7;
 		else
-- 
cgit v1.2.3-58-ga151


From 6475b2d846176e3272351266869481a21ff47866 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 2 Oct 2015 10:55:03 +0100
Subject: arm64: perf: move to shared arm_pmu framework

Now that the arm_pmu framework has been factored out to drivers/perf we
can make use of it for arm64, gaining support for heterogeneous PMUs
and unifying the two codebases before they diverge further.

The as yet unused PMU name for PMUv3 is changed to armv8_pmuv3, matching
the style previously applied to the 32-bit PMUs.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig             |   8 +-
 arch/arm64/include/asm/pmu.h   |  83 ----
 arch/arm64/kernel/perf_event.c | 951 ++++-------------------------------------
 drivers/perf/Kconfig           |   2 +-
 4 files changed, 93 insertions(+), 951 deletions(-)
 delete mode 100644 arch/arm64/include/asm/pmu.h

(limited to 'arch')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 07d1811aa03f..98b4f98a13de 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -454,12 +454,8 @@ config HAVE_ARCH_PFN_VALID
 	def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
 
 config HW_PERF_EVENTS
-	bool "Enable hardware performance counter support for perf events"
-	depends on PERF_EVENTS
-	default y
-	help
-	  Enable hardware performance counter support for perf events. If
-	  disabled, perf events will use software events only.
+	def_bool y
+	depends on ARM_PMU
 
 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
deleted file mode 100644
index b7710a59672c..000000000000
--- a/arch/arm64/include/asm/pmu.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Based on arch/arm/include/asm/pmu.h
- *
- * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_PMU_H
-#define __ASM_PMU_H
-
-#ifdef CONFIG_HW_PERF_EVENTS
-
-/* The events for a given PMU register set. */
-struct pmu_hw_events {
-	/*
-	 * The events that are active on the PMU for the given index.
-	 */
-	struct perf_event	**events;
-
-	/*
-	 * A 1 bit for an index indicates that the counter is being used for
-	 * an event. A 0 means that the counter can be used.
-	 */
-	unsigned long           *used_mask;
-
-	/*
-	 * Hardware lock to serialize accesses to PMU registers. Needed for the
-	 * read/modify/write sequences.
-	 */
-	raw_spinlock_t		pmu_lock;
-};
-
-struct arm_pmu {
-	struct pmu		pmu;
-	cpumask_t		active_irqs;
-	int			*irq_affinity;
-	const char		*name;
-	irqreturn_t		(*handle_irq)(int irq_num, void *dev);
-	void			(*enable)(struct hw_perf_event *evt, int idx);
-	void			(*disable)(struct hw_perf_event *evt, int idx);
-	int			(*get_event_idx)(struct pmu_hw_events *hw_events,
-						 struct hw_perf_event *hwc);
-	int			(*set_event_filter)(struct hw_perf_event *evt,
-						    struct perf_event_attr *attr);
-	u32			(*read_counter)(int idx);
-	void			(*write_counter)(int idx, u32 val);
-	void			(*start)(void);
-	void			(*stop)(void);
-	void			(*reset)(void *);
-	int			(*map_event)(struct perf_event *event);
-	int			num_events;
-	atomic_t		active_events;
-	struct mutex		reserve_mutex;
-	u64			max_period;
-	struct platform_device	*plat_device;
-	struct pmu_hw_events	*(*get_hw_events)(void);
-};
-
-#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-
-int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type);
-
-u64 armpmu_event_update(struct perf_event *event,
-			struct hw_perf_event *hwc,
-			int idx);
-
-int armpmu_event_set_period(struct perf_event *event,
-			    struct hw_perf_event *hwc,
-			    int idx);
-
-#endif /* CONFIG_HW_PERF_EVENTS */
-#endif /* __ASM_PMU_H */
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index f9a74d4fff3b..a614100cd00f 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -18,651 +18,12 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#define pr_fmt(fmt) "hw perfevents: " fmt
-
-#include <linux/bitmap.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/of_device.h>
-#include <linux/perf_event.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/uaccess.h>
 
-#include <asm/cputype.h>
-#include <asm/irq.h>
 #include <asm/irq_regs.h>
-#include <asm/pmu.h>
-
-/*
- * ARMv8 supports a maximum of 32 events.
- * The cycle counter is included in this total.
- */
-#define ARMPMU_MAX_HWEVENTS		32
-
-static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
-static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
-static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
-
-#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
-
-/* Set at runtime when we know what CPU type we are. */
-static struct arm_pmu *cpu_pmu;
-
-int
-armpmu_get_max_events(void)
-{
-	int max_events = 0;
-
-	if (cpu_pmu != NULL)
-		max_events = cpu_pmu->num_events;
-
-	return max_events;
-}
-EXPORT_SYMBOL_GPL(armpmu_get_max_events);
-
-int perf_num_counters(void)
-{
-	return armpmu_get_max_events();
-}
-EXPORT_SYMBOL_GPL(perf_num_counters);
-
-#define HW_OP_UNSUPPORTED		0xFFFF
-
-#define C(_x) \
-	PERF_COUNT_HW_CACHE_##_x
-
-#define CACHE_OP_UNSUPPORTED		0xFFFF
-
-#define PERF_MAP_ALL_UNSUPPORTED					\
-	[0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
-
-#define PERF_CACHE_MAP_ALL_UNSUPPORTED					\
-[0 ... C(MAX) - 1] = {							\
-	[0 ... C(OP_MAX) - 1] = {					\
-		[0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED,	\
-	},								\
-}
-
-static int
-armpmu_map_cache_event(const unsigned (*cache_map)
-				      [PERF_COUNT_HW_CACHE_MAX]
-				      [PERF_COUNT_HW_CACHE_OP_MAX]
-				      [PERF_COUNT_HW_CACHE_RESULT_MAX],
-		       u64 config)
-{
-	unsigned int cache_type, cache_op, cache_result, ret;
-
-	cache_type = (config >>  0) & 0xff;
-	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
-		return -EINVAL;
-
-	cache_op = (config >>  8) & 0xff;
-	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
-		return -EINVAL;
-
-	cache_result = (config >> 16) & 0xff;
-	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
-		return -EINVAL;
-
-	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
-
-	if (ret == CACHE_OP_UNSUPPORTED)
-		return -ENOENT;
-
-	return ret;
-}
-
-static int
-armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
-{
-	int mapping;
-
-	if (config >= PERF_COUNT_HW_MAX)
-		return -EINVAL;
-
-	mapping = (*event_map)[config];
-	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
-}
-
-static int
-armpmu_map_raw_event(u32 raw_event_mask, u64 config)
-{
-	return (int)(config & raw_event_mask);
-}
-
-static int map_cpu_event(struct perf_event *event,
-			 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
-			 const unsigned (*cache_map)
-					[PERF_COUNT_HW_CACHE_MAX]
-					[PERF_COUNT_HW_CACHE_OP_MAX]
-					[PERF_COUNT_HW_CACHE_RESULT_MAX],
-			 u32 raw_event_mask)
-{
-	u64 config = event->attr.config;
-
-	switch (event->attr.type) {
-	case PERF_TYPE_HARDWARE:
-		return armpmu_map_event(event_map, config);
-	case PERF_TYPE_HW_CACHE:
-		return armpmu_map_cache_event(cache_map, config);
-	case PERF_TYPE_RAW:
-		return armpmu_map_raw_event(raw_event_mask, config);
-	}
-
-	return -ENOENT;
-}
-
-int
-armpmu_event_set_period(struct perf_event *event,
-			struct hw_perf_event *hwc,
-			int idx)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	s64 left = local64_read(&hwc->period_left);
-	s64 period = hwc->sample_period;
-	int ret = 0;
-
-	if (unlikely(left <= -period)) {
-		left = period;
-		local64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	if (unlikely(left <= 0)) {
-		left += period;
-		local64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-		ret = 1;
-	}
-
-	/*
-	 * Limit the maximum period to prevent the counter value
-	 * from overtaking the one we are about to program. In
-	 * effect we are reducing max_period to account for
-	 * interrupt latency (and we are being very conservative).
-	 */
-	if (left > (armpmu->max_period >> 1))
-		left = armpmu->max_period >> 1;
-
-	local64_set(&hwc->prev_count, (u64)-left);
-
-	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
-
-	perf_event_update_userpage(event);
-
-	return ret;
-}
-
-u64
-armpmu_event_update(struct perf_event *event,
-		    struct hw_perf_event *hwc,
-		    int idx)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	u64 delta, prev_raw_count, new_raw_count;
-
-again:
-	prev_raw_count = local64_read(&hwc->prev_count);
-	new_raw_count = armpmu->read_counter(idx);
-
-	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-			     new_raw_count) != prev_raw_count)
-		goto again;
-
-	delta = (new_raw_count - prev_raw_count) & armpmu->max_period;
-
-	local64_add(delta, &event->count);
-	local64_sub(delta, &hwc->period_left);
-
-	return new_raw_count;
-}
-
-static void
-armpmu_read(struct perf_event *event)
-{
-	struct hw_perf_event *hwc = &event->hw;
-
-	/* Don't read disabled counters! */
-	if (hwc->idx < 0)
-		return;
-
-	armpmu_event_update(event, hwc, hwc->idx);
-}
-
-static void
-armpmu_stop(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	/*
-	 * ARM pmu always has to update the counter, so ignore
-	 * PERF_EF_UPDATE, see comments in armpmu_start().
-	 */
-	if (!(hwc->state & PERF_HES_STOPPED)) {
-		armpmu->disable(hwc, hwc->idx);
-		barrier(); /* why? */
-		armpmu_event_update(event, hwc, hwc->idx);
-		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	}
-}
 
-static void
-armpmu_start(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-
-	/*
-	 * ARM pmu always has to reprogram the period, so ignore
-	 * PERF_EF_RELOAD, see the comment below.
-	 */
-	if (flags & PERF_EF_RELOAD)
-		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
-
-	hwc->state = 0;
-	/*
-	 * Set the period again. Some counters can't be stopped, so when we
-	 * were stopped we simply disabled the IRQ source and the counter
-	 * may have been left counting. If we don't do this step then we may
-	 * get an interrupt too soon or *way* too late if the overflow has
-	 * happened since disabling.
-	 */
-	armpmu_event_set_period(event, hwc, hwc->idx);
-	armpmu->enable(hwc, hwc->idx);
-}
-
-static void
-armpmu_del(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
-	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx;
-
-	WARN_ON(idx < 0);
-
-	armpmu_stop(event, PERF_EF_UPDATE);
-	hw_events->events[idx] = NULL;
-	clear_bit(idx, hw_events->used_mask);
-
-	perf_event_update_userpage(event);
-}
-
-static int
-armpmu_add(struct perf_event *event, int flags)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
-	struct hw_perf_event *hwc = &event->hw;
-	int idx;
-	int err = 0;
-
-	perf_pmu_disable(event->pmu);
-
-	/* If we don't have a space for the counter then finish early. */
-	idx = armpmu->get_event_idx(hw_events, hwc);
-	if (idx < 0) {
-		err = idx;
-		goto out;
-	}
-
-	/*
-	 * If there is an event in the counter we are going to use then make
-	 * sure it is disabled.
-	 */
-	event->hw.idx = idx;
-	armpmu->disable(hwc, idx);
-	hw_events->events[idx] = event;
-
-	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
-	if (flags & PERF_EF_START)
-		armpmu_start(event, PERF_EF_RELOAD);
-
-	/* Propagate our changes to the userspace mapping. */
-	perf_event_update_userpage(event);
-
-out:
-	perf_pmu_enable(event->pmu);
-	return err;
-}
-
-static int
-validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
-				struct perf_event *event)
-{
-	struct arm_pmu *armpmu;
-	struct hw_perf_event fake_event = event->hw;
-	struct pmu *leader_pmu = event->group_leader->pmu;
-
-	if (is_software_event(event))
-		return 1;
-
-	/*
-	 * Reject groups spanning multiple HW PMUs (e.g. CPU + CCI). The
-	 * core perf code won't check that the pmu->ctx == leader->ctx
-	 * until after pmu->event_init(event).
-	 */
-	if (event->pmu != pmu)
-		return 0;
-
-	if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
-		return 1;
-
-	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
-		return 1;
-
-	armpmu = to_arm_pmu(event->pmu);
-	return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
-}
-
-static int
-validate_group(struct perf_event *event)
-{
-	struct perf_event *sibling, *leader = event->group_leader;
-	struct pmu_hw_events fake_pmu;
-	DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);
-
-	/*
-	 * Initialise the fake PMU. We only need to populate the
-	 * used_mask for the purposes of validation.
-	 */
-	memset(fake_used_mask, 0, sizeof(fake_used_mask));
-	fake_pmu.used_mask = fake_used_mask;
-
-	if (!validate_event(event->pmu, &fake_pmu, leader))
-		return -EINVAL;
-
-	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
-		if (!validate_event(event->pmu, &fake_pmu, sibling))
-			return -EINVAL;
-	}
-
-	if (!validate_event(event->pmu, &fake_pmu, event))
-		return -EINVAL;
-
-	return 0;
-}
-
-static void
-armpmu_disable_percpu_irq(void *data)
-{
-	unsigned int irq = *(unsigned int *)data;
-	disable_percpu_irq(irq);
-}
-
-static void
-armpmu_release_hardware(struct arm_pmu *armpmu)
-{
-	int irq;
-	unsigned int i, irqs;
-	struct platform_device *pmu_device = armpmu->plat_device;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-	if (!irqs)
-		return;
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq <= 0)
-		return;
-
-	if (irq_is_percpu(irq)) {
-		on_each_cpu(armpmu_disable_percpu_irq, &irq, 1);
-		free_percpu_irq(irq, &cpu_hw_events);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			if (armpmu->irq_affinity)
-				cpu = armpmu->irq_affinity[i];
-
-			if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
-				continue;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq > 0)
-				free_irq(irq, armpmu);
-		}
-	}
-}
-
-static void
-armpmu_enable_percpu_irq(void *data)
-{
-	unsigned int irq = *(unsigned int *)data;
-	enable_percpu_irq(irq, IRQ_TYPE_NONE);
-}
-
-static int
-armpmu_reserve_hardware(struct arm_pmu *armpmu)
-{
-	int err, irq;
-	unsigned int i, irqs;
-	struct platform_device *pmu_device = armpmu->plat_device;
-
-	if (!pmu_device)
-		return -ENODEV;
-
-	irqs = min(pmu_device->num_resources, num_possible_cpus());
-	if (!irqs) {
-		pr_err("no irqs for PMUs defined\n");
-		return -ENODEV;
-	}
-
-	irq = platform_get_irq(pmu_device, 0);
-	if (irq <= 0) {
-		pr_err("failed to get valid irq for PMU device\n");
-		return -ENODEV;
-	}
-
-	if (irq_is_percpu(irq)) {
-		err = request_percpu_irq(irq, armpmu->handle_irq,
-				"arm-pmu", &cpu_hw_events);
-
-		if (err) {
-			pr_err("unable to request percpu IRQ%d for ARM PMU counters\n",
-					irq);
-			armpmu_release_hardware(armpmu);
-			return err;
-		}
-
-		on_each_cpu(armpmu_enable_percpu_irq, &irq, 1);
-	} else {
-		for (i = 0; i < irqs; ++i) {
-			int cpu = i;
-
-			err = 0;
-			irq = platform_get_irq(pmu_device, i);
-			if (irq <= 0)
-				continue;
-
-			if (armpmu->irq_affinity)
-				cpu = armpmu->irq_affinity[i];
-
-			/*
-			 * If we have a single PMU interrupt that we can't shift,
-			 * assume that we're running on a uniprocessor machine and
-			 * continue. Otherwise, continue without this interrupt.
-			 */
-			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
-				pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
-						irq, cpu);
-				continue;
-			}
-
-			err = request_irq(irq, armpmu->handle_irq,
-					IRQF_NOBALANCING | IRQF_NO_THREAD,
-					"arm-pmu", armpmu);
-			if (err) {
-				pr_err("unable to request IRQ%d for ARM PMU counters\n",
-						irq);
-				armpmu_release_hardware(armpmu);
-				return err;
-			}
-
-			cpumask_set_cpu(cpu, &armpmu->active_irqs);
-		}
-	}
-
-	return 0;
-}
-
-static void
-hw_perf_event_destroy(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	atomic_t *active_events	 = &armpmu->active_events;
-	struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;
-
-	if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
-		armpmu_release_hardware(armpmu);
-		mutex_unlock(pmu_reserve_mutex);
-	}
-}
-
-static int
-event_requires_mode_exclusion(struct perf_event_attr *attr)
-{
-	return attr->exclude_idle || attr->exclude_user ||
-	       attr->exclude_kernel || attr->exclude_hv;
-}
-
-static int
-__hw_perf_event_init(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	struct hw_perf_event *hwc = &event->hw;
-	int mapping, err;
-
-	mapping = armpmu->map_event(event);
-
-	if (mapping < 0) {
-		pr_debug("event %x:%llx not supported\n", event->attr.type,
-			 event->attr.config);
-		return mapping;
-	}
-
-	/*
-	 * We don't assign an index until we actually place the event onto
-	 * hardware. Use -1 to signify that we haven't decided where to put it
-	 * yet. For SMP systems, each core has it's own PMU so we can't do any
-	 * clever allocation or constraints checking at this point.
-	 */
-	hwc->idx		= -1;
-	hwc->config_base	= 0;
-	hwc->config		= 0;
-	hwc->event_base		= 0;
-
-	/*
-	 * Check whether we need to exclude the counter from certain modes.
-	 */
-	if ((!armpmu->set_event_filter ||
-	     armpmu->set_event_filter(hwc, &event->attr)) &&
-	     event_requires_mode_exclusion(&event->attr)) {
-		pr_debug("ARM performance counters do not support mode exclusion\n");
-		return -EPERM;
-	}
-
-	/*
-	 * Store the event encoding into the config_base field.
-	 */
-	hwc->config_base	    |= (unsigned long)mapping;
-
-	if (!hwc->sample_period) {
-		/*
-		 * For non-sampling runs, limit the sample_period to half
-		 * of the counter width. That way, the new counter value
-		 * is far less likely to overtake the previous one unless
-		 * you have some serious IRQ latency issues.
-		 */
-		hwc->sample_period  = armpmu->max_period >> 1;
-		hwc->last_period    = hwc->sample_period;
-		local64_set(&hwc->period_left, hwc->sample_period);
-	}
-
-	err = 0;
-	if (event->group_leader != event) {
-		err = validate_group(event);
-		if (err)
-			return -EINVAL;
-	}
-
-	return err;
-}
-
-static int armpmu_event_init(struct perf_event *event)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-	int err = 0;
-	atomic_t *active_events = &armpmu->active_events;
-
-	if (armpmu->map_event(event) == -ENOENT)
-		return -ENOENT;
-
-	event->destroy = hw_perf_event_destroy;
-
-	if (!atomic_inc_not_zero(active_events)) {
-		mutex_lock(&armpmu->reserve_mutex);
-		if (atomic_read(active_events) == 0)
-			err = armpmu_reserve_hardware(armpmu);
-
-		if (!err)
-			atomic_inc(active_events);
-		mutex_unlock(&armpmu->reserve_mutex);
-	}
-
-	if (err)
-		return err;
-
-	err = __hw_perf_event_init(event);
-	if (err)
-		hw_perf_event_destroy(event);
-
-	return err;
-}
-
-static void armpmu_enable(struct pmu *pmu)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-	struct pmu_hw_events *hw_events = armpmu->get_hw_events();
-	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
-
-	if (enabled)
-		armpmu->start();
-}
-
-static void armpmu_disable(struct pmu *pmu)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-	armpmu->stop();
-}
-
-static void __init armpmu_init(struct arm_pmu *armpmu)
-{
-	atomic_set(&armpmu->active_events, 0);
-	mutex_init(&armpmu->reserve_mutex);
-
-	armpmu->pmu = (struct pmu) {
-		.pmu_enable	= armpmu_enable,
-		.pmu_disable	= armpmu_disable,
-		.event_init	= armpmu_event_init,
-		.add		= armpmu_add,
-		.del		= armpmu_del,
-		.start		= armpmu_start,
-		.stop		= armpmu_stop,
-		.read		= armpmu_read,
-	};
-}
-
-int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type)
-{
-	armpmu_init(armpmu);
-	return perf_pmu_register(&armpmu->pmu, name, type);
-}
+#include <linux/of.h>
+#include <linux/perf/arm_pmu.h>
+#include <linux/platform_device.h>
 
 /*
  * ARMv8 PMUv3 Performance Events handling code.
@@ -739,7 +100,8 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
  */
 #define	ARMV8_IDX_CYCLE_COUNTER	0
 #define	ARMV8_IDX_COUNTER0	1
-#define	ARMV8_IDX_COUNTER_LAST	(ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
+#define	ARMV8_IDX_COUNTER_LAST(cpu_pmu) \
+	(ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
 
 #define	ARMV8_MAX_COUNTERS	32
 #define	ARMV8_COUNTER_MASK	(ARMV8_MAX_COUNTERS - 1)
@@ -805,49 +167,34 @@ static inline int armv8pmu_has_overflowed(u32 pmovsr)
 	return pmovsr & ARMV8_OVERFLOWED_MASK;
 }
 
-static inline int armv8pmu_counter_valid(int idx)
+static inline int armv8pmu_counter_valid(struct arm_pmu *cpu_pmu, int idx)
 {
-	return idx >= ARMV8_IDX_CYCLE_COUNTER && idx <= ARMV8_IDX_COUNTER_LAST;
+	return idx >= ARMV8_IDX_CYCLE_COUNTER &&
+		idx <= ARMV8_IDX_COUNTER_LAST(cpu_pmu);
 }
 
 static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
 {
-	int ret = 0;
-	u32 counter;
-
-	if (!armv8pmu_counter_valid(idx)) {
-		pr_err("CPU%u checking wrong counter %d overflow status\n",
-			smp_processor_id(), idx);
-	} else {
-		counter = ARMV8_IDX_TO_COUNTER(idx);
-		ret = pmnc & BIT(counter);
-	}
-
-	return ret;
+	return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
 }
 
 static inline int armv8pmu_select_counter(int idx)
 {
-	u32 counter;
-
-	if (!armv8pmu_counter_valid(idx)) {
-		pr_err("CPU%u selecting wrong PMNC counter %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV8_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 	asm volatile("msr pmselr_el0, %0" :: "r" (counter));
 	isb();
 
 	return idx;
 }
 
-static inline u32 armv8pmu_read_counter(int idx)
+static inline u32 armv8pmu_read_counter(struct perf_event *event)
 {
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
 	u32 value = 0;
 
-	if (!armv8pmu_counter_valid(idx))
+	if (!armv8pmu_counter_valid(cpu_pmu, idx))
 		pr_err("CPU%u reading wrong counter %d\n",
 			smp_processor_id(), idx);
 	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
@@ -858,9 +205,13 @@ static inline u32 armv8pmu_read_counter(int idx)
 	return value;
 }
 
-static inline void armv8pmu_write_counter(int idx, u32 value)
+static inline void armv8pmu_write_counter(struct perf_event *event, u32 value)
 {
-	if (!armv8pmu_counter_valid(idx))
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (!armv8pmu_counter_valid(cpu_pmu, idx))
 		pr_err("CPU%u writing wrong counter %d\n",
 			smp_processor_id(), idx);
 	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
@@ -879,65 +230,34 @@ static inline void armv8pmu_write_evtype(int idx, u32 val)
 
 static inline int armv8pmu_enable_counter(int idx)
 {
-	u32 counter;
-
-	if (!armv8pmu_counter_valid(idx)) {
-		pr_err("CPU%u enabling wrong PMNC counter %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV8_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 	asm volatile("msr pmcntenset_el0, %0" :: "r" (BIT(counter)));
 	return idx;
 }
 
 static inline int armv8pmu_disable_counter(int idx)
 {
-	u32 counter;
-
-	if (!armv8pmu_counter_valid(idx)) {
-		pr_err("CPU%u disabling wrong PMNC counter %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV8_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 	asm volatile("msr pmcntenclr_el0, %0" :: "r" (BIT(counter)));
 	return idx;
 }
 
 static inline int armv8pmu_enable_intens(int idx)
 {
-	u32 counter;
-
-	if (!armv8pmu_counter_valid(idx)) {
-		pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV8_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 	asm volatile("msr pmintenset_el1, %0" :: "r" (BIT(counter)));
 	return idx;
 }
 
 static inline int armv8pmu_disable_intens(int idx)
 {
-	u32 counter;
-
-	if (!armv8pmu_counter_valid(idx)) {
-		pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n",
-			smp_processor_id(), idx);
-		return -EINVAL;
-	}
-
-	counter = ARMV8_IDX_TO_COUNTER(idx);
+	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 	asm volatile("msr pmintenclr_el1, %0" :: "r" (BIT(counter)));
 	isb();
 	/* Clear the overflow flag in case an interrupt is pending. */
 	asm volatile("msr pmovsclr_el0, %0" :: "r" (BIT(counter)));
 	isb();
+
 	return idx;
 }
 
@@ -955,10 +275,13 @@ static inline u32 armv8pmu_getreset_flags(void)
 	return value;
 }
 
-static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
+static void armv8pmu_enable_event(struct perf_event *event)
 {
 	unsigned long flags;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
 
 	/*
 	 * Enable counter and interrupt, and set the counter to count
@@ -989,10 +312,13 @@ static void armv8pmu_enable_event(struct hw_perf_event *hwc, int idx)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void armv8pmu_disable_event(struct hw_perf_event *hwc, int idx)
+static void armv8pmu_disable_event(struct perf_event *event)
 {
 	unsigned long flags;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct hw_perf_event *hwc = &event->hw;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
+	int idx = hwc->idx;
 
 	/*
 	 * Disable counter and interrupt
@@ -1016,7 +342,8 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
 {
 	u32 pmovsr;
 	struct perf_sample_data data;
-	struct pmu_hw_events *cpuc;
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
+	struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
 	struct pt_regs *regs;
 	int idx;
 
@@ -1036,7 +363,6 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
 	 */
 	regs = get_irq_regs();
 
-	cpuc = this_cpu_ptr(&cpu_hw_events);
 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 		struct perf_event *event = cpuc->events[idx];
 		struct hw_perf_event *hwc;
@@ -1053,13 +379,13 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
 			continue;
 
 		hwc = &event->hw;
-		armpmu_event_update(event, hwc, idx);
+		armpmu_event_update(event);
 		perf_sample_data_init(&data, 0, hwc->last_period);
-		if (!armpmu_event_set_period(event, hwc, idx))
+		if (!armpmu_event_set_period(event))
 			continue;
 
 		if (perf_event_overflow(event, &data, regs))
-			cpu_pmu->disable(hwc, idx);
+			cpu_pmu->disable(event);
 	}
 
 	/*
@@ -1074,10 +400,10 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev)
 	return IRQ_HANDLED;
 }
 
-static void armv8pmu_start(void)
+static void armv8pmu_start(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Enable all counters */
@@ -1085,10 +411,10 @@ static void armv8pmu_start(void)
 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
-static void armv8pmu_stop(void)
+static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
 {
 	unsigned long flags;
-	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
 
 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
 	/* Disable all counters */
@@ -1097,10 +423,12 @@ static void armv8pmu_stop(void)
 }
 
 static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
-				  struct hw_perf_event *event)
+				  struct perf_event *event)
 {
 	int idx;
-	unsigned long evtype = event->config_base & ARMV8_EVTYPE_EVENT;
+	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long evtype = hwc->config_base & ARMV8_EVTYPE_EVENT;
 
 	/* Always place a cycle counter into the cycle counter. */
 	if (evtype == ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES) {
@@ -1151,11 +479,14 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
 
 static void armv8pmu_reset(void *info)
 {
+	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
 	u32 idx, nb_cnt = cpu_pmu->num_events;
 
 	/* The counter and interrupt enable registers are unknown at reset. */
-	for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx)
-		armv8pmu_disable_event(NULL, idx);
+	for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
+		armv8pmu_disable_counter(idx);
+		armv8pmu_disable_intens(idx);
+	}
 
 	/* Initialize & Reset PMNC: C and P bits. */
 	armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
@@ -1166,169 +497,67 @@ static void armv8pmu_reset(void *info)
 
 static int armv8_pmuv3_map_event(struct perf_event *event)
 {
-	return map_cpu_event(event, &armv8_pmuv3_perf_map,
+	return armpmu_map_event(event, &armv8_pmuv3_perf_map,
 				&armv8_pmuv3_perf_cache_map,
 				ARMV8_EVTYPE_EVENT);
 }
 
-static struct arm_pmu armv8pmu = {
-	.handle_irq		= armv8pmu_handle_irq,
-	.enable			= armv8pmu_enable_event,
-	.disable		= armv8pmu_disable_event,
-	.read_counter		= armv8pmu_read_counter,
-	.write_counter		= armv8pmu_write_counter,
-	.get_event_idx		= armv8pmu_get_event_idx,
-	.start			= armv8pmu_start,
-	.stop			= armv8pmu_stop,
-	.reset			= armv8pmu_reset,
-	.max_period		= (1LLU << 32) - 1,
-};
-
-static u32 __init armv8pmu_read_num_pmnc_events(void)
+static void armv8pmu_read_num_pmnc_events(void *info)
 {
-	u32 nb_cnt;
+	int *nb_cnt = info;
 
 	/* Read the nb of CNTx counters supported from PMNC */
-	nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
+	*nb_cnt = (armv8pmu_pmcr_read() >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;
 
-	/* Add the CPU cycles counter and return */
-	return nb_cnt + 1;
+	/* Add the CPU cycles counter */
+	*nb_cnt += 1;
 }
 
-static struct arm_pmu *__init armv8_pmuv3_pmu_init(void)
+static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu)
 {
-	armv8pmu.name			= "arm/armv8-pmuv3";
-	armv8pmu.map_event		= armv8_pmuv3_map_event;
-	armv8pmu.num_events		= armv8pmu_read_num_pmnc_events();
-	armv8pmu.set_event_filter	= armv8pmu_set_event_filter;
-	return &armv8pmu;
+	return smp_call_function_any(&arm_pmu->supported_cpus,
+				    armv8pmu_read_num_pmnc_events,
+				    &arm_pmu->num_events, 1);
 }
 
-/*
- * Ensure the PMU has sane values out of reset.
- * This requires SMP to be available, so exists as a separate initcall.
- */
-static int __init
-cpu_pmu_reset(void)
+static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
 {
-	if (cpu_pmu && cpu_pmu->reset)
-		return on_each_cpu(cpu_pmu->reset, NULL, 1);
-	return 0;
+	cpu_pmu->handle_irq		= armv8pmu_handle_irq,
+	cpu_pmu->enable			= armv8pmu_enable_event,
+	cpu_pmu->disable		= armv8pmu_disable_event,
+	cpu_pmu->read_counter		= armv8pmu_read_counter,
+	cpu_pmu->write_counter		= armv8pmu_write_counter,
+	cpu_pmu->get_event_idx		= armv8pmu_get_event_idx,
+	cpu_pmu->start			= armv8pmu_start,
+	cpu_pmu->stop			= armv8pmu_stop,
+	cpu_pmu->reset			= armv8pmu_reset,
+	cpu_pmu->max_period		= (1LLU << 32) - 1,
+	cpu_pmu->name			= "armv8_pmuv3";
+	cpu_pmu->map_event		= armv8_pmuv3_map_event;
+	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
+	return armv8pmu_probe_num_events(cpu_pmu);
 }
-arch_initcall(cpu_pmu_reset);
 
-/*
- * PMU platform driver and devicetree bindings.
- */
-static const struct of_device_id armpmu_of_device_ids[] = {
-	{.compatible = "arm,armv8-pmuv3"},
+static const struct of_device_id armv8_pmu_of_device_ids[] = {
+	{.compatible = "arm,armv8-pmuv3",	.data = armv8_pmuv3_init},
 	{},
 };
 
-static int armpmu_device_probe(struct platform_device *pdev)
+static int armv8_pmu_device_probe(struct platform_device *pdev)
 {
-	int i, irq, *irqs;
-
-	if (!cpu_pmu)
-		return -ENODEV;
-
-	/* Don't bother with PPIs; they're already affine */
-	irq = platform_get_irq(pdev, 0);
-	if (irq >= 0 && irq_is_percpu(irq))
-		goto out;
-
-	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
-	if (!irqs)
-		return -ENOMEM;
-
-	for (i = 0; i < pdev->num_resources; ++i) {
-		struct device_node *dn;
-		int cpu;
-
-		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
-				      i);
-		if (!dn) {
-			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
-				of_node_full_name(pdev->dev.of_node), i);
-			break;
-		}
-
-		for_each_possible_cpu(cpu)
-			if (dn == of_cpu_device_node_get(cpu))
-				break;
-
-		if (cpu >= nr_cpu_ids) {
-			pr_warn("Failed to find logical CPU for %s\n",
-				dn->name);
-			of_node_put(dn);
-			break;
-		}
-		of_node_put(dn);
-
-		irqs[i] = cpu;
-	}
-
-	if (i == pdev->num_resources)
-		cpu_pmu->irq_affinity = irqs;
-	else
-		kfree(irqs);
-
-out:
-	cpu_pmu->plat_device = pdev;
-	return 0;
+	return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
 }
 
-static struct platform_driver armpmu_driver = {
+static struct platform_driver armv8_pmu_driver = {
 	.driver		= {
-		.name	= "arm-pmu",
-		.of_match_table = armpmu_of_device_ids,
+		.name	= "armv8-pmu",
+		.of_match_table = armv8_pmu_of_device_ids,
 	},
-	.probe		= armpmu_device_probe,
+	.probe		= armv8_pmu_device_probe,
 };
 
-static int __init register_pmu_driver(void)
+static int __init register_armv8_pmu_driver(void)
 {
-	return platform_driver_register(&armpmu_driver);
+	return platform_driver_register(&armv8_pmu_driver);
 }
-device_initcall(register_pmu_driver);
-
-static struct pmu_hw_events *armpmu_get_cpu_events(void)
-{
-	return this_cpu_ptr(&cpu_hw_events);
-}
-
-static void __init cpu_pmu_init(struct arm_pmu *armpmu)
-{
-	int cpu;
-	for_each_possible_cpu(cpu) {
-		struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
-		events->events = per_cpu(hw_events, cpu);
-		events->used_mask = per_cpu(used_mask, cpu);
-		raw_spin_lock_init(&events->pmu_lock);
-	}
-	armpmu->get_hw_events = armpmu_get_cpu_events;
-}
-
-static int __init init_hw_perf_events(void)
-{
-	u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
-
-	switch ((dfr >> 8) & 0xf) {
-	case 0x1:	/* PMUv3 */
-		cpu_pmu = armv8_pmuv3_pmu_init();
-		break;
-	}
-
-	if (cpu_pmu) {
-		pr_info("enabled with %s PMU driver, %d counters available\n",
-			cpu_pmu->name, cpu_pmu->num_events);
-		cpu_pmu_init(cpu_pmu);
-		armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW);
-	} else {
-		pr_info("no hardware support available\n");
-	}
-
-	return 0;
-}
-early_initcall(init_hw_perf_events);
-
+device_initcall(register_armv8_pmu_driver);
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index d9de36ee165d..04e2653bb8c0 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -5,7 +5,7 @@
 menu "Performance monitor support"
 
 config ARM_PMU
-	depends on PERF_EVENTS && ARM
+	depends on PERF_EVENTS && (ARM || ARM64)
 	bool "ARM PMU framework"
 	default y
 	help
-- 
cgit v1.2.3-58-ga151


From ac82d12772158dfbc1d3827a68b317e10326bbaa Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 2 Oct 2015 10:55:04 +0100
Subject: arm64: perf: add Cortex-A53 support

The Cortex-A53 PMU supports a few events outside of the required PMUv3
set that are rather useful.

This patch adds the event map data for said events.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/devicetree/bindings/arm/pmu.txt |  1 +
 arch/arm64/kernel/perf_event.c                | 64 ++++++++++++++++++++++++++-
 2 files changed, 63 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 435251fa9ce0..2e6737b09137 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -8,6 +8,7 @@ Required properties:
 
 - compatible : should be one of
 	"arm,armv8-pmuv3"
+	"arm.cortex-a53-pmu"
 	"arm,cortex-a17-pmu"
 	"arm,cortex-a15-pmu"
 	"arm,cortex-a12-pmu"
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index a614100cd00f..5a5308a78da3 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -69,6 +69,11 @@ enum armv8_pmuv3_perf_types {
 	ARMV8_PMUV3_PERFCTR_BUS_CYCLES				= 0x1D,
 };
 
+/* ARMv8 Cortex-A53 specific event types. */
+enum armv8_a53_pmu_perf_types {
+	ARMV8_A53_PERFCTR_PREFETCH_LINEFILL			= 0xC2,
+};
+
 /* PMUv3 HW events mapping. */
 static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
 	PERF_MAP_ALL_UNSUPPORTED,
@@ -79,6 +84,18 @@ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
 	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
 };
 
+/* ARM Cortex-A53 HW events mapping. */
+static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_PC_WRITE,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+};
+
 static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 						[PERF_COUNT_HW_CACHE_OP_MAX]
 						[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
@@ -95,6 +112,28 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
 };
 
+static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					      [PERF_COUNT_HW_CACHE_OP_MAX]
+					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+	[C(L1D)][C(OP_PREFETCH)][C(RESULT_MISS)] = ARMV8_A53_PERFCTR_PREFETCH_LINEFILL,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
 /*
  * Perf Events' indices
  */
@@ -502,6 +541,13 @@ static int armv8_pmuv3_map_event(struct perf_event *event)
 				ARMV8_EVTYPE_EVENT);
 }
 
+static int armv8_a53_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &armv8_a53_perf_map,
+				&armv8_a53_perf_cache_map,
+				ARMV8_EVTYPE_EVENT);
+}
+
 static void armv8pmu_read_num_pmnc_events(void *info)
 {
 	int *nb_cnt = info;
@@ -520,7 +566,7 @@ static int armv8pmu_probe_num_events(struct arm_pmu *arm_pmu)
 				    &arm_pmu->num_events, 1);
 }
 
-static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
+static void armv8_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	cpu_pmu->handle_irq		= armv8pmu_handle_irq,
 	cpu_pmu->enable			= armv8pmu_enable_event,
@@ -532,14 +578,28 @@ static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->stop			= armv8pmu_stop,
 	cpu_pmu->reset			= armv8pmu_reset,
 	cpu_pmu->max_period		= (1LLU << 32) - 1,
+	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
+}
+
+static int armv8_pmuv3_init(struct arm_pmu *cpu_pmu)
+{
+	armv8_pmu_init(cpu_pmu);
 	cpu_pmu->name			= "armv8_pmuv3";
 	cpu_pmu->map_event		= armv8_pmuv3_map_event;
-	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
+	return armv8pmu_probe_num_events(cpu_pmu);
+}
+
+static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv8_pmu_init(cpu_pmu);
+	cpu_pmu->name			= "armv8_cortex_a53";
+	cpu_pmu->map_event		= armv8_a53_map_event;
 	return armv8pmu_probe_num_events(cpu_pmu);
 }
 
 static const struct of_device_id armv8_pmu_of_device_ids[] = {
 	{.compatible = "arm,armv8-pmuv3",	.data = armv8_pmuv3_init},
+	{.compatible = "arm,cortex-a53-pmu",	.data = armv8_a53_pmu_init},
 	{},
 };
 
-- 
cgit v1.2.3-58-ga151


From 62a4dda9d63a10e5b28943967fc936c74fa16dfb Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 2 Oct 2015 10:55:05 +0100
Subject: arm64: perf: add Cortex-A57 support

The Cortex-A57 PMU supports a few events outside of the required PMUv3
set that are rather useful.

This patch adds the event map data for said events.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/devicetree/bindings/arm/pmu.txt |  1 +
 arch/arm64/kernel/perf_event.c                | 61 +++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)

(limited to 'arch')

diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 2e6737b09137..4b7c3d9b29bb 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -8,6 +8,7 @@ Required properties:
 
 - compatible : should be one of
 	"arm,armv8-pmuv3"
+	"arm.cortex-a57-pmu"
 	"arm.cortex-a53-pmu"
 	"arm,cortex-a17-pmu"
 	"arm,cortex-a15-pmu"
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 5a5308a78da3..5b1897e8ca24 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -74,6 +74,16 @@ enum armv8_a53_pmu_perf_types {
 	ARMV8_A53_PERFCTR_PREFETCH_LINEFILL			= 0xC2,
 };
 
+/* ARMv8 Cortex-A57 specific event types. */
+enum armv8_a57_perf_types {
+	ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD			= 0x40,
+	ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST			= 0x41,
+	ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD			= 0x42,
+	ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST			= 0x43,
+	ARMV8_A57_PERFCTR_DTLB_REFILL_LD			= 0x4c,
+	ARMV8_A57_PERFCTR_DTLB_REFILL_ST			= 0x4d,
+};
+
 /* PMUv3 HW events mapping. */
 static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
 	PERF_MAP_ALL_UNSUPPORTED,
@@ -96,6 +106,16 @@ static const unsigned armv8_a53_perf_map[PERF_COUNT_HW_MAX] = {
 	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
 };
 
+static const unsigned armv8_a57_perf_map[PERF_COUNT_HW_MAX] = {
+	PERF_MAP_ALL_UNSUPPORTED,
+	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED,
+	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+};
+
 static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 						[PERF_COUNT_HW_CACHE_OP_MAX]
 						[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
@@ -134,6 +154,31 @@ static const unsigned armv8_a53_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
 };
 
+static const unsigned armv8_a57_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+					      [PERF_COUNT_HW_CACHE_OP_MAX]
+					      [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	PERF_CACHE_MAP_ALL_UNSUPPORTED,
+
+	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_LD,
+	[C(L1D)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_LD,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_A57_PERFCTR_L1_DCACHE_ACCESS_ST,
+	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_A57_PERFCTR_L1_DCACHE_REFILL_ST,
+
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_ACCESS,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1_ICACHE_REFILL,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_A57_PERFCTR_DTLB_REFILL_LD,
+	[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_A57_PERFCTR_DTLB_REFILL_ST,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_ITLB_REFILL,
+
+	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED,
+	[C(BPU)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED,
+};
+
+
 /*
  * Perf Events' indices
  */
@@ -548,6 +593,13 @@ static int armv8_a53_map_event(struct perf_event *event)
 				ARMV8_EVTYPE_EVENT);
 }
 
+static int armv8_a57_map_event(struct perf_event *event)
+{
+	return armpmu_map_event(event, &armv8_a57_perf_map,
+				&armv8_a57_perf_cache_map,
+				ARMV8_EVTYPE_EVENT);
+}
+
 static void armv8pmu_read_num_pmnc_events(void *info)
 {
 	int *nb_cnt = info;
@@ -597,9 +649,18 @@ static int armv8_a53_pmu_init(struct arm_pmu *cpu_pmu)
 	return armv8pmu_probe_num_events(cpu_pmu);
 }
 
+static int armv8_a57_pmu_init(struct arm_pmu *cpu_pmu)
+{
+	armv8_pmu_init(cpu_pmu);
+	cpu_pmu->name			= "armv8_cortex_a57";
+	cpu_pmu->map_event		= armv8_a57_map_event;
+	return armv8pmu_probe_num_events(cpu_pmu);
+}
+
 static const struct of_device_id armv8_pmu_of_device_ids[] = {
 	{.compatible = "arm,armv8-pmuv3",	.data = armv8_pmuv3_init},
 	{.compatible = "arm,cortex-a53-pmu",	.data = armv8_a53_pmu_init},
+	{.compatible = "arm,cortex-a57-pmu",	.data = armv8_a57_pmu_init},
 	{},
 };
 
-- 
cgit v1.2.3-58-ga151


From 01a507a371a35a5ee2b283af2ae7f8ed0b5e36f1 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 2 Oct 2015 10:55:06 +0100
Subject: arm64: dts: juno: describe PMUs separately

The A57 and A53 PMUs in Juno support different events, so describe them
separately in both the Juno and Juno R1 DTs.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Liviu Dudau <liviu.dudau@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/boot/dts/arm/juno-r1.dts | 18 +++++++++++-------
 arch/arm64/boot/dts/arm/juno.dts    | 18 +++++++++++-------
 2 files changed, 22 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/boot/dts/arm/juno-r1.dts b/arch/arm64/boot/dts/arm/juno-r1.dts
index c62751153a4f..734e1272b19f 100644
--- a/arch/arm64/boot/dts/arm/juno-r1.dts
+++ b/arch/arm64/boot/dts/arm/juno-r1.dts
@@ -91,17 +91,21 @@
 		};
 	};
 
-	pmu {
-		compatible = "arm,armv8-pmuv3";
+	pmu_a57 {
+		compatible = "arm,cortex-a57-pmu";
 		interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&A57_0>,
+				     <&A57_1>;
+	};
+
+	pmu_a53 {
+		compatible = "arm,cortex-a53-pmu";
+		interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
-		interrupt-affinity = <&A57_0>,
-				     <&A57_1>,
-				     <&A53_0>,
+		interrupt-affinity = <&A53_0>,
 				     <&A53_1>,
 				     <&A53_2>,
 				     <&A53_3>;
diff --git a/arch/arm64/boot/dts/arm/juno.dts b/arch/arm64/boot/dts/arm/juno.dts
index d7cbdd482a61..ffa05aeab3c7 100644
--- a/arch/arm64/boot/dts/arm/juno.dts
+++ b/arch/arm64/boot/dts/arm/juno.dts
@@ -91,17 +91,21 @@
 		};
 	};
 
-	pmu {
-		compatible = "arm,armv8-pmuv3";
+	pmu_a57 {
+		compatible = "arm,cortex-a57-pmu";
 		interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 06 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-affinity = <&A57_0>,
+				     <&A57_1>;
+	};
+
+	pmu_a53 {
+		compatible = "arm,cortex-a53-pmu";
+		interrupts = <GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>,
 			     <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>;
-		interrupt-affinity = <&A57_0>,
-				     <&A57_1>,
-				     <&A53_0>,
+		interrupt-affinity = <&A53_0>,
 				     <&A53_1>,
 				     <&A53_2>,
 				     <&A53_3>;
-- 
cgit v1.2.3-58-ga151


From 2ff4439bb45a05433282d4fa9ca84202147819c7 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 7 Oct 2015 12:00:19 -0500
Subject: arm64: Add contiguous page flag shifts and constants

Add the number of pages required to form a contiguous range,
as well as some supporting constants.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/page.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 7d9c7e4a424b..33892ef0172d 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -20,14 +20,20 @@
 #define __ASM_PAGE_H
 
 /* PAGE_SHIFT determines the page size */
+/* CONT_SHIFT determines the number of pages which can be tracked together  */
 #ifdef CONFIG_ARM64_64K_PAGES
 #define PAGE_SHIFT		16
+#define CONT_SHIFT		5
 #else
 #define PAGE_SHIFT		12
+#define CONT_SHIFT		4
 #endif
-#define PAGE_SIZE		(_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_SIZE		(_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK		(~(PAGE_SIZE-1))
 
+#define CONT_SIZE		(_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
+#define CONT_MASK		(~(CONT_SIZE-1))
+
 /*
  * The idmap and swapper page tables need some space reserved in the kernel
  * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
-- 
cgit v1.2.3-58-ga151


From ecf35a237a85d747ef1d6f713888c782f42064ac Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 7 Oct 2015 12:00:20 -0500
Subject: arm64: PTE/PMD contiguous bit definition

Define the bit positions in the PTE and PMD for the
contiguous bit.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/pgtable-hwdef.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 24154b055835..95c1ec04a9ed 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -54,6 +54,13 @@
 #define SECTION_SIZE		(_AC(1, UL) << SECTION_SHIFT)
 #define SECTION_MASK		(~(SECTION_SIZE-1))
 
+/*
+ * Contiguous page definitions.
+ */
+#define CONT_PTES		(_AC(1, UL) << CONT_SHIFT)
+/* the the numerical offset of the PTE within a range of CONT_PTES */
+#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1))
+
 /*
  * Hardware page table definitions.
  *
@@ -83,6 +90,7 @@
 #define PMD_SECT_S		(_AT(pmdval_t, 3) << 8)
 #define PMD_SECT_AF		(_AT(pmdval_t, 1) << 10)
 #define PMD_SECT_NG		(_AT(pmdval_t, 1) << 11)
+#define PMD_SECT_CONT		(_AT(pmdval_t, 1) << 52)
 #define PMD_SECT_PXN		(_AT(pmdval_t, 1) << 53)
 #define PMD_SECT_UXN		(_AT(pmdval_t, 1) << 54)
 
@@ -105,6 +113,7 @@
 #define PTE_AF			(_AT(pteval_t, 1) << 10)	/* Access Flag */
 #define PTE_NG			(_AT(pteval_t, 1) << 11)	/* nG */
 #define PTE_DBM			(_AT(pteval_t, 1) << 51)	/* Dirty Bit Management */
+#define PTE_CONT		(_AT(pteval_t, 1) << 52)	/* Contiguous range */
 #define PTE_PXN			(_AT(pteval_t, 1) << 53)	/* Privileged XN */
 #define PTE_UXN			(_AT(pteval_t, 1) << 54)	/* User XN */
 
-- 
cgit v1.2.3-58-ga151


From 93ef666a094ff9c9fc8d7cf1774ef0b92e270a75 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 7 Oct 2015 12:00:21 -0500
Subject: arm64: Macros to check/set/unset the contiguous bit

Add the supporting macros to check if the contiguous bit
is set, set the bit, or clear it in a PTE entry.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/pgtable.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 5043a84e724e..1a1a6efa75e5 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -140,6 +140,7 @@ extern struct page *empty_zero_page;
 #define pte_special(pte)	(!!(pte_val(pte) & PTE_SPECIAL))
 #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)		(!(pte_val(pte) & PTE_UXN))
+#define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -202,6 +203,16 @@ static inline pte_t pte_mkspecial(pte_t pte)
 	return set_pte_bit(pte, __pgprot(PTE_SPECIAL));
 }
 
+static inline pte_t pte_mkcont(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(PTE_CONT));
+}
+
+static inline pte_t pte_mknoncont(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(PTE_CONT));
+}
+
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
 	*ptep = pte;
-- 
cgit v1.2.3-58-ga151


From 06f90d2527845c4767207f54280df2c5ca11e82b Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 7 Oct 2015 12:00:22 -0500
Subject: arm64: Default kernel pages should be contiguous

The default page attributes for a PMD being broken should have the CONT bit
set. Create a new definition for an early boot range of PTE's that are
contiguous.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/pgtable.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 1a1a6efa75e5..3f481ef42c07 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -72,6 +72,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define PAGE_KERNEL		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
+#define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
 #define PAGE_HYP		__pgprot(_PAGE_DEFAULT | PTE_HYP)
 #define PAGE_HYP_DEVICE		__pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
-- 
cgit v1.2.3-58-ga151


From 202e41a1c257eb56a05b5ff5bfde06f7ae98e6eb Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 7 Oct 2015 12:00:23 -0500
Subject: arm64: Make the kernel page dump utility aware of the CONT bit

The kernel page dump utility needs to be aware of the CONT bit before
it will break up pages ranges for display.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/dump.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index f3d6221cd5bd..5a22a119a74c 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -67,6 +67,12 @@ static struct addr_marker address_markers[] = {
 	{ -1,			NULL },
 };
 
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
 struct pg_state {
 	struct seq_file *seq;
 	const struct addr_marker *marker;
@@ -113,6 +119,16 @@ static const struct prot_bits pte_bits[] = {
 		.val	= PTE_NG,
 		.set	= "NG",
 		.clear	= "  ",
+	}, {
+		.mask	= PTE_CONT,
+		.val	= PTE_CONT,
+		.set	= "CON",
+		.clear	= "   ",
+	}, {
+		.mask	= PTE_TABLE_BIT,
+		.val	= PTE_TABLE_BIT,
+		.set	= "   ",
+		.clear	= "BLK",
 	}, {
 		.mask	= PTE_UXN,
 		.val	= PTE_UXN,
@@ -198,7 +214,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
 		unsigned long delta;
 
 		if (st->current_prot) {
-			seq_printf(st->seq, "0x%16lx-0x%16lx   ",
+			seq_printf(st->seq, "0x%016lx-0x%016lx   ",
 				   st->start_address, addr);
 
 			delta = (addr - st->start_address) >> 10;
-- 
cgit v1.2.3-58-ga151


From 348a65cdcbbf243073ee39d1f7d4413081ad7eab Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Wed, 7 Oct 2015 12:00:25 -0500
Subject: arm64: Mark kernel page ranges contiguous

With 64k pages, the next larger segment size is 512M. The linux
kernel also uses different protection flags to cover its code and data.
Because of this requirement, the vast majority of the kernel code and
data structures end up being mapped with 64k pages instead of the larger
pages common with a 4k page kernel.

Recent ARM processors support a contiguous bit in the
page tables which allows the a TLB to cover a range larger than a
single PTE if that range is mapped into physically contiguous
ram.

So, for the kernel its a good idea to set this flag. Some basic
micro benchmarks show it can significantly reduce the number of
L1 dTLB refills.

Add boot option to enable/disable CONT marking, as well as fix a
bug found by Steve Capper.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
[catalin.marinas@arm.com: remove CONFIG_ARM64_CONT_PTE altogether]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/mmu.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 61 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 71a310478c9e..eed6d52f5e54 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -80,19 +80,55 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
 	do {
 		/*
 		 * Need to have the least restrictive permissions available
-		 * permissions will be fixed up later
+		 * permissions will be fixed up later. Default the new page
+		 * range as contiguous ptes.
 		 */
-		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC_CONT));
 		pfn++;
 	} while (pte++, i++, i < PTRS_PER_PTE);
 }
 
+/*
+ * Given a PTE with the CONT bit set, determine where the CONT range
+ * starts, and clear the entire range of PTE CONT bits.
+ */
+static void clear_cont_pte_range(pte_t *pte, unsigned long addr)
+{
+	int i;
+
+	pte -= CONT_RANGE_OFFSET(addr);
+	for (i = 0; i < CONT_PTES; i++) {
+		set_pte(pte, pte_mknoncont(*pte));
+		pte++;
+	}
+	flush_tlb_all();
+}
+
+/*
+ * Given a range of PTEs set the pfn and provided page protection flags
+ */
+static void __populate_init_pte(pte_t *pte, unsigned long addr,
+				unsigned long end, phys_addr_t phys,
+				pgprot_t prot)
+{
+	unsigned long pfn = __phys_to_pfn(phys);
+
+	do {
+		/* clear all the bits except the pfn, then apply the prot */
+		set_pte(pte, pfn_pte(pfn, prot));
+		pte++;
+		pfn++;
+		addr += PAGE_SIZE;
+	} while (addr != end);
+}
+
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
-				  unsigned long end, unsigned long pfn,
+				  unsigned long end, phys_addr_t phys,
 				  pgprot_t prot,
 				  void *(*alloc)(unsigned long size))
 {
 	pte_t *pte;
+	unsigned long next;
 
 	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
 		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
@@ -105,9 +141,27 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
-		set_pte(pte, pfn_pte(pfn, prot));
-		pfn++;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+		next = min(end, (addr + CONT_SIZE) & CONT_MASK);
+		if (((addr | next | phys) & ~CONT_MASK) == 0) {
+			/* a block of CONT_PTES  */
+			__populate_init_pte(pte, addr, next, phys,
+					    prot | __pgprot(PTE_CONT));
+		} else {
+			/*
+			 * If the range being split is already inside of a
+			 * contiguous range but this PTE isn't going to be
+			 * contiguous, then we want to unmark the adjacent
+			 * ranges, then update the portion of the range we
+			 * are interrested in.
+			 */
+			 clear_cont_pte_range(pte, addr);
+			 __populate_init_pte(pte, addr, next, phys, prot);
+		}
+
+		pte += (next - addr) >> PAGE_SHIFT;
+		phys += next - addr;
+		addr = next;
+	} while (addr != end);
 }
 
 void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -168,8 +222,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 				}
 			}
 		} else {
-			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
-				       prot, alloc);
+			alloc_init_pte(pmd, addr, next, phys, prot, alloc);
 		}
 		phys += next - addr;
 	} while (pmd++, addr = next, addr != end);
-- 
cgit v1.2.3-58-ga151


From 217d453d473c5ddfd140a06bf9d8575218551020 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 24 Sep 2015 17:32:14 +0800
Subject: arm64: fix a migrating irq bug when hotplug cpu

When cpu is disabled, all irqs will be migratged to another cpu.
In some cases, a new affinity is different, the old affinity need
to be updated and if irq_set_affinity's return value is IRQ_SET_MASK_OK_DONE,
the old affinity can not be updated. Fix it by using irq_do_set_affinity.

And migrating interrupts is a core code matter, so use the generic
function irq_migrate_all_off_this_cpu() to migrate interrupts in
kernel/irq/migration.c.

Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig           |  1 +
 arch/arm64/include/asm/irq.h |  1 -
 arch/arm64/kernel/irq.c      | 62 --------------------------------------------
 arch/arm64/kernel/smp.c      |  3 ++-
 4 files changed, 3 insertions(+), 64 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 98b4f98a13de..1b35bdbd5d74 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -427,6 +427,7 @@ config NR_CPUS
 
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs"
+	select GENERIC_IRQ_MIGRATION
 	help
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
 	  can be controlled through /sys/devices/system/cpu.
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index bbb251b14746..09169296c3cc 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -7,7 +7,6 @@
 
 struct pt_regs;
 
-extern void migrate_irqs(void);
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
 static inline void acpi_irq_init(void)
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 11dc3fd47853..9f17ec071ee0 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -27,7 +27,6 @@
 #include <linux/init.h>
 #include <linux/irqchip.h>
 #include <linux/seq_file.h>
-#include <linux/ratelimit.h>
 
 unsigned long irq_err_count;
 
@@ -54,64 +53,3 @@ void __init init_IRQ(void)
 	if (!handle_arch_irq)
 		panic("No interrupt controller found.");
 }
-
-#ifdef CONFIG_HOTPLUG_CPU
-static bool migrate_one_irq(struct irq_desc *desc)
-{
-	struct irq_data *d = irq_desc_get_irq_data(desc);
-	const struct cpumask *affinity = irq_data_get_affinity_mask(d);
-	struct irq_chip *c;
-	bool ret = false;
-
-	/*
-	 * If this is a per-CPU interrupt, or the affinity does not
-	 * include this CPU, then we have nothing to do.
-	 */
-	if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity))
-		return false;
-
-	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
-		affinity = cpu_online_mask;
-		ret = true;
-	}
-
-	c = irq_data_get_irq_chip(d);
-	if (!c->irq_set_affinity)
-		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
-	else if (c->irq_set_affinity(d, affinity, false) == IRQ_SET_MASK_OK && ret)
-		cpumask_copy(irq_data_get_affinity_mask(d), affinity);
-
-	return ret;
-}
-
-/*
- * The current CPU has been marked offline.  Migrate IRQs off this CPU.
- * If the affinity settings do not allow other CPUs, force them onto any
- * available CPU.
- *
- * Note: we must iterate over all IRQs, whether they have an attached
- * action structure or not, as we need to get chained interrupts too.
- */
-void migrate_irqs(void)
-{
-	unsigned int i;
-	struct irq_desc *desc;
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	for_each_irq_desc(i, desc) {
-		bool affinity_broken;
-
-		raw_spin_lock(&desc->lock);
-		affinity_broken = migrate_one_irq(desc);
-		raw_spin_unlock(&desc->lock);
-
-		if (affinity_broken)
-			pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
-					    i, smp_processor_id());
-	}
-
-	local_irq_restore(flags);
-}
-#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 03b0aa28ea61..b7a973d6861e 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -231,7 +231,8 @@ int __cpu_disable(void)
 	/*
 	 * OK - migrate IRQs away from this CPU
 	 */
-	migrate_irqs();
+	irq_migrate_all_off_this_cpu();
+
 	return 0;
 }
 
-- 
cgit v1.2.3-58-ga151


From cb50ce324e72bfd5d191d0a834a0ead1a08666b7 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 12 Oct 2015 12:10:53 +0100
Subject: arm64: Fix missing #include in hw_breakpoint.c

A prior commit used to detect the hw breakpoint ABI behaviour based on
the target state missed the asm/compat.h include and the build fails
with !CONFIG_COMPAT.

Fixes: 8f48c0629049 ("arm64: hw_breakpoint: use target state to determine ABI behaviour")
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/hw_breakpoint.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 46465d9fbc4d..b45c95d34b83 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -28,6 +28,7 @@
 #include <linux/ptrace.h>
 #include <linux/smp.h>
 
+#include <asm/compat.h>
 #include <asm/current.h>
 #include <asm/debug-monitors.h>
 #include <asm/hw_breakpoint.h>
-- 
cgit v1.2.3-58-ga151


From 207918461eb0aca720fddec5da79bc71c133b9f1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 8 Oct 2015 20:02:03 +0100
Subject: arm64: use ENDPIPROC() to annotate position independent assembler
 routines

For more control over which functions are called with the MMU off or
with the UEFI 1:1 mapping active, annotate some assembler routines as
position independent. This is done by introducing ENDPIPROC(), which
replaces the ENDPROC() declaration of those routines.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/assembler.h | 11 +++++++++++
 arch/arm64/lib/memchr.S            |  2 +-
 arch/arm64/lib/memcmp.S            |  2 +-
 arch/arm64/lib/memcpy.S            |  2 +-
 arch/arm64/lib/memmove.S           |  2 +-
 arch/arm64/lib/memset.S            |  2 +-
 arch/arm64/lib/strcmp.S            |  2 +-
 arch/arm64/lib/strlen.S            |  2 +-
 arch/arm64/lib/strncmp.S           |  2 +-
 arch/arm64/mm/cache.S              | 10 +++++-----
 10 files changed, 24 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index b51f2cc22ca9..12eff928ef8b 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -193,4 +193,15 @@ lr	.req	x30		// link register
 	str	\src, [\tmp, :lo12:\sym]
 	.endm
 
+/*
+ * Annotate a function as position independent, i.e., safe to be called before
+ * the kernel virtual mapping is activated.
+ */
+#define ENDPIPROC(x)			\
+	.globl	__pi_##x;		\
+	.type 	__pi_##x, %function;	\
+	.set	__pi_##x, x;		\
+	.size	__pi_##x, . - x;	\
+	ENDPROC(x)
+
 #endif	/* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S
index 8636b7549163..4444c1d25f4b 100644
--- a/arch/arm64/lib/memchr.S
+++ b/arch/arm64/lib/memchr.S
@@ -41,4 +41,4 @@ ENTRY(memchr)
 	ret
 2:	mov	x0, #0
 	ret
-ENDPROC(memchr)
+ENDPIPROC(memchr)
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index 6ea0776ba6de..ffbdec00327d 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -255,4 +255,4 @@ CPU_LE( rev	data2, data2 )
 .Lret0:
 	mov	result, #0
 	ret
-ENDPROC(memcmp)
+ENDPIPROC(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 173a1aace9bb..36a6a62cf263 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -71,4 +71,4 @@
 ENTRY(memcpy)
 #include "copy_template.S"
 	ret
-ENDPROC(memcpy)
+ENDPIPROC(memcpy)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
index 57b19ea2dad4..68e2f2035e23 100644
--- a/arch/arm64/lib/memmove.S
+++ b/arch/arm64/lib/memmove.S
@@ -194,4 +194,4 @@ ENTRY(memmove)
 	tst	count, #0x3f
 	b.ne	.Ltail63
 	ret
-ENDPROC(memmove)
+ENDPIPROC(memmove)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index 7c72dfd36b63..29f405f08792 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -213,4 +213,4 @@ ENTRY(memset)
 	ands	count, count, zva_bits_x
 	b.ne	.Ltail_maybe_long
 	ret
-ENDPROC(memset)
+ENDPIPROC(memset)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 42f828b06c59..471fe61760ef 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -231,4 +231,4 @@ CPU_BE(	orr	syndrome, diff, has_nul )
 	lsr	data1, data1, #56
 	sub	result, data1, data2, lsr #56
 	ret
-ENDPROC(strcmp)
+ENDPIPROC(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
index 987b68b9ce44..55ccc8e24c08 100644
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -123,4 +123,4 @@ CPU_LE( lsr	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
 	csinv	data1, data1, xzr, le
 	csel	data2, data2, data2a, le
 	b	.Lrealigned
-ENDPROC(strlen)
+ENDPIPROC(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index 0224cf5a5533..e267044761c6 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -307,4 +307,4 @@ CPU_BE( orr	syndrome, diff, has_nul )
 .Lret0:
 	mov	result, #0
 	ret
-ENDPROC(strncmp)
+ENDPIPROC(strncmp)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index eb48d5df4a0f..cfa44a6adc0a 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -98,7 +98,7 @@ ENTRY(__flush_dcache_area)
 	b.lo	1b
 	dsb	sy
 	ret
-ENDPROC(__flush_dcache_area)
+ENDPIPROC(__flush_dcache_area)
 
 /*
  *	__inval_cache_range(start, end)
@@ -131,7 +131,7 @@ __dma_inv_range:
 	b.lo	2b
 	dsb	sy
 	ret
-ENDPROC(__inval_cache_range)
+ENDPIPROC(__inval_cache_range)
 ENDPROC(__dma_inv_range)
 
 /*
@@ -171,7 +171,7 @@ ENTRY(__dma_flush_range)
 	b.lo	1b
 	dsb	sy
 	ret
-ENDPROC(__dma_flush_range)
+ENDPIPROC(__dma_flush_range)
 
 /*
  *	__dma_map_area(start, size, dir)
@@ -184,7 +184,7 @@ ENTRY(__dma_map_area)
 	cmp	w2, #DMA_FROM_DEVICE
 	b.eq	__dma_inv_range
 	b	__dma_clean_range
-ENDPROC(__dma_map_area)
+ENDPIPROC(__dma_map_area)
 
 /*
  *	__dma_unmap_area(start, size, dir)
@@ -197,4 +197,4 @@ ENTRY(__dma_unmap_area)
 	cmp	w2, #DMA_TO_DEVICE
 	b.ne	__dma_inv_range
 	ret
-ENDPROC(__dma_unmap_area)
+ENDPIPROC(__dma_unmap_area)
-- 
cgit v1.2.3-58-ga151


From e8f3010f7326c00368dbc057bd052bec80dfc072 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 8 Oct 2015 20:02:04 +0100
Subject: arm64/efi: isolate EFI stub from the kernel proper

Since arm64 does not use a builtin decompressor, the EFI stub is built
into the kernel proper. So far, this has been working fine, but actually,
since the stub is in fact a PE/COFF relocatable binary that is executed
at an unknown offset in the 1:1 mapping provided by the UEFI firmware, we
should not be seamlessly sharing code with the kernel proper, which is a
position dependent executable linked at a high virtual offset.

So instead, separate the contents of libstub and its dependencies, by
putting them into their own namespace by prefixing all of its symbols
with __efistub. This way, we have tight control over what parts of the
kernel proper are referenced by the stub.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/Makefile            | 12 ++++++--
 arch/arm64/kernel/efi-entry.S         | 10 +++---
 arch/arm64/kernel/head.S              | 14 ++++-----
 arch/arm64/kernel/image.h             | 27 +++++++++++++++++
 drivers/firmware/efi/libstub/Makefile | 39 ++++++++++++++++++++----
 drivers/firmware/efi/libstub/string.c | 57 +++++++++++++++++++++++++++++++++++
 6 files changed, 139 insertions(+), 20 deletions(-)
 create mode 100644 drivers/firmware/efi/libstub/string.c

(limited to 'arch')

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 22dc9bc781be..7b17f6245f1e 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -20,6 +20,14 @@ arm64-obj-y		:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o
 
+stub-obj				:= efi-stub.o efi-entry.o
+extra-y					:= $(stub-obj)
+stub-obj				:= $(patsubst %.o,%.stub.o,$(stub-obj))
+
+OBJCOPYFLAGS := --prefix-symbols=__efistub_
+$(obj)/%.stub.o: $(obj)/%.o FORCE
+	$(call if_changed,objcopy)
+
 arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
 					   sys_compat.o entry32.o		\
 					   ../../arm/kernel/opcodes.o
@@ -32,7 +40,7 @@ arm64-obj-$(CONFIG_CPU_PM)		+= sleep.o suspend.o
 arm64-obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 arm64-obj-$(CONFIG_KGDB)		+= kgdb.o
-arm64-obj-$(CONFIG_EFI)			+= efi.o efi-stub.o efi-entry.o
+arm64-obj-$(CONFIG_EFI)			+= efi.o $(stub-obj)
 arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
@@ -40,7 +48,7 @@ arm64-obj-$(CONFIG_ACPI)		+= acpi.o
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
 head-y					:= head.o
-extra-y					:= $(head-y) vmlinux.lds
+extra-y					+= $(head-y) vmlinux.lds
 
 # vDSO - this must be built first to generate the symbol offsets
 $(call objectify,$(arm64-obj-y)): $(obj)/vdso/vdso-offsets.h
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 8ce9b0577442..a773db92908b 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -29,7 +29,7 @@
 	 * we want to be. The kernel image wants to be placed at TEXT_OFFSET
 	 * from start of RAM.
 	 */
-ENTRY(efi_stub_entry)
+ENTRY(entry)
 	/*
 	 * Create a stack frame to save FP/LR with extra space
 	 * for image_addr variable passed to efi_entry().
@@ -86,8 +86,8 @@ ENTRY(efi_stub_entry)
 	 * entries for the VA range of the current image, so no maintenance is
 	 * necessary.
 	 */
-	adr	x0, efi_stub_entry
-	adr	x1, efi_stub_entry_end
+	adr	x0, entry
+	adr	x1, entry_end
 	sub	x1, x1, x0
 	bl	__flush_dcache_area
 
@@ -120,5 +120,5 @@ efi_load_fail:
 	ldp	x29, x30, [sp], #32
 	ret
 
-efi_stub_entry_end:
-ENDPROC(efi_stub_entry)
+entry_end:
+ENDPROC(entry)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 90d09eddd5b2..28a81e948df9 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -120,8 +120,8 @@ efi_head:
 #endif
 
 #ifdef CONFIG_EFI
-	.globl	stext_offset
-	.set	stext_offset, stext - efi_head
+	.globl	__efistub_stext_offset
+	.set	__efistub_stext_offset, stext - efi_head
 	.align 3
 pe_header:
 	.ascii	"PE"
@@ -144,8 +144,8 @@ optional_header:
 	.long	_end - stext			// SizeOfCode
 	.long	0				// SizeOfInitializedData
 	.long	0				// SizeOfUninitializedData
-	.long	efi_stub_entry - efi_head	// AddressOfEntryPoint
-	.long	stext_offset			// BaseOfCode
+	.long	__efistub_entry - efi_head	// AddressOfEntryPoint
+	.long	__efistub_stext_offset		// BaseOfCode
 
 extra_header_fields:
 	.quad	0				// ImageBase
@@ -162,7 +162,7 @@ extra_header_fields:
 	.long	_end - efi_head			// SizeOfImage
 
 	// Everything before the kernel image is considered part of the header
-	.long	stext_offset			// SizeOfHeaders
+	.long	__efistub_stext_offset		// SizeOfHeaders
 	.long	0				// CheckSum
 	.short	0xa				// Subsystem (EFI application)
 	.short	0				// DllCharacteristics
@@ -207,9 +207,9 @@ section_table:
 	.byte	0
 	.byte	0        		// end of 0 padding of section name
 	.long	_end - stext		// VirtualSize
-	.long	stext_offset		// VirtualAddress
+	.long	__efistub_stext_offset	// VirtualAddress
 	.long	_edata - stext		// SizeOfRawData
-	.long	stext_offset		// PointerToRawData
+	.long	__efistub_stext_offset	// PointerToRawData
 
 	.long	0		// PointerToRelocations (0 for executables)
 	.long	0		// PointerToLineNumbers (0 for executables)
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 8fae0756e175..e083af0dd546 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -59,4 +59,31 @@
 	_kernel_offset_le	= DATA_LE64(TEXT_OFFSET);	\
 	_kernel_flags_le	= DATA_LE64(__HEAD_FLAGS);
 
+#ifdef CONFIG_EFI
+
+/*
+ * The EFI stub has its own symbol namespace prefixed by __efistub_, to
+ * isolate it from the kernel proper. The following symbols are legally
+ * accessed by the stub, so provide some aliases to make them accessible.
+ * Only include data symbols here, or text symbols of functions that are
+ * guaranteed to be safe when executed at another offset than they were
+ * linked at. The routines below are all implemented in assembler in a
+ * position independent manner
+ */
+__efistub_memcmp		= __pi_memcmp;
+__efistub_memchr		= __pi_memchr;
+__efistub_memcpy		= __pi_memcpy;
+__efistub_memmove		= __pi_memmove;
+__efistub_memset		= __pi_memset;
+__efistub_strlen		= __pi_strlen;
+__efistub_strcmp		= __pi_strcmp;
+__efistub_strncmp		= __pi_strncmp;
+__efistub___flush_dcache_area	= __pi___flush_dcache_area;
+
+__efistub__text			= _text;
+__efistub__end			= _end;
+__efistub__edata		= _edata;
+
+#endif
+
 #endif /* __ASM_IMAGE_H */
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 816dbe9f4b82..bca9a76cbd33 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -14,6 +14,8 @@ cflags-$(CONFIG_ARM64)		:= $(subst -pg,,$(KBUILD_CFLAGS))
 cflags-$(CONFIG_ARM)		:= $(subst -pg,,$(KBUILD_CFLAGS)) \
 				   -fno-builtin -fpic -mno-single-pic-base
 
+cflags-$(CONFIG_EFI_ARMSTUB)	+= -I$(srctree)/scripts/dtc/libfdt
+
 KBUILD_CFLAGS			:= $(cflags-y) \
 				   $(call cc-option,-ffreestanding) \
 				   $(call cc-option,-fno-stack-protector)
@@ -22,7 +24,15 @@ GCOV_PROFILE			:= n
 KASAN_SANITIZE			:= n
 
 lib-y				:= efi-stub-helper.o
-lib-$(CONFIG_EFI_ARMSTUB)	+= arm-stub.o fdt.o
+
+# include the stub's generic dependencies from lib/ when building for ARM/arm64
+arm-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c sort.c
+
+$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
+	$(call if_changed_rule,cc_o_c)
+
+lib-$(CONFIG_EFI_ARMSTUB)	+= arm-stub.o fdt.o string.o \
+				   $(patsubst %.c,lib-%.o,$(arm-deps))
 
 #
 # arm64 puts the stub in the kernel proper, which will unnecessarily retain all
@@ -30,10 +40,27 @@ lib-$(CONFIG_EFI_ARMSTUB)	+= arm-stub.o fdt.o
 # So let's apply the __init annotations at the section level, by prefixing
 # the section names directly. This will ensure that even all the inline string
 # literals are covered.
+# The fact that the stub and the kernel proper are essentially the same binary
+# also means that we need to be extra careful to make sure that the stub does
+# not rely on any absolute symbol references, considering that the virtual
+# kernel mapping that the linker uses is not active yet when the stub is
+# executing. So build all C dependencies of the EFI stub into libstub, and do
+# a verification pass to see if any absolute relocations exist in any of the
+# object files.
 #
-extra-$(CONFIG_ARM64)		:= $(lib-y)
-lib-$(CONFIG_ARM64)		:= $(patsubst %.o,%.init.o,$(lib-y))
+extra-$(CONFIG_EFI_ARMSTUB)	:= $(lib-y)
+lib-$(CONFIG_EFI_ARMSTUB)	:= $(patsubst %.o,%.stub.o,$(lib-y))
+
+STUBCOPY_FLAGS-y		:= -R .debug* -R *ksymtab*
+STUBCOPY_FLAGS-$(CONFIG_ARM64)	+= --prefix-alloc-sections=.init \
+				   --prefix-symbols=__efistub_
+STUBCOPY_RELOC-$(CONFIG_ARM64)	:= R_AARCH64_ABS
+
+$(obj)/%.stub.o: $(obj)/%.o FORCE
+	$(call if_changed,stubcopy)
 
-OBJCOPYFLAGS := --prefix-alloc-sections=.init
-$(obj)/%.init.o: $(obj)/%.o FORCE
-	$(call if_changed,objcopy)
+quiet_cmd_stubcopy = STUBCPY $@
+      cmd_stubcopy = if $(OBJCOPY) $(STUBCOPY_FLAGS-y) $< $@; then	\
+		     $(OBJDUMP) -r $@ | grep $(STUBCOPY_RELOC-y)	\
+		     && (echo >&2 "$@: absolute symbol references not allowed in the EFI stub"; \
+			 rm -f $@; /bin/false); else /bin/false; fi
diff --git a/drivers/firmware/efi/libstub/string.c b/drivers/firmware/efi/libstub/string.c
new file mode 100644
index 000000000000..09d5a0894343
--- /dev/null
+++ b/drivers/firmware/efi/libstub/string.c
@@ -0,0 +1,57 @@
+/*
+ * Taken from:
+ *  linux/lib/string.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+
+#ifndef __HAVE_ARCH_STRSTR
+/**
+ * strstr - Find the first substring in a %NUL terminated string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ */
+char *strstr(const char *s1, const char *s2)
+{
+	size_t l1, l2;
+
+	l2 = strlen(s2);
+	if (!l2)
+		return (char *)s1;
+	l1 = strlen(s1);
+	while (l1 >= l2) {
+		l1--;
+		if (!memcmp(s1, s2, l2))
+			return (char *)s1;
+		s1++;
+	}
+	return NULL;
+}
+#endif
+
+#ifndef __HAVE_ARCH_STRNCMP
+/**
+ * strncmp - Compare two length-limited strings
+ * @cs: One string
+ * @ct: Another string
+ * @count: The maximum number of bytes to compare
+ */
+int strncmp(const char *cs, const char *ct, size_t count)
+{
+	unsigned char c1, c2;
+
+	while (count) {
+		c1 = *cs++;
+		c2 = *ct++;
+		if (c1 != c2)
+			return c1 < c2 ? -1 : 1;
+		if (!c1)
+			break;
+		count--;
+	}
+	return 0;
+}
+#endif
-- 
cgit v1.2.3-58-ga151


From 305d454aaa292be3a09a9d674e6c35f5b4249a13 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 8 Oct 2015 20:15:18 +0100
Subject: arm64: atomics: implement native {relaxed, acquire, release} atomics

Commit 654672d4ba1a ("locking/atomics: Add _{acquire|release|relaxed}()
variants of some atomic operation") introduced a relaxed atomic API to
Linux that maps nicely onto the arm64 memory model, including the new
ARMv8.1 atomic instructions.

This patch hooks up the API to our relaxed atomic instructions, rather
than have them all expand to the full-barrier variants as they do
currently.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/atomic.h       |  63 +++++++-
 arch/arm64/include/asm/atomic_ll_sc.h |  98 +++++++-----
 arch/arm64/include/asm/atomic_lse.h   | 193 +++++++++++++----------
 arch/arm64/include/asm/cmpxchg.h      | 279 +++++++++++++++++-----------------
 4 files changed, 371 insertions(+), 262 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index 35a67783cfa0..5e13ad76a249 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -55,13 +55,42 @@
 
 #define atomic_read(v)			READ_ONCE((v)->counter)
 #define atomic_set(v, i)		(((v)->counter) = (i))
+
+#define atomic_add_return_relaxed	atomic_add_return_relaxed
+#define atomic_add_return_acquire	atomic_add_return_acquire
+#define atomic_add_return_release	atomic_add_return_release
+#define atomic_add_return		atomic_add_return
+
+#define atomic_inc_return_relaxed(v)	atomic_add_return_relaxed(1, (v))
+#define atomic_inc_return_acquire(v)	atomic_add_return_acquire(1, (v))
+#define atomic_inc_return_release(v)	atomic_add_return_release(1, (v))
+#define atomic_inc_return(v)		atomic_add_return(1, (v))
+
+#define atomic_sub_return_relaxed	atomic_sub_return_relaxed
+#define atomic_sub_return_acquire	atomic_sub_return_acquire
+#define atomic_sub_return_release	atomic_sub_return_release
+#define atomic_sub_return		atomic_sub_return
+
+#define atomic_dec_return_relaxed(v)	atomic_sub_return_relaxed(1, (v))
+#define atomic_dec_return_acquire(v)	atomic_sub_return_acquire(1, (v))
+#define atomic_dec_return_release(v)	atomic_sub_return_release(1, (v))
+#define atomic_dec_return(v)		atomic_sub_return(1, (v))
+
+#define atomic_xchg_relaxed(v, new)	xchg_relaxed(&((v)->counter), (new))
+#define atomic_xchg_acquire(v, new)	xchg_acquire(&((v)->counter), (new))
+#define atomic_xchg_release(v, new)	xchg_release(&((v)->counter), (new))
 #define atomic_xchg(v, new)		xchg(&((v)->counter), (new))
+
+#define atomic_cmpxchg_relaxed(v, old, new)				\
+	cmpxchg_relaxed(&((v)->counter), (old), (new))
+#define atomic_cmpxchg_acquire(v, old, new)				\
+	cmpxchg_acquire(&((v)->counter), (old), (new))
+#define atomic_cmpxchg_release(v, old, new)				\
+	cmpxchg_release(&((v)->counter), (old), (new))
 #define atomic_cmpxchg(v, old, new)	cmpxchg(&((v)->counter), (old), (new))
 
 #define atomic_inc(v)			atomic_add(1, (v))
 #define atomic_dec(v)			atomic_sub(1, (v))
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
 #define atomic_inc_and_test(v)		(atomic_inc_return(v) == 0)
 #define atomic_dec_and_test(v)		(atomic_dec_return(v) == 0)
 #define atomic_sub_and_test(i, v)	(atomic_sub_return((i), (v)) == 0)
@@ -75,13 +104,39 @@
 #define ATOMIC64_INIT			ATOMIC_INIT
 #define atomic64_read			atomic_read
 #define atomic64_set			atomic_set
+
+#define atomic64_add_return_relaxed	atomic64_add_return_relaxed
+#define atomic64_add_return_acquire	atomic64_add_return_acquire
+#define atomic64_add_return_release	atomic64_add_return_release
+#define atomic64_add_return		atomic64_add_return
+
+#define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1, (v))
+#define atomic64_inc_return_acquire(v)	atomic64_add_return_acquire(1, (v))
+#define atomic64_inc_return_release(v)	atomic64_add_return_release(1, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
+
+#define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
+#define atomic64_sub_return_acquire	atomic64_sub_return_acquire
+#define atomic64_sub_return_release	atomic64_sub_return_release
+#define atomic64_sub_return		atomic64_sub_return
+
+#define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1, (v))
+#define atomic64_dec_return_acquire(v)	atomic64_sub_return_acquire(1, (v))
+#define atomic64_dec_return_release(v)	atomic64_sub_return_release(1, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
+
+#define atomic64_xchg_relaxed		atomic_xchg_relaxed
+#define atomic64_xchg_acquire		atomic_xchg_acquire
+#define atomic64_xchg_release		atomic_xchg_release
 #define atomic64_xchg			atomic_xchg
+
+#define atomic64_cmpxchg_relaxed	atomic_cmpxchg_relaxed
+#define atomic64_cmpxchg_acquire	atomic_cmpxchg_acquire
+#define atomic64_cmpxchg_release	atomic_cmpxchg_release
 #define atomic64_cmpxchg		atomic_cmpxchg
 
 #define atomic64_inc(v)			atomic64_add(1, (v))
 #define atomic64_dec(v)			atomic64_sub(1, (v))
-#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
-#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
 #define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
 #define atomic64_dec_and_test(v)	(atomic64_dec_return(v) == 0)
 #define atomic64_sub_and_test(i, v)	(atomic64_sub_return((i), (v)) == 0)
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index b3b5c4ae3800..74d0b8eb0799 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -55,40 +55,47 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v))				\
 }									\
 __LL_SC_EXPORT(atomic_##op);
 
-#define ATOMIC_OP_RETURN(op, asm_op)					\
+#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op)		\
 __LL_SC_INLINE int							\
-__LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v))		\
+__LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v))		\
 {									\
 	unsigned long tmp;						\
 	int result;							\
 									\
-	asm volatile("// atomic_" #op "_return\n"			\
+	asm volatile("// atomic_" #op "_return" #name "\n"		\
 "	prfm	pstl1strm, %2\n"					\
-"1:	ldxr	%w0, %2\n"						\
+"1:	ld" #acq "xr	%w0, %2\n"					\
 "	" #asm_op "	%w0, %w0, %w3\n"				\
-"	stlxr	%w1, %w0, %2\n"						\
-"	cbnz	%w1, 1b"						\
+"	st" #rel "xr	%w1, %w0, %2\n"					\
+"	cbnz	%w1, 1b\n"						\
+"	" #mb								\
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
 	: "Ir" (i)							\
-	: "memory");							\
+	: cl);								\
 									\
-	smp_mb();							\
 	return result;							\
 }									\
-__LL_SC_EXPORT(atomic_##op##_return);
+__LL_SC_EXPORT(atomic_##op##_return##name);
+
+#define ATOMIC_OPS(...)							\
+	ATOMIC_OP(__VA_ARGS__)						\
+	ATOMIC_OP_RETURN(        , dmb ish,  , l, "memory", __VA_ARGS__)
 
-#define ATOMIC_OPS(op, asm_op)						\
-	ATOMIC_OP(op, asm_op)						\
-	ATOMIC_OP_RETURN(op, asm_op)
+#define ATOMIC_OPS_RLX(...)						\
+	ATOMIC_OPS(__VA_ARGS__)						\
+	ATOMIC_OP_RETURN(_relaxed,        ,  ,  ,         , __VA_ARGS__)\
+	ATOMIC_OP_RETURN(_acquire,        , a,  , "memory", __VA_ARGS__)\
+	ATOMIC_OP_RETURN(_release,        ,  , l, "memory", __VA_ARGS__)
 
-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, sub)
+ATOMIC_OPS_RLX(add, add)
+ATOMIC_OPS_RLX(sub, sub)
 
 ATOMIC_OP(and, and)
 ATOMIC_OP(andnot, bic)
 ATOMIC_OP(or, orr)
 ATOMIC_OP(xor, eor)
 
+#undef ATOMIC_OPS_RLX
 #undef ATOMIC_OPS
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
@@ -111,40 +118,47 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v))			\
 }									\
 __LL_SC_EXPORT(atomic64_##op);
 
-#define ATOMIC64_OP_RETURN(op, asm_op)					\
+#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op)		\
 __LL_SC_INLINE long							\
-__LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v))		\
+__LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v))	\
 {									\
 	long result;							\
 	unsigned long tmp;						\
 									\
-	asm volatile("// atomic64_" #op "_return\n"			\
+	asm volatile("// atomic64_" #op "_return" #name "\n"		\
 "	prfm	pstl1strm, %2\n"					\
-"1:	ldxr	%0, %2\n"						\
+"1:	ld" #acq "xr	%0, %2\n"					\
 "	" #asm_op "	%0, %0, %3\n"					\
-"	stlxr	%w1, %0, %2\n"						\
-"	cbnz	%w1, 1b"						\
+"	st" #rel "xr	%w1, %0, %2\n"					\
+"	cbnz	%w1, 1b\n"						\
+"	" #mb								\
 	: "=&r" (result), "=&r" (tmp), "+Q" (v->counter)		\
 	: "Ir" (i)							\
-	: "memory");							\
+	: cl);								\
 									\
-	smp_mb();							\
 	return result;							\
 }									\
-__LL_SC_EXPORT(atomic64_##op##_return);
+__LL_SC_EXPORT(atomic64_##op##_return##name);
+
+#define ATOMIC64_OPS(...)						\
+	ATOMIC64_OP(__VA_ARGS__)					\
+	ATOMIC64_OP_RETURN(, dmb ish,  , l, "memory", __VA_ARGS__)
 
-#define ATOMIC64_OPS(op, asm_op)					\
-	ATOMIC64_OP(op, asm_op)						\
-	ATOMIC64_OP_RETURN(op, asm_op)
+#define ATOMIC64_OPS_RLX(...)						\
+	ATOMIC64_OPS(__VA_ARGS__)					\
+	ATOMIC64_OP_RETURN(_relaxed,,  ,  ,         , __VA_ARGS__)	\
+	ATOMIC64_OP_RETURN(_acquire,, a,  , "memory", __VA_ARGS__)	\
+	ATOMIC64_OP_RETURN(_release,,  , l, "memory", __VA_ARGS__)
 
-ATOMIC64_OPS(add, add)
-ATOMIC64_OPS(sub, sub)
+ATOMIC64_OPS_RLX(add, add)
+ATOMIC64_OPS_RLX(sub, sub)
 
 ATOMIC64_OP(and, and)
 ATOMIC64_OP(andnot, bic)
 ATOMIC64_OP(or, orr)
 ATOMIC64_OP(xor, eor)
 
+#undef ATOMIC64_OPS_RLX
 #undef ATOMIC64_OPS
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
@@ -172,7 +186,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
 }
 __LL_SC_EXPORT(atomic64_dec_if_positive);
 
-#define __CMPXCHG_CASE(w, sz, name, mb, rel, cl)			\
+#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl)			\
 __LL_SC_INLINE unsigned long						\
 __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,		\
 				     unsigned long old,			\
@@ -182,7 +196,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,		\
 									\
 	asm volatile(							\
 	"	prfm	pstl1strm, %[v]\n"				\
-	"1:	ldxr" #sz "\t%" #w "[oldval], %[v]\n"			\
+	"1:	ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n"		\
 	"	eor	%" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"	\
 	"	cbnz	%" #w "[tmp], 2f\n"				\
 	"	st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n"	\
@@ -199,14 +213,22 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,		\
 }									\
 __LL_SC_EXPORT(__cmpxchg_case_##name);
 
-__CMPXCHG_CASE(w, b,    1,        ,  ,         )
-__CMPXCHG_CASE(w, h,    2,        ,  ,         )
-__CMPXCHG_CASE(w,  ,    4,        ,  ,         )
-__CMPXCHG_CASE( ,  ,    8,        ,  ,         )
-__CMPXCHG_CASE(w, b, mb_1, dmb ish, l, "memory")
-__CMPXCHG_CASE(w, h, mb_2, dmb ish, l, "memory")
-__CMPXCHG_CASE(w,  , mb_4, dmb ish, l, "memory")
-__CMPXCHG_CASE( ,  , mb_8, dmb ish, l, "memory")
+__CMPXCHG_CASE(w, b,     1,        ,  ,  ,         )
+__CMPXCHG_CASE(w, h,     2,        ,  ,  ,         )
+__CMPXCHG_CASE(w,  ,     4,        ,  ,  ,         )
+__CMPXCHG_CASE( ,  ,     8,        ,  ,  ,         )
+__CMPXCHG_CASE(w, b, acq_1,        , a,  , "memory")
+__CMPXCHG_CASE(w, h, acq_2,        , a,  , "memory")
+__CMPXCHG_CASE(w,  , acq_4,        , a,  , "memory")
+__CMPXCHG_CASE( ,  , acq_8,        , a,  , "memory")
+__CMPXCHG_CASE(w, b, rel_1,        ,  , l, "memory")
+__CMPXCHG_CASE(w, h, rel_2,        ,  , l, "memory")
+__CMPXCHG_CASE(w,  , rel_4,        ,  , l, "memory")
+__CMPXCHG_CASE( ,  , rel_8,        ,  , l, "memory")
+__CMPXCHG_CASE(w, b,  mb_1, dmb ish,  , l, "memory")
+__CMPXCHG_CASE(w, h,  mb_2, dmb ish,  , l, "memory")
+__CMPXCHG_CASE(w,  ,  mb_4, dmb ish,  , l, "memory")
+__CMPXCHG_CASE( ,  ,  mb_8, dmb ish,  , l, "memory")
 
 #undef __CMPXCHG_CASE
 
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 55d740e63459..1fce7908e690 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -75,24 +75,32 @@ static inline void atomic_add(int i, atomic_t *v)
 	: "x30");
 }
 
-static inline int atomic_add_return(int i, atomic_t *v)
-{
-	register int w0 asm ("w0") = i;
-	register atomic_t *x1 asm ("x1") = v;
+#define ATOMIC_OP_ADD_RETURN(name, mb, cl...)				\
+static inline int atomic_add_return##name(int i, atomic_t *v)		\
+{									\
+	register int w0 asm ("w0") = i;					\
+	register atomic_t *x1 asm ("x1") = v;				\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"	nop\n"							\
+	__LL_SC_ATOMIC(add_return##name),				\
+	/* LSE atomics */						\
+	"	ldadd" #mb "	%w[i], w30, %[v]\n"			\
+	"	add	%w[i], %w[i], w30")				\
+	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
+	: "r" (x1)							\
+	: "x30" , ##cl);						\
+									\
+	return w0;							\
+}
 
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-	"	nop\n"
-	__LL_SC_ATOMIC(add_return),
-	/* LSE atomics */
-	"	ldaddal	%w[i], w30, %[v]\n"
-	"	add	%w[i], %w[i], w30")
-	: [i] "+r" (w0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: "x30", "memory");
+ATOMIC_OP_ADD_RETURN(_relaxed,   )
+ATOMIC_OP_ADD_RETURN(_acquire,  a, "memory")
+ATOMIC_OP_ADD_RETURN(_release,  l, "memory")
+ATOMIC_OP_ADD_RETURN(        , al, "memory")
 
-	return w0;
-}
+#undef ATOMIC_OP_ADD_RETURN
 
 static inline void atomic_and(int i, atomic_t *v)
 {
@@ -128,27 +136,34 @@ static inline void atomic_sub(int i, atomic_t *v)
 	: "x30");
 }
 
-static inline int atomic_sub_return(int i, atomic_t *v)
-{
-	register int w0 asm ("w0") = i;
-	register atomic_t *x1 asm ("x1") = v;
-
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-	"	nop\n"
-	__LL_SC_ATOMIC(sub_return)
-	"	nop",
-	/* LSE atomics */
-	"	neg	%w[i], %w[i]\n"
-	"	ldaddal	%w[i], w30, %[v]\n"
-	"	add	%w[i], %w[i], w30")
-	: [i] "+r" (w0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: "x30", "memory");
-
-	return w0;
+#define ATOMIC_OP_SUB_RETURN(name, mb, cl...)				\
+static inline int atomic_sub_return##name(int i, atomic_t *v)		\
+{									\
+	register int w0 asm ("w0") = i;					\
+	register atomic_t *x1 asm ("x1") = v;				\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"	nop\n"							\
+	__LL_SC_ATOMIC(sub_return##name)				\
+	"	nop",							\
+	/* LSE atomics */						\
+	"	neg	%w[i], %w[i]\n"					\
+	"	ldadd" #mb "	%w[i], w30, %[v]\n"			\
+	"	add	%w[i], %w[i], w30")				\
+	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
+	: "r" (x1)							\
+	: "x30" , ##cl);						\
+									\
+	return w0;							\
 }
 
+ATOMIC_OP_SUB_RETURN(_relaxed,   )
+ATOMIC_OP_SUB_RETURN(_acquire,  a, "memory")
+ATOMIC_OP_SUB_RETURN(_release,  l, "memory")
+ATOMIC_OP_SUB_RETURN(        , al, "memory")
+
+#undef ATOMIC_OP_SUB_RETURN
 #undef __LL_SC_ATOMIC
 
 #define __LL_SC_ATOMIC64(op)	__LL_SC_CALL(atomic64_##op)
@@ -201,24 +216,32 @@ static inline void atomic64_add(long i, atomic64_t *v)
 	: "x30");
 }
 
-static inline long atomic64_add_return(long i, atomic64_t *v)
-{
-	register long x0 asm ("x0") = i;
-	register atomic64_t *x1 asm ("x1") = v;
+#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...)				\
+static inline long atomic64_add_return##name(long i, atomic64_t *v)	\
+{									\
+	register long x0 asm ("x0") = i;				\
+	register atomic64_t *x1 asm ("x1") = v;				\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"	nop\n"							\
+	__LL_SC_ATOMIC64(add_return##name),				\
+	/* LSE atomics */						\
+	"	ldadd" #mb "	%[i], x30, %[v]\n"			\
+	"	add	%[i], %[i], x30")				\
+	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
+	: "r" (x1)							\
+	: "x30" , ##cl);						\
+									\
+	return x0;							\
+}
 
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-	"	nop\n"
-	__LL_SC_ATOMIC64(add_return),
-	/* LSE atomics */
-	"	ldaddal	%[i], x30, %[v]\n"
-	"	add	%[i], %[i], x30")
-	: [i] "+r" (x0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: "x30", "memory");
+ATOMIC64_OP_ADD_RETURN(_relaxed,   )
+ATOMIC64_OP_ADD_RETURN(_acquire,  a, "memory")
+ATOMIC64_OP_ADD_RETURN(_release,  l, "memory")
+ATOMIC64_OP_ADD_RETURN(        , al, "memory")
 
-	return x0;
-}
+#undef ATOMIC64_OP_ADD_RETURN
 
 static inline void atomic64_and(long i, atomic64_t *v)
 {
@@ -254,26 +277,34 @@ static inline void atomic64_sub(long i, atomic64_t *v)
 	: "x30");
 }
 
-static inline long atomic64_sub_return(long i, atomic64_t *v)
-{
-	register long x0 asm ("x0") = i;
-	register atomic64_t *x1 asm ("x1") = v;
+#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...)				\
+static inline long atomic64_sub_return##name(long i, atomic64_t *v)	\
+{									\
+	register long x0 asm ("x0") = i;				\
+	register atomic64_t *x1 asm ("x1") = v;				\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"	nop\n"							\
+	__LL_SC_ATOMIC64(sub_return##name)				\
+	"	nop",							\
+	/* LSE atomics */						\
+	"	neg	%[i], %[i]\n"					\
+	"	ldadd" #mb "	%[i], x30, %[v]\n"			\
+	"	add	%[i], %[i], x30")				\
+	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
+	: "r" (x1)							\
+	: "x30" , ##cl);						\
+									\
+	return x0;							\
+}
 
-	asm volatile(ARM64_LSE_ATOMIC_INSN(
-	/* LL/SC */
-	"	nop\n"
-	__LL_SC_ATOMIC64(sub_return)
-	"	nop",
-	/* LSE atomics */
-	"	neg	%[i], %[i]\n"
-	"	ldaddal	%[i], x30, %[v]\n"
-	"	add	%[i], %[i], x30")
-	: [i] "+r" (x0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: "x30", "memory");
+ATOMIC64_OP_SUB_RETURN(_relaxed,   )
+ATOMIC64_OP_SUB_RETURN(_acquire,  a, "memory")
+ATOMIC64_OP_SUB_RETURN(_release,  l, "memory")
+ATOMIC64_OP_SUB_RETURN(        , al, "memory")
 
-	return x0;
-}
+#undef ATOMIC64_OP_SUB_RETURN
 
 static inline long atomic64_dec_if_positive(atomic64_t *v)
 {
@@ -333,14 +364,22 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,	\
 	return x0;							\
 }
 
-__CMPXCHG_CASE(w, b,    1,   )
-__CMPXCHG_CASE(w, h,    2,   )
-__CMPXCHG_CASE(w,  ,    4,   )
-__CMPXCHG_CASE(x,  ,    8,   )
-__CMPXCHG_CASE(w, b, mb_1, al, "memory")
-__CMPXCHG_CASE(w, h, mb_2, al, "memory")
-__CMPXCHG_CASE(w,  , mb_4, al, "memory")
-__CMPXCHG_CASE(x,  , mb_8, al, "memory")
+__CMPXCHG_CASE(w, b,     1,   )
+__CMPXCHG_CASE(w, h,     2,   )
+__CMPXCHG_CASE(w,  ,     4,   )
+__CMPXCHG_CASE(x,  ,     8,   )
+__CMPXCHG_CASE(w, b, acq_1,  a, "memory")
+__CMPXCHG_CASE(w, h, acq_2,  a, "memory")
+__CMPXCHG_CASE(w,  , acq_4,  a, "memory")
+__CMPXCHG_CASE(x,  , acq_8,  a, "memory")
+__CMPXCHG_CASE(w, b, rel_1,  l, "memory")
+__CMPXCHG_CASE(w, h, rel_2,  l, "memory")
+__CMPXCHG_CASE(w,  , rel_4,  l, "memory")
+__CMPXCHG_CASE(x,  , rel_8,  l, "memory")
+__CMPXCHG_CASE(w, b,  mb_1, al, "memory")
+__CMPXCHG_CASE(w, h,  mb_2, al, "memory")
+__CMPXCHG_CASE(w,  ,  mb_4, al, "memory")
+__CMPXCHG_CASE(x,  ,  mb_8, al, "memory")
 
 #undef __LL_SC_CMPXCHG
 #undef __CMPXCHG_CASE
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 899e9f1d19e4..9ea611ea69df 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -25,154 +25,151 @@
 #include <asm/barrier.h>
 #include <asm/lse.h>
 
-static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
-{
-	unsigned long ret, tmp;
-
-	switch (size) {
-	case 1:
-		asm volatile(ARM64_LSE_ATOMIC_INSN(
-		/* LL/SC */
-		"	prfm	pstl1strm, %2\n"
-		"1:	ldxrb	%w0, %2\n"
-		"	stlxrb	%w1, %w3, %2\n"
-		"	cbnz	%w1, 1b\n"
-		"	dmb	ish",
-		/* LSE atomics */
-		"	nop\n"
-		"	nop\n"
-		"	swpalb	%w3, %w0, %2\n"
-		"	nop\n"
-		"	nop")
-			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)
-			: "r" (x)
-			: "memory");
-		break;
-	case 2:
-		asm volatile(ARM64_LSE_ATOMIC_INSN(
-		/* LL/SC */
-		"	prfm	pstl1strm, %2\n"
-		"1:	ldxrh	%w0, %2\n"
-		"	stlxrh	%w1, %w3, %2\n"
-		"	cbnz	%w1, 1b\n"
-		"	dmb	ish",
-		/* LSE atomics */
-		"	nop\n"
-		"	nop\n"
-		"	swpalh	%w3, %w0, %2\n"
-		"	nop\n"
-		"	nop")
-			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr)
-			: "r" (x)
-			: "memory");
-		break;
-	case 4:
-		asm volatile(ARM64_LSE_ATOMIC_INSN(
-		/* LL/SC */
-		"	prfm	pstl1strm, %2\n"
-		"1:	ldxr	%w0, %2\n"
-		"	stlxr	%w1, %w3, %2\n"
-		"	cbnz	%w1, 1b\n"
-		"	dmb	ish",
-		/* LSE atomics */
-		"	nop\n"
-		"	nop\n"
-		"	swpal	%w3, %w0, %2\n"
-		"	nop\n"
-		"	nop")
-			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr)
-			: "r" (x)
-			: "memory");
-		break;
-	case 8:
-		asm volatile(ARM64_LSE_ATOMIC_INSN(
-		/* LL/SC */
-		"	prfm	pstl1strm, %2\n"
-		"1:	ldxr	%0, %2\n"
-		"	stlxr	%w1, %3, %2\n"
-		"	cbnz	%w1, 1b\n"
-		"	dmb	ish",
-		/* LSE atomics */
-		"	nop\n"
-		"	nop\n"
-		"	swpal	%3, %0, %2\n"
-		"	nop\n"
-		"	nop")
-			: "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr)
-			: "r" (x)
-			: "memory");
-		break;
-	default:
-		BUILD_BUG();
-	}
-
-	return ret;
+/*
+ * We need separate acquire parameters for ll/sc and lse, since the full
+ * barrier case is generated as release+dmb for the former and
+ * acquire+release for the latter.
+ */
+#define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl)	\
+static inline unsigned long __xchg_case_##name(unsigned long x,		\
+					       volatile void *ptr)	\
+{									\
+	unsigned long ret, tmp;						\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"	prfm	pstl1strm, %2\n"				\
+	"1:	ld" #acq "xr" #sz "\t%" #w "0, %2\n"			\
+	"	st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n"		\
+	"	cbnz	%w1, 1b\n"					\
+	"	" #mb,							\
+	/* LSE atomics */						\
+	"	nop\n"							\
+	"	nop\n"							\
+	"	swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n"	\
+	"	nop\n"							\
+	"	" #nop_lse)						\
+	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)			\
+	: "r" (x)							\
+	: cl);								\
+									\
+	return ret;							\
 }
 
-#define xchg(ptr,x) \
-({ \
-	__typeof__(*(ptr)) __ret; \
-	__ret = (__typeof__(*(ptr))) \
-		__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
-	__ret; \
+__XCHG_CASE(w, b,     1,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, h,     2,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w,  ,     4,        ,    ,  ,  ,  ,         )
+__XCHG_CASE( ,  ,     8,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, b, acq_1,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, h, acq_2,        ,    , a, a,  , "memory")
+__XCHG_CASE(w,  , acq_4,        ,    , a, a,  , "memory")
+__XCHG_CASE( ,  , acq_8,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, b, rel_1,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, h, rel_2,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w,  , rel_4,        ,    ,  ,  , l, "memory")
+__XCHG_CASE( ,  , rel_8,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, b,  mb_1, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w, h,  mb_2, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w,  ,  mb_4, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE( ,  ,  mb_8, dmb ish, nop,  , a, l, "memory")
+
+#undef __XCHG_CASE
+
+#define __XCHG_GEN(sfx)							\
+static inline unsigned long __xchg##sfx(unsigned long x,		\
+					volatile void *ptr,		\
+					int size)			\
+{									\
+	switch (size) {							\
+	case 1:								\
+		return __xchg_case##sfx##_1(x, ptr);			\
+	case 2:								\
+		return __xchg_case##sfx##_2(x, ptr);			\
+	case 4:								\
+		return __xchg_case##sfx##_4(x, ptr);			\
+	case 8:								\
+		return __xchg_case##sfx##_8(x, ptr);			\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+									\
+	unreachable();							\
+}
+
+__XCHG_GEN()
+__XCHG_GEN(_acq)
+__XCHG_GEN(_rel)
+__XCHG_GEN(_mb)
+
+#undef __XCHG_GEN
+
+#define __xchg_wrapper(sfx, ptr, x)					\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	__ret = (__typeof__(*(ptr)))					\
+		__xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \
+	__ret;								\
 })
 
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
-				      unsigned long new, int size)
-{
-	switch (size) {
-	case 1:
-		return __cmpxchg_case_1(ptr, (u8)old, new);
-	case 2:
-		return __cmpxchg_case_2(ptr, (u16)old, new);
-	case 4:
-		return __cmpxchg_case_4(ptr, old, new);
-	case 8:
-		return __cmpxchg_case_8(ptr, old, new);
-	default:
-		BUILD_BUG();
-	}
-
-	unreachable();
+/* xchg */
+#define xchg_relaxed(...)	__xchg_wrapper(    , __VA_ARGS__)
+#define xchg_acquire(...)	__xchg_wrapper(_acq, __VA_ARGS__)
+#define xchg_release(...)	__xchg_wrapper(_rel, __VA_ARGS__)
+#define xchg(...)		__xchg_wrapper( _mb, __VA_ARGS__)
+
+#define __CMPXCHG_GEN(sfx)						\
+static inline unsigned long __cmpxchg##sfx(volatile void *ptr,		\
+					   unsigned long old,		\
+					   unsigned long new,		\
+					   int size)			\
+{									\
+	switch (size) {							\
+	case 1:								\
+		return __cmpxchg_case##sfx##_1(ptr, (u8)old, new);	\
+	case 2:								\
+		return __cmpxchg_case##sfx##_2(ptr, (u16)old, new);	\
+	case 4:								\
+		return __cmpxchg_case##sfx##_4(ptr, old, new);		\
+	case 8:								\
+		return __cmpxchg_case##sfx##_8(ptr, old, new);		\
+	default:							\
+		BUILD_BUG();						\
+	}								\
+									\
+	unreachable();							\
 }
 
-static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
-					 unsigned long new, int size)
-{
-	switch (size) {
-	case 1:
-		return __cmpxchg_case_mb_1(ptr, (u8)old, new);
-	case 2:
-		return __cmpxchg_case_mb_2(ptr, (u16)old, new);
-	case 4:
-		return __cmpxchg_case_mb_4(ptr, old, new);
-	case 8:
-		return __cmpxchg_case_mb_8(ptr, old, new);
-	default:
-		BUILD_BUG();
-	}
-
-	unreachable();
-}
+__CMPXCHG_GEN()
+__CMPXCHG_GEN(_acq)
+__CMPXCHG_GEN(_rel)
+__CMPXCHG_GEN(_mb)
 
-#define cmpxchg(ptr, o, n) \
-({ \
-	__typeof__(*(ptr)) __ret; \
-	__ret = (__typeof__(*(ptr))) \
-		__cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \
-			     sizeof(*(ptr))); \
-	__ret; \
-})
+#undef __CMPXCHG_GEN
 
-#define cmpxchg_local(ptr, o, n) \
-({ \
-	__typeof__(*(ptr)) __ret; \
-	__ret = (__typeof__(*(ptr))) \
-		__cmpxchg((ptr), (unsigned long)(o), \
-			  (unsigned long)(n), sizeof(*(ptr))); \
-	__ret; \
+#define __cmpxchg_wrapper(sfx, ptr, o, n)				\
+({									\
+	__typeof__(*(ptr)) __ret;					\
+	__ret = (__typeof__(*(ptr)))					\
+		__cmpxchg##sfx((ptr), (unsigned long)(o),		\
+				(unsigned long)(n), sizeof(*(ptr)));	\
+	__ret;								\
 })
 
+/* cmpxchg */
+#define cmpxchg_relaxed(...)	__cmpxchg_wrapper(    , __VA_ARGS__)
+#define cmpxchg_acquire(...)	__cmpxchg_wrapper(_acq, __VA_ARGS__)
+#define cmpxchg_release(...)	__cmpxchg_wrapper(_rel, __VA_ARGS__)
+#define cmpxchg(...)		__cmpxchg_wrapper( _mb, __VA_ARGS__)
+#define cmpxchg_local		cmpxchg_relaxed
+
+/* cmpxchg64 */
+#define cmpxchg64_relaxed	cmpxchg_relaxed
+#define cmpxchg64_acquire	cmpxchg_acquire
+#define cmpxchg64_release	cmpxchg_release
+#define cmpxchg64		cmpxchg
+#define cmpxchg64_local		cmpxchg_local
+
+/* cmpxchg_double */
 #define system_has_cmpxchg_double()     1
 
 #define __cmpxchg_double_check(ptr1, ptr2)					\
@@ -202,6 +199,7 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
 	__ret; \
 })
 
+/* this_cpu_cmpxchg */
 #define _protect_cmpxchg_local(pcp, o, n)			\
 ({								\
 	typeof(*raw_cpu_ptr(&(pcp))) __ret;			\
@@ -227,9 +225,4 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
 	__ret;								\
 })
 
-#define cmpxchg64(ptr,o,n)		cmpxchg((ptr),(o),(n))
-#define cmpxchg64_local(ptr,o,n)	cmpxchg_local((ptr),(o),(n))
-
-#define cmpxchg64_relaxed(ptr,o,n)	cmpxchg_local((ptr),(o),(n))
-
 #endif	/* __ASM_CMPXCHG_H */
-- 
cgit v1.2.3-58-ga151


From fd2203dd3556f6553231fa026060793e67a25ce6 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Date: Mon, 12 Oct 2015 18:52:57 +0300
Subject: arm64: move PGD_SIZE definition to pgalloc.h

This will be used by KASAN latter.

Signed-off-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/pgalloc.h | 1 +
 arch/arm64/mm/pgd.c              | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 76420568d66a..c15053902942 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -27,6 +27,7 @@
 #define check_pgt_cache()		do { } while (0)
 
 #define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
 
 #if CONFIG_PGTABLE_LEVELS > 2
 
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 71ca104f97bd..cb3ba1b812e7 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -28,8 +28,6 @@
 
 #include "mm.h"
 
-#define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
-
 static struct kmem_cache *pgd_cache;
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
-- 
cgit v1.2.3-58-ga151


From 39d114ddc68223022c12ae3a1573912bc4b585e5 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Date: Mon, 12 Oct 2015 18:52:58 +0300
Subject: arm64: add KASAN support

This patch adds arch specific code for kernel address sanitizer
(see Documentation/kasan.txt).

1/8 of kernel addresses reserved for shadow memory. There was no
big enough hole for this, so virtual addresses for shadow were
stolen from vmalloc area.

At early boot stage the whole shadow region populated with just
one physical page (kasan_zero_page). Later, this page reused
as readonly zero shadow for some memory that KASan currently
don't track (vmalloc).
After mapping the physical memory, pages for shadow memory are
allocated and mapped.

Functions like memset/memmove/memcpy do a lot of memory accesses.
If bad pointer passed to one of these function it is important
to catch this. Compiler's instrumentation cannot do this since
these functions are written in assembly.
KASan replaces memory functions with manually instrumented variants.
Original functions declared as weak symbols so strong definitions
in mm/kasan/kasan.c could replace them. Original functions have aliases
with '__' prefix in name, so we could call non-instrumented variant
if needed.
Some files built without kasan instrumentation (e.g. mm/slub.c).
Original mem* function replaced (via #define) with prefixed variants
to disable memory access checks for such files.

Signed-off-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Tested-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig               |   1 +
 arch/arm64/Makefile              |   7 ++
 arch/arm64/include/asm/kasan.h   |  36 +++++++++
 arch/arm64/include/asm/pgtable.h |   7 ++
 arch/arm64/include/asm/string.h  |  16 ++++
 arch/arm64/kernel/Makefile       |   2 +
 arch/arm64/kernel/arm64ksyms.c   |   3 +
 arch/arm64/kernel/head.S         |   3 +
 arch/arm64/kernel/image.h        |   6 ++
 arch/arm64/kernel/module.c       |  16 +++-
 arch/arm64/kernel/setup.c        |   4 +
 arch/arm64/lib/memcpy.S          |   3 +
 arch/arm64/lib/memmove.S         |   7 +-
 arch/arm64/lib/memset.S          |   3 +
 arch/arm64/mm/Makefile           |   3 +
 arch/arm64/mm/kasan_init.c       | 165 +++++++++++++++++++++++++++++++++++++++
 drivers/firmware/efi/Makefile    |   8 ++
 scripts/Makefile.kasan           |   4 +-
 18 files changed, 288 insertions(+), 6 deletions(-)
 create mode 100644 arch/arm64/include/asm/kasan.h
 create mode 100644 arch/arm64/mm/kasan_init.c

(limited to 'arch')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1b35bdbd5d74..2782c1189208 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -48,6 +48,7 @@ config ARM64
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_BITREVERSE
 	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index f9914d7c1bb0..f41c6761ec36 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -55,6 +55,13 @@ else
 TEXT_OFFSET := 0x00080000
 endif
 
+# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - 3)) - (1 << 61)
+# in 32-bit arithmetic
+KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \
+			(0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \
+			+ (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - 3)) \
+			- (1 << (64 - 32 - 3)) )) )
+
 export	TEXT_OFFSET GZFLAGS
 
 core-y		+= arch/arm64/kernel/ arch/arm64/mm/
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
new file mode 100644
index 000000000000..71dfe14acdca
--- /dev/null
+++ b/arch/arm64/include/asm/kasan.h
@@ -0,0 +1,36 @@
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_KASAN
+
+#include <asm/memory.h>
+
+/*
+ * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
+ * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
+ */
+#define KASAN_SHADOW_START      (VA_START)
+#define KASAN_SHADOW_END        (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))
+
+/*
+ * This value is used to map an address to the corresponding shadow
+ * address by the following formula:
+ *     shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET;
+ *
+ * (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END]
+ * cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET
+ * should satisfy the following equation:
+ *      KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61)
+ */
+#define KASAN_SHADOW_OFFSET     (KASAN_SHADOW_END - (1ULL << (64 - 3)))
+
+void kasan_init(void);
+
+#else
+static inline void kasan_init(void) { }
+#endif
+
+#endif
+#endif
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 3f481ef42c07..e3b515fbf0c2 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -41,7 +41,14 @@
  *	fixed mappings and modules
  */
 #define VMEMMAP_SIZE		ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
+
+#ifndef CONFIG_KASAN
 #define VMALLOC_START		(VA_START)
+#else
+#include <asm/kasan.h>
+#define VMALLOC_START		(KASAN_SHADOW_END + SZ_64K)
+#endif
+
 #define VMALLOC_END		(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
 
 #define vmemmap			((struct page *)(VMALLOC_END + SZ_64K))
diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h
index 64d2d4884a9d..2eb714c4639f 100644
--- a/arch/arm64/include/asm/string.h
+++ b/arch/arm64/include/asm/string.h
@@ -36,17 +36,33 @@ extern __kernel_size_t strnlen(const char *, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMCPY
 extern void *memcpy(void *, const void *, __kernel_size_t);
+extern void *__memcpy(void *, const void *, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *, const void *, __kernel_size_t);
+extern void *__memmove(void *, const void *, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMCHR
 extern void *memchr(const void *, int, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMSET
 extern void *memset(void *, int, __kernel_size_t);
+extern void *__memset(void *, int, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMCMP
 extern int memcmp(const void *, const void *, size_t);
 
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use not instrumented version of mem* functions.
+ */
+
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+#endif
+
 #endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7b17f6245f1e..1b6bda2ff102 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -7,6 +7,8 @@ AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 CFLAGS_efi-stub.o 	:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 CFLAGS_armv8_deprecated.o := -I$(src)
 
+KASAN_SANITIZE_efi-stub.o	:= n
+
 CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_insn.o = -pg
 CFLAGS_REMOVE_return_address.o = -pg
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
index a85843ddbde8..3b6d8cc9dfe0 100644
--- a/arch/arm64/kernel/arm64ksyms.c
+++ b/arch/arm64/kernel/arm64ksyms.c
@@ -51,6 +51,9 @@ EXPORT_SYMBOL(strnlen);
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(__memset);
+EXPORT_SYMBOL(__memcpy);
+EXPORT_SYMBOL(__memmove);
 EXPORT_SYMBOL(memchr);
 EXPORT_SYMBOL(memcmp);
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 28a81e948df9..2a8c1d5b3fae 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -444,6 +444,9 @@ __mmap_switched:
 	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
 	str_l	x24, memstart_addr, x6		// Save PHYS_OFFSET
 	mov	x29, #0
+#ifdef CONFIG_KASAN
+	bl	kasan_early_init
+#endif
 	b	start_kernel
 ENDPROC(__mmap_switched)
 
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index e083af0dd546..6eb8fee93321 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -80,6 +80,12 @@ __efistub_strcmp		= __pi_strcmp;
 __efistub_strncmp		= __pi_strncmp;
 __efistub___flush_dcache_area	= __pi___flush_dcache_area;
 
+#ifdef CONFIG_KASAN
+__efistub___memcpy		= __pi_memcpy;
+__efistub___memmove		= __pi_memmove;
+__efistub___memset		= __pi_memset;
+#endif
+
 __efistub__text			= _text;
 __efistub__end			= _end;
 __efistub__edata		= _edata;
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 876eb8df50bf..f4bc779e62e8 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -21,6 +21,7 @@
 #include <linux/bitops.h>
 #include <linux/elf.h>
 #include <linux/gfp.h>
+#include <linux/kasan.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/moduleloader.h>
@@ -34,9 +35,18 @@
 
 void *module_alloc(unsigned long size)
 {
-	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
-				    GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
-				    NUMA_NO_NODE, __builtin_return_address(0));
+	void *p;
+
+	p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+				GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+				NUMA_NO_NODE, __builtin_return_address(0));
+
+	if (p && (kasan_module_alloc(p, size) < 0)) {
+		vfree(p);
+		return NULL;
+	}
+
+	return p;
 }
 
 enum aarch64_reloc_op {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 6bab21f84a9f..79df79a2ea61 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -54,6 +54,7 @@
 #include <asm/elf.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
+#include <asm/kasan.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/smp_plat.h>
@@ -434,6 +435,9 @@ void __init setup_arch(char **cmdline_p)
 
 	paging_init();
 	relocate_initrd();
+
+	kasan_init();
+
 	request_standard_resources();
 
 	early_ioremap_reset();
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 36a6a62cf263..67613937711f 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -68,7 +68,10 @@
 	stp \ptr, \regB, [\regC], \val
 	.endm
 
+	.weak memcpy
+ENTRY(__memcpy)
 ENTRY(memcpy)
 #include "copy_template.S"
 	ret
 ENDPIPROC(memcpy)
+ENDPROC(__memcpy)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
index 68e2f2035e23..a5a4459013b1 100644
--- a/arch/arm64/lib/memmove.S
+++ b/arch/arm64/lib/memmove.S
@@ -57,12 +57,14 @@ C_h	.req	x12
 D_l	.req	x13
 D_h	.req	x14
 
+	.weak memmove
+ENTRY(__memmove)
 ENTRY(memmove)
 	cmp	dstin, src
-	b.lo	memcpy
+	b.lo	__memcpy
 	add	tmp1, src, count
 	cmp	dstin, tmp1
-	b.hs	memcpy		/* No overlap.  */
+	b.hs	__memcpy		/* No overlap.  */
 
 	add	dst, dstin, count
 	add	src, src, count
@@ -195,3 +197,4 @@ ENTRY(memmove)
 	b.ne	.Ltail63
 	ret
 ENDPIPROC(memmove)
+ENDPROC(__memmove)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index 29f405f08792..f2670a9f218c 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -54,6 +54,8 @@ dst		.req	x8
 tmp3w		.req	w9
 tmp3		.req	x9
 
+	.weak memset
+ENTRY(__memset)
 ENTRY(memset)
 	mov	dst, dstin	/* Preserve return value.  */
 	and	A_lw, val, #255
@@ -214,3 +216,4 @@ ENTRY(memset)
 	b.ne	.Ltail_maybe_long
 	ret
 ENDPIPROC(memset)
+ENDPROC(__memset)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 773d37a14039..57f57fde5722 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -4,3 +4,6 @@ obj-y				:= dma-mapping.o extable.o fault.o init.o \
 				   context.o proc.o pageattr.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
 obj-$(CONFIG_ARM64_PTDUMP)	+= dump.o
+
+obj-$(CONFIG_KASAN)		+= kasan_init.o
+KASAN_SANITIZE_kasan_init.o	:= n
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
new file mode 100644
index 000000000000..b6a92f5dd568
--- /dev/null
+++ b/arch/arm64/mm/kasan_init.c
@@ -0,0 +1,165 @@
+/*
+ * This file contains kasan initialization code for ARM64.
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/start_kernel.h>
+
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+
+static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
+					unsigned long end)
+{
+	pte_t *pte;
+	unsigned long next;
+
+	if (pmd_none(*pmd))
+		pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+
+	pte = pte_offset_kernel(pmd, addr);
+	do {
+		next = addr + PAGE_SIZE;
+		set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
+					PAGE_KERNEL));
+	} while (pte++, addr = next, addr != end && pte_none(*pte));
+}
+
+static void __init kasan_early_pmd_populate(pud_t *pud,
+					unsigned long addr,
+					unsigned long end)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	if (pud_none(*pud))
+		pud_populate(&init_mm, pud, kasan_zero_pmd);
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		kasan_early_pte_populate(pmd, addr, next);
+	} while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+}
+
+static void __init kasan_early_pud_populate(pgd_t *pgd,
+					unsigned long addr,
+					unsigned long end)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	if (pgd_none(*pgd))
+		pgd_populate(&init_mm, pgd, kasan_zero_pud);
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		kasan_early_pmd_populate(pud, addr, next);
+	} while (pud++, addr = next, addr != end && pud_none(*pud));
+}
+
+static void __init kasan_map_early_shadow(void)
+{
+	unsigned long addr = KASAN_SHADOW_START;
+	unsigned long end = KASAN_SHADOW_END;
+	unsigned long next;
+	pgd_t *pgd;
+
+	pgd = pgd_offset_k(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		kasan_early_pud_populate(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+void __init kasan_early_init(void)
+{
+	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
+	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+	kasan_map_early_shadow();
+}
+
+static void __init clear_pgds(unsigned long start,
+			unsigned long end)
+{
+	/*
+	 * Remove references to kasan page tables from
+	 * swapper_pg_dir. pgd_clear() can't be used
+	 * here because it's nop on 2,3-level pagetable setups
+	 */
+	for (; start < end; start += PGDIR_SIZE)
+		set_pgd(pgd_offset_k(start), __pgd(0));
+}
+
+static void __init cpu_set_ttbr1(unsigned long ttbr1)
+{
+	asm(
+	"	msr	ttbr1_el1, %0\n"
+	"	isb"
+	:
+	: "r" (ttbr1));
+}
+
+void __init kasan_init(void)
+{
+	struct memblock_region *reg;
+
+	/*
+	 * We are going to perform proper setup of shadow memory.
+	 * At first we should unmap early shadow (clear_pgds() call bellow).
+	 * However, instrumented code couldn't execute without shadow memory.
+	 * tmp_pg_dir used to keep early shadow mapped until full shadow
+	 * setup will be finished.
+	 */
+	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
+	cpu_set_ttbr1(__pa(tmp_pg_dir));
+	flush_tlb_all();
+
+	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
+			kasan_mem_to_shadow((void *)MODULES_VADDR));
+
+	for_each_memblock(memory, reg) {
+		void *start = (void *)__phys_to_virt(reg->base);
+		void *end = (void *)__phys_to_virt(reg->base + reg->size);
+
+		if (start >= end)
+			break;
+
+		/*
+		 * end + 1 here is intentional. We check several shadow bytes in
+		 * advance to slightly speed up fastpath. In some rare cases
+		 * we could cross boundary of mapped shadow, so we just map
+		 * some more here.
+		 */
+		vmemmap_populate((unsigned long)kasan_mem_to_shadow(start),
+				(unsigned long)kasan_mem_to_shadow(end) + 1,
+				pfn_to_nid(virt_to_pfn(start)));
+	}
+
+	memset(kasan_zero_page, 0, PAGE_SIZE);
+	cpu_set_ttbr1(__pa(swapper_pg_dir));
+	flush_tlb_all();
+
+	/* At this point kasan is fully initialized. Enable error messages */
+	init_task.kasan_depth = 0;
+	pr_info("KernelAddressSanitizer initialized\n");
+}
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 6fd3da938717..413fcf2970c0 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -1,6 +1,14 @@
 #
 # Makefile for linux kernel
 #
+
+#
+# ARM64 maps efi runtime services in userspace addresses
+# which don't have KASAN shadow. So dereference of these addresses
+# in efi_call_virt() will cause crash if this code instrumented.
+#
+KASAN_SANITIZE_runtime-wrappers.o	:= n
+
 obj-$(CONFIG_EFI)			+= efi.o vars.o reboot.o
 obj-$(CONFIG_EFI_VARS)			+= efivars.o
 obj-$(CONFIG_EFI_ESRT)			+= esrt.o
diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan
index 3f874d24234f..37323b0df374 100644
--- a/scripts/Makefile.kasan
+++ b/scripts/Makefile.kasan
@@ -5,10 +5,12 @@ else
 	call_threshold := 0
 endif
 
+KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET)
+
 CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address
 
 CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \
-		-fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \
+		-fasan-shadow-offset=$(KASAN_SHADOW_OFFSET) \
 		--param asan-stack=1 --param asan-globals=1 \
 		--param asan-instrumentation-with-call-threshold=$(call_threshold))
 
-- 
cgit v1.2.3-58-ga151


From ee7f881b59de4e0e0be250fd0c5d4ade3e30ec34 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 12 Oct 2015 18:52:59 +0300
Subject: ARM64: kasan: print memory assignment

This prints out the virtual memory assigned to KASan in the
boot crawl along with other memory assignments, if and only
if KASan is activated.

Example dmesg from the Juno Development board:

Memory: 1691156K/2080768K available (5465K kernel code, 444K rwdata,
2160K rodata, 340K init, 217K bss, 373228K reserved, 16384K cma-reserved)
Virtual kernel memory layout:
    kasan   : 0xffffff8000000000 - 0xffffff9000000000   (    64 GB)
    vmalloc : 0xffffff9000000000 - 0xffffffbdbfff0000   (   182 GB)
    vmemmap : 0xffffffbdc0000000 - 0xffffffbfc0000000   (     8 GB maximum)
              0xffffffbdc2000000 - 0xffffffbdc3fc0000   (    31 MB actual)
    fixed   : 0xffffffbffabfd000 - 0xffffffbffac00000   (    12 KB)
    PCI I/O : 0xffffffbffae00000 - 0xffffffbffbe00000   (    16 MB)
    modules : 0xffffffbffc000000 - 0xffffffc000000000   (    64 MB)
    memory  : 0xffffffc000000000 - 0xffffffc07f000000   (  2032 MB)
      .init : 0xffffffc0007f5000 - 0xffffffc00084a000   (   340 KB)
      .text : 0xffffffc000080000 - 0xffffffc0007f45b4   (  7634 KB)
      .data : 0xffffffc000850000 - 0xffffffc0008bf200   (   445 KB)

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/init.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f5c0680d17d9..7a1f9a05dc82 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -298,6 +298,9 @@ void __init mem_init(void)
 #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
 
 	pr_notice("Virtual kernel memory layout:\n"
+#ifdef CONFIG_KASAN
+		  "    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n"
+#endif
 		  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n"
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
@@ -310,6 +313,9 @@ void __init mem_init(void)
 		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
 		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
 		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+#ifdef CONFIG_KASAN
+		  MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
+#endif
 		  MLG(VMALLOC_START, VMALLOC_END),
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  MLG((unsigned long)vmemmap,
-- 
cgit v1.2.3-58-ga151


From 83040123fde42ec532d3b632efb5f7f84024e61d Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 13 Oct 2015 14:01:06 +0100
Subject: arm64: kasan: fix issues reported by sparse

Sparse reports some new issues introduced by the kasan patches:

  arch/arm64/mm/kasan_init.c:91:13: warning: no previous prototype for
  'kasan_early_init' [-Wmissing-prototypes] void __init kasan_early_init(void)
             ^
  arch/arm64/mm/kasan_init.c:91:13: warning: symbol 'kasan_early_init'
  was not declared. Should it be static? [sparse]

This patch resolves the problem by adding a prototype for
kasan_early_init and marking the function as asmlinkage, since it's only
called from head.S.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/kasan.h | 2 ++
 arch/arm64/mm/kasan_init.c     | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index 71dfe14acdca..2774fa384c47 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -5,6 +5,7 @@
 
 #ifdef CONFIG_KASAN
 
+#include <linux/linkage.h>
 #include <asm/memory.h>
 
 /*
@@ -27,6 +28,7 @@
 #define KASAN_SHADOW_OFFSET     (KASAN_SHADOW_END - (1ULL << (64 - 3)))
 
 void kasan_init(void);
+asmlinkage void kasan_early_init(void);
 
 #else
 static inline void kasan_init(void) { }
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index b6a92f5dd568..cf038c7d9fa9 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -88,7 +88,7 @@ static void __init kasan_map_early_shadow(void)
 	} while (pgd++, addr = next, addr != end);
 }
 
-void __init kasan_early_init(void)
+asmlinkage void __init kasan_early_init(void)
 {
 	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
 	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
-- 
cgit v1.2.3-58-ga151


From 1b6d7f8742d5d46c478f10c9e57da18d049b116d Mon Sep 17 00:00:00 2001
From: yalin wang <yalin.wang2010@gmail.com>
Date: Mon, 12 Oct 2015 10:28:18 +0800
Subject: arm64: ioremap: add ioremap_cache macro

Add ioremap_cache macro, because some code will test if this macro
is defined or not, and will generate a generric version if not defined,
for example, memremap.c do like this.

Signed-off-by: yalin wang <yalin.wang2010@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/io.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 44be1e03ed65..d6b620c9a112 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -172,6 +172,7 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 #define ioremap_wc(addr, size)		__ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
 #define ioremap_wt(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
 #define iounmap				__iounmap
+#define ioremap_cache                  ioremap_cache
 
 /*
  * io{read,write}{16,32}be() macros
-- 
cgit v1.2.3-58-ga151


From 03875ad52fdde1f110f663470a45d3dcc34b8fef Mon Sep 17 00:00:00 2001
From: yalin wang <yalin.wang2010@gmail.com>
Date: Mon, 12 Oct 2015 14:52:59 +0800
Subject: arm64: add kc_offset_to_vaddr and kc_vaddr_to_offset macro

This patch add kc_offset_to_vaddr() and kc_vaddr_to_offset(),
the default version doesn't work on arm64, because arm64 kernel address
is below the PAGE_OFFSET, like module address and vmemmap address are
all below PAGE_OFFSET address.

Signed-off-by: yalin wang <yalin.wang2010@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/pgtable.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index e3b515fbf0c2..83965dd63d2f 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -673,6 +673,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 
 #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
+#define	kc_vaddr_to_offset(v) ((v) & ~VA_START)
+#define	kc_offset_to_vaddr(o) ((o) | VA_START)
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PGTABLE_H */
-- 
cgit v1.2.3-58-ga151


From 52fa1927e953d8b1fbdcf8a19dc75c1499cc3d5d Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 13 Oct 2015 16:18:17 +0100
Subject: Revert "arm64: ioremap: add ioremap_cache macro"

This reverts commit 1b6d7f8742d5d46c478f10c9e57da18d049b116d.

This patch would conflict with Dan Williams' "tree-wide convert to
memremap()" series (ioremap_cache replaced by arch_memremap)

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/io.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index d6b620c9a112..44be1e03ed65 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -172,7 +172,6 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 #define ioremap_wc(addr, size)		__ioremap((addr), (size), __pgprot(PROT_NORMAL_NC))
 #define ioremap_wt(addr, size)		__ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE))
 #define iounmap				__iounmap
-#define ioremap_cache                  ioremap_cache
 
 /*
  * io{read,write}{16,32}be() macros
-- 
cgit v1.2.3-58-ga151


From 7db743c6d8bb7a08396a0ea3084fb9f8ebccc577 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 16 Oct 2015 14:34:50 +0100
Subject: arm64: Minor coding style fixes for kc_offset_to_vaddr and
 kc_vaddr_to_offset

These were introduced by commit 03875ad52fdd (arm64: add
kc_offset_to_vaddr and kc_vaddr_to_offset macro).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/pgtable.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 83965dd63d2f..c3d22a507648 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -673,8 +673,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 
 #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
-#define	kc_vaddr_to_offset(v) ((v) & ~VA_START)
-#define	kc_offset_to_vaddr(o) ((o) | VA_START)
+#define kc_vaddr_to_offset(v)	((v) & ~VA_START)
+#define kc_offset_to_vaddr(o)	((o) | VA_START)
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PGTABLE_H */
-- 
cgit v1.2.3-58-ga151


From 77f3228f771506bac4b1308571a62c769552f8f8 Mon Sep 17 00:00:00 2001
From: Mark Salyzyn <salyzyn@android.com>
Date: Tue, 13 Oct 2015 14:30:51 -0700
Subject: arm64: AArch32 user space PC alignment exception

ARMv7 does not have a PC alignment exception. ARMv8 AArch32
user space however can produce a PC alignment exception. Add
handler so that we do not dump an unexpected stack trace in
the logs.

Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/entry.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 4306c937b1ff..7ed3d75f6304 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -430,6 +430,8 @@ el0_sync_compat:
 	b.eq	el0_fpsimd_acc
 	cmp	x24, #ESR_ELx_EC_FP_EXC32	// FP/ASIMD exception
 	b.eq	el0_fpsimd_exc
+	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
+	b.eq	el0_sp_pc
 	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL0
 	b.eq	el0_undef
 	cmp	x24, #ESR_ELx_EC_CP15_32	// CP15 MRC/MCR trap
-- 
cgit v1.2.3-58-ga151


From fbc61a26e6b7a2ebc399559ea22df4a35ac05fcb Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linaro.org>
Date: Fri, 18 Sep 2015 14:09:00 -0700
Subject: arm64: debug: Fix typo in debug-monitors.c

Fix handers to handlers.

Signed-off-by: Yang Shi <yang.shi@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/debug-monitors.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index cebf78661a55..8eef30f92651 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -201,7 +201,7 @@ void unregister_step_hook(struct step_hook *hook)
 }
 
 /*
- * Call registered single step handers
+ * Call registered single step handlers
  * There is no Syndrome info to check for determining the handler.
  * So we call all the registered handlers, until the right handler is
  * found which returns zero.
-- 
cgit v1.2.3-58-ga151


From 87d1587bef394cd8a77dbca8cc92885fe7041b8f Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:19:27 +0100
Subject: arm64: Move swapper pagetable definitions

Move the kernel pagetable (both swapper and idmap) definitions
from the generic asm/page.h to a new file, asm/kernel-pgtable.h.

This is mostly a cosmetic change, to clean up the asm/page.h to
get rid of the arch specific details which are not needed by the
generic code.

Also renames the symbols to prevent conflicts. e.g,
 	BLOCK_SHIFT => SWAPPER_BLOCK_SHIFT

Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/kernel-pgtable.h | 65 +++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/page.h           | 18 ---------
 arch/arm64/kernel/head.S                | 37 ++++---------------
 arch/arm64/kernel/vmlinux.lds.S         |  1 +
 4 files changed, 74 insertions(+), 47 deletions(-)
 create mode 100644 arch/arm64/include/asm/kernel-pgtable.h

(limited to 'arch')

diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
new file mode 100644
index 000000000000..3c92fa7bad2b
--- /dev/null
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -0,0 +1,65 @@
+/*
+ * Kernel page table mapping
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_KERNEL_PGTABLE_H
+#define __ASM_KERNEL_PGTABLE_H
+
+/*
+ * The idmap and swapper page tables need some space reserved in the kernel
+ * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
+ * map the kernel. With the 64K page configuration, swapper and idmap need to
+ * map to pte level. The swapper also maps the FDT (see __create_page_tables
+ * for more information). Note that the number of ID map translation levels
+ * could be increased on the fly if system RAM is out of reach for the default
+ * VA range, so 3 pages are reserved in all cases.
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
+#else
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
+#endif
+
+#define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
+#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
+
+/* Initial memory map size */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define SWAPPER_BLOCK_SHIFT	PAGE_SHIFT
+#define SWAPPER_BLOCK_SIZE	PAGE_SIZE
+#define SWAPPER_TABLE_SHIFT	PMD_SHIFT
+#else
+#define SWAPPER_BLOCK_SHIFT	SECTION_SHIFT
+#define SWAPPER_BLOCK_SIZE	SECTION_SIZE
+#define SWAPPER_TABLE_SHIFT	PUD_SHIFT
+#endif
+
+
+/*
+ * Initial memory map attributes.
+ */
+#define SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#ifdef CONFIG_ARM64_64K_PAGES
+#define SWAPPER_MM_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
+#else
+#define SWAPPER_MM_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#endif
+
+
+#endif	/* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 33892ef0172d..da3235494ffd 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -34,24 +34,6 @@
 #define CONT_SIZE		(_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT))
 #define CONT_MASK		(~(CONT_SIZE-1))
 
-/*
- * The idmap and swapper page tables need some space reserved in the kernel
- * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
- * map the kernel. With the 64K page configuration, swapper and idmap need to
- * map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information). Note that the number of ID map translation levels
- * could be increased on the fly if system RAM is out of reach for the default
- * VA range, so 3 pages are reserved in all cases.
- */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
-#else
-#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
-#endif
-
-#define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
-#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
-
 #ifndef __ASSEMBLY__
 
 #include <asm/pgtable-types.h>
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 2a8c1d5b3fae..149fce35fd9a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -29,6 +29,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 #include <asm/cputype.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
 #include <asm/thread_info.h>
 #include <asm/pgtable-hwdef.h>
@@ -46,31 +47,9 @@
 #error TEXT_OFFSET must be less than 2MB
 #endif
 
-#ifdef CONFIG_ARM64_64K_PAGES
-#define BLOCK_SHIFT	PAGE_SHIFT
-#define BLOCK_SIZE	PAGE_SIZE
-#define TABLE_SHIFT	PMD_SHIFT
-#else
-#define BLOCK_SHIFT	SECTION_SHIFT
-#define BLOCK_SIZE	SECTION_SIZE
-#define TABLE_SHIFT	PUD_SHIFT
-#endif
-
 #define KERNEL_START	_text
 #define KERNEL_END	_end
 
-/*
- * Initial memory map attributes.
- */
-#define PTE_FLAGS	PTE_TYPE_PAGE | PTE_AF | PTE_SHARED
-#define PMD_FLAGS	PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S
-
-#ifdef CONFIG_ARM64_64K_PAGES
-#define MM_MMUFLAGS	PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS
-#else
-#define MM_MMUFLAGS	PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS
-#endif
-
 /*
  * Kernel startup entry point.
  * ---------------------------
@@ -293,7 +272,7 @@ ENDPROC(preserve_boot_args)
 	.macro	create_pgd_entry, tbl, virt, tmp1, tmp2
 	create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
 #if SWAPPER_PGTABLE_LEVELS == 3
-	create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
+	create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
 #endif
 	.endm
 
@@ -305,15 +284,15 @@ ENDPROC(preserve_boot_args)
  * Corrupts:	phys, start, end, pstate
  */
 	.macro	create_block_map, tbl, flags, phys, start, end
-	lsr	\phys, \phys, #BLOCK_SHIFT
-	lsr	\start, \start, #BLOCK_SHIFT
+	lsr	\phys, \phys, #SWAPPER_BLOCK_SHIFT
+	lsr	\start, \start, #SWAPPER_BLOCK_SHIFT
 	and	\start, \start, #PTRS_PER_PTE - 1	// table index
-	orr	\phys, \flags, \phys, lsl #BLOCK_SHIFT	// table entry
-	lsr	\end, \end, #BLOCK_SHIFT
+	orr	\phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT	// table entry
+	lsr	\end, \end, #SWAPPER_BLOCK_SHIFT
 	and	\end, \end, #PTRS_PER_PTE - 1		// table end index
 9999:	str	\phys, [\tbl, \start, lsl #3]		// store the entry
 	add	\start, \start, #1			// next entry
-	add	\phys, \phys, #BLOCK_SIZE		// next block
+	add	\phys, \phys, #SWAPPER_BLOCK_SIZE		// next block
 	cmp	\start, \end
 	b.ls	9999b
 	.endm
@@ -350,7 +329,7 @@ __create_page_tables:
 	cmp	x0, x6
 	b.lo	1b
 
-	ldr	x7, =MM_MMUFLAGS
+	ldr	x7, =SWAPPER_MM_MMUFLAGS
 
 	/*
 	 * Create the identity mapping.
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 98073332e2d0..8a5d97b461c0 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -5,6 +5,7 @@
  */
 
 #include <asm-generic/vmlinux.lds.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/page.h>
-- 
cgit v1.2.3-58-ga151


From b433dce056d3814dc4b33e5a8a533d6401ffcfb0 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:19:28 +0100
Subject: arm64: Handle section maps for swapper/idmap

We use section maps with 4K page size to create the swapper/idmaps.
So far we have used !64K or 4K checks to handle the case where we
use the section maps.
This patch adds a new symbol, ARM64_SWAPPER_USES_SECTION_MAPS, to
handle cases where we use section maps, instead of using the page size
symbols.

Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/kernel-pgtable.h | 37 ++++++++++++-----
 arch/arm64/mm/mmu.c                     | 74 +++++++++++++++------------------
 2 files changed, 59 insertions(+), 52 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 3c92fa7bad2b..4e08faabd9e4 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -19,6 +19,19 @@
 #ifndef __ASM_KERNEL_PGTABLE_H
 #define __ASM_KERNEL_PGTABLE_H
 
+
+/*
+ * The linear mapping and the start of memory are both 2M aligned (per
+ * the arm64 booting.txt requirements). Hence we can use section mapping
+ * with 4K (section size = 2M) but not with 16K (section size = 32M) or
+ * 64K (section size = 512M).
+ */
+#ifdef CONFIG_ARM64_4K_PAGES
+#define ARM64_SWAPPER_USES_SECTION_MAPS 1
+#else
+#define ARM64_SWAPPER_USES_SECTION_MAPS 0
+#endif
+
 /*
  * The idmap and swapper page tables need some space reserved in the kernel
  * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
@@ -28,26 +41,28 @@
  * could be increased on the fly if system RAM is out of reach for the default
  * VA range, so 3 pages are reserved in all cases.
  */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
-#else
+#if ARM64_SWAPPER_USES_SECTION_MAPS
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
+#else
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
 #endif
 
 #define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
 #define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
 
 /* Initial memory map size */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define SWAPPER_BLOCK_SHIFT	PAGE_SHIFT
-#define SWAPPER_BLOCK_SIZE	PAGE_SIZE
-#define SWAPPER_TABLE_SHIFT	PMD_SHIFT
-#else
+#if ARM64_SWAPPER_USES_SECTION_MAPS
 #define SWAPPER_BLOCK_SHIFT	SECTION_SHIFT
 #define SWAPPER_BLOCK_SIZE	SECTION_SIZE
 #define SWAPPER_TABLE_SHIFT	PUD_SHIFT
+#else
+#define SWAPPER_BLOCK_SHIFT	PAGE_SHIFT
+#define SWAPPER_BLOCK_SIZE	PAGE_SIZE
+#define SWAPPER_TABLE_SHIFT	PMD_SHIFT
 #endif
 
+/* The size of the initial kernel direct mapping */
+#define SWAPPER_INIT_MAP_SIZE	(_AC(1, UL) << SWAPPER_TABLE_SHIFT)
 
 /*
  * Initial memory map attributes.
@@ -55,10 +70,10 @@
 #define SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
-#ifdef CONFIG_ARM64_64K_PAGES
-#define SWAPPER_MM_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
-#else
+#if ARM64_SWAPPER_USES_SECTION_MAPS
 #define SWAPPER_MM_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
+#else
+#define SWAPPER_MM_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
 #endif
 
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index eed6d52f5e54..c2fa6b56613c 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -32,6 +32,7 @@
 
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 #include <asm/sizes.h>
@@ -406,14 +407,11 @@ static void __init map_mem(void)
 	 * memory addressable from the initial direct kernel mapping.
 	 *
 	 * The initial direct kernel mapping, located at swapper_pg_dir, gives
-	 * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from
-	 * PHYS_OFFSET (which must be aligned to 2MB as per
-	 * Documentation/arm64/booting.txt).
+	 * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
+	 * memory starting from PHYS_OFFSET (which must be aligned to 2MB as
+	 * per Documentation/arm64/booting.txt).
 	 */
-	if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
-		limit = PHYS_OFFSET + PMD_SIZE;
-	else
-		limit = PHYS_OFFSET + PUD_SIZE;
+	limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
 	memblock_set_current_limit(limit);
 
 	/* map all the memory banks */
@@ -424,21 +422,24 @@ static void __init map_mem(void)
 		if (start >= end)
 			break;
 
-#ifndef CONFIG_ARM64_64K_PAGES
-		/*
-		 * For the first memory bank align the start address and
-		 * current memblock limit to prevent create_mapping() from
-		 * allocating pte page tables from unmapped memory.
-		 * When 64K pages are enabled, the pte page table for the
-		 * first PGDIR_SIZE is already present in swapper_pg_dir.
-		 */
-		if (start < limit)
-			start = ALIGN(start, PMD_SIZE);
-		if (end < limit) {
-			limit = end & PMD_MASK;
-			memblock_set_current_limit(limit);
+		if (ARM64_SWAPPER_USES_SECTION_MAPS) {
+			/*
+			 * For the first memory bank align the start address and
+			 * current memblock limit to prevent create_mapping() from
+			 * allocating pte page tables from unmapped memory. With
+			 * the section maps, if the first block doesn't end on section
+			 * size boundary, create_mapping() will try to allocate a pte
+			 * page, which may be returned from an unmapped area.
+			 * When section maps are not used, the pte page table for the
+			 * current limit is already present in swapper_pg_dir.
+			 */
+			if (start < limit)
+				start = ALIGN(start, SECTION_SIZE);
+			if (end < limit) {
+				limit = end & SECTION_MASK;
+				memblock_set_current_limit(limit);
+			}
 		}
-#endif
 		__map_memblock(start, end);
 	}
 
@@ -551,12 +552,12 @@ int kern_addr_valid(unsigned long addr)
 	return pfn_valid(pte_pfn(*pte));
 }
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-#ifdef CONFIG_ARM64_64K_PAGES
+#if !ARM64_SWAPPER_USES_SECTION_MAPS
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 {
 	return vmemmap_populate_basepages(start, end, node);
 }
-#else	/* !CONFIG_ARM64_64K_PAGES */
+#else	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 {
 	unsigned long addr = start;
@@ -691,7 +692,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 {
 	const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
 	pgprot_t prot = PAGE_KERNEL | PTE_RDONLY;
-	int granularity, size, offset;
+	int size, offset;
 	void *dt_virt;
 
 	/*
@@ -717,24 +718,15 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 	 */
 	BUILD_BUG_ON(dt_virt_base % SZ_2M);
 
-	if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) {
-		BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PMD_SHIFT !=
-			     __fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT);
-
-		granularity = PAGE_SIZE;
-	} else {
-		BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> PUD_SHIFT !=
-			     __fix_to_virt(FIX_BTMAP_BEGIN) >> PUD_SHIFT);
-
-		granularity = PMD_SIZE;
-	}
+	BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
+		     __fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);
 
-	offset = dt_phys % granularity;
+	offset = dt_phys % SWAPPER_BLOCK_SIZE;
 	dt_virt = (void *)dt_virt_base + offset;
 
 	/* map the first chunk so we can read the size from the header */
-	create_mapping(round_down(dt_phys, granularity), dt_virt_base,
-		       granularity, prot);
+	create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+		       SWAPPER_BLOCK_SIZE, prot);
 
 	if (fdt_check_header(dt_virt) != 0)
 		return NULL;
@@ -743,9 +735,9 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 	if (size > MAX_FDT_SIZE)
 		return NULL;
 
-	if (offset + size > granularity)
-		create_mapping(round_down(dt_phys, granularity), dt_virt_base,
-			       round_up(offset + size, granularity), prot);
+	if (offset + size > SWAPPER_BLOCK_SIZE)
+		create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+			       round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
 
 	memblock_reserve(dt_phys, size);
 
-- 
cgit v1.2.3-58-ga151


From 686e783869d37935510b4d2c266948677be82665 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:19:29 +0100
Subject: arm64: Introduce helpers for page table levels

Introduce helpers for finding the number of page table
levels required for a given VA width, shift for a particular
page table level.

Convert the existing users to the new helpers. More users
to follow.

Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/pgtable-hwdef.h | 39 +++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 95c1ec04a9ed..d6739e836f7b 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -16,13 +16,46 @@
 #ifndef __ASM_PGTABLE_HWDEF_H
 #define __ASM_PGTABLE_HWDEF_H
 
+/*
+ * Number of page-table levels required to address 'va_bits' wide
+ * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
+ * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence:
+ *
+ *  levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3))
+ *
+ * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d))
+ *
+ * We cannot include linux/kernel.h which defines DIV_ROUND_UP here
+ * due to build issues. So we open code DIV_ROUND_UP here:
+ *
+ *	((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3))
+ *
+ * which gets simplified as :
+ */
+#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3))
+
+/*
+ * Size mapped by an entry at level n ( 0 <= n <= 3)
+ * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits
+ * in the final page. The maximum number of translation levels supported by
+ * the architecture is 4. Hence, starting at at level n, we have further
+ * ((4 - n) - 1) levels of translation excluding the offset within the page.
+ * So, the total number of bits mapped by an entry at level n is :
+ *
+ *  ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT
+ *
+ * Rearranging it a bit we get :
+ *   (4 - n) * (PAGE_SHIFT - 3) + 3
+ */
+#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n)	((PAGE_SHIFT - 3) * (4 - (n)) + 3)
+
 #define PTRS_PER_PTE		(1 << (PAGE_SHIFT - 3))
 
 /*
  * PMD_SHIFT determines the size a level 2 page table entry can map.
  */
 #if CONFIG_PGTABLE_LEVELS > 2
-#define PMD_SHIFT		((PAGE_SHIFT - 3) * 2 + 3)
+#define PMD_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(2)
 #define PMD_SIZE		(_AC(1, UL) << PMD_SHIFT)
 #define PMD_MASK		(~(PMD_SIZE-1))
 #define PTRS_PER_PMD		PTRS_PER_PTE
@@ -32,7 +65,7 @@
  * PUD_SHIFT determines the size a level 1 page table entry can map.
  */
 #if CONFIG_PGTABLE_LEVELS > 3
-#define PUD_SHIFT		((PAGE_SHIFT - 3) * 3 + 3)
+#define PUD_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
 #define PUD_SIZE		(_AC(1, UL) << PUD_SHIFT)
 #define PUD_MASK		(~(PUD_SIZE-1))
 #define PTRS_PER_PUD		PTRS_PER_PTE
@@ -42,7 +75,7 @@
  * PGDIR_SHIFT determines the size a top-level page table entry can map
  * (depending on the configuration, this level can be 0, 1 or 2).
  */
-#define PGDIR_SHIFT		((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3)
+#define PGDIR_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
 #define PGDIR_SIZE		(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK		(~(PGDIR_SIZE-1))
 #define PTRS_PER_PGD		(1 << (VA_BITS - PGDIR_SHIFT))
-- 
cgit v1.2.3-58-ga151


From c265af51c5f17ec5c0cf0d960133bd6fe5f39eb9 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:19:30 +0100
Subject: arm64: Calculate size for idmap_pg_dir at compile time

Now that we can calculate the number of levels required for
mapping a va width, reserve exact number of pages that would
be required to cover the idmap. The idmap should be able to handle
the maximum physical address size supported.

Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/kernel-pgtable.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 4e08faabd9e4..a459714ee29e 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -39,16 +39,19 @@
  * map to pte level. The swapper also maps the FDT (see __create_page_tables
  * for more information). Note that the number of ID map translation levels
  * could be increased on the fly if system RAM is out of reach for the default
- * VA range, so 3 pages are reserved in all cases.
+ * VA range, so pages required to map highest possible PA are reserved in all
+ * cases.
  */
 #if ARM64_SWAPPER_USES_SECTION_MAPS
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
+#define IDMAP_PGTABLE_LEVELS	(ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
 #else
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS)
+#define IDMAP_PGTABLE_LEVELS	(ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
 #endif
 
 #define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
-#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
+#define IDMAP_DIR_SIZE		(IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
 
 /* Initial memory map size */
 #if ARM64_SWAPPER_USES_SECTION_MAPS
-- 
cgit v1.2.3-58-ga151


From 6a3fd4026c0c0ac279265c2a5d228233b5bbd28f Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:19:31 +0100
Subject: arm64: Handle 4 level page table for swapper

At the moment, we only support maximum of 3-level page table for
swapper. With 48bit VA, 64K has only 3 levels and 4K uses section
mapping. Add support for 4-level page table for swapper, needed
by 16K pages.

Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/head.S | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 149fce35fd9a..7ace955b0f31 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -271,7 +271,10 @@ ENDPROC(preserve_boot_args)
  */
 	.macro	create_pgd_entry, tbl, virt, tmp1, tmp2
 	create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
-#if SWAPPER_PGTABLE_LEVELS == 3
+#if SWAPPER_PGTABLE_LEVELS > 3
+	create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2
+#endif
+#if SWAPPER_PGTABLE_LEVELS > 2
 	create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
 #endif
 	.endm
-- 
cgit v1.2.3-58-ga151


From 755e70b7e3f189aa2503c510fb98208e477a5030 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:19:32 +0100
Subject: arm64: Clean config usages for page size

We use !CONFIG_ARM64_64K_PAGES for CONFIG_ARM64_4K_PAGES
(and vice versa) in code. It all worked well, so far since
we only had two options. Now, with the introduction of 16K,
these cases will break. This patch cleans up the code to
use the required CONFIG symbol expression without the assumption
that !64K => 4K (and vice versa)

Cc: Will Deacon <will.deacon@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig                   | 4 ++--
 arch/arm64/Kconfig.debug             | 2 +-
 arch/arm64/include/asm/thread_info.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 2782c1189208..4654c2761b7c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -466,7 +466,7 @@ config ARCH_WANT_GENERAL_HUGETLB
 	def_bool y
 
 config ARCH_WANT_HUGE_PMD_SHARE
-	def_bool y if !ARM64_64K_PAGES
+	def_bool y if ARM64_4K_PAGES
 
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	def_bool y
@@ -678,7 +678,7 @@ source "fs/Kconfig.binfmt"
 
 config COMPAT
 	bool "Kernel support for 32-bit EL0"
-	depends on !ARM64_64K_PAGES || EXPERT
+	depends on ARM64_4K_PAGES || EXPERT
 	select COMPAT_BINFMT_ELF
 	select HAVE_UID16
 	select OLD_SIGSUSPEND3
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index d6285ef9b5f9..c24d6adc0420 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -77,7 +77,7 @@ config DEBUG_RODATA
           If in doubt, say Y
 
 config DEBUG_ALIGN_RODATA
-	depends on DEBUG_RODATA && !ARM64_64K_PAGES
+	depends on DEBUG_RODATA && ARM64_4K_PAGES
 	bool "Align linker sections up to SECTION_SIZE"
 	help
 	  If this option is enabled, sections that may potentially be marked as
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 555c6dec5ef2..5eac6a2300af 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -23,7 +23,7 @@
 
 #include <linux/compiler.h>
 
-#ifndef CONFIG_ARM64_64K_PAGES
+#ifdef CONFIG_ARM64_4K_PAGES
 #define THREAD_SIZE_ORDER	2
 #endif
 
-- 
cgit v1.2.3-58-ga151


From e25781e3dea914cc4c34c946bae5fa3d4516516f Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 19 Oct 2015 14:19:33 +0100
Subject: arm64: Simplify NR_FIX_BTMAPS calculation

We choose NR_FIX_BTMAPS such that each slot (NR_FIX_BTMAPS * PAGE_SIZE)
can address 256K.

Use division to derive NR_FIX_BTMAPS rather than defining it for each
page size.

Signed-off-by:  Mark Rutland <mark.rutland@arm.com>
Signed-off-by:  Suzuki K. Poulose <suzuki.poulose@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/fixmap.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 8b9884c726ad..309704544d22 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -17,6 +17,7 @@
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
+#include <linux/sizes.h>
 #include <asm/boot.h>
 #include <asm/page.h>
 
@@ -55,11 +56,7 @@ enum fixed_addresses {
 	 * Temporary boot-time mappings, used by early_ioremap(),
 	 * before ioremap() is functional.
 	 */
-#ifdef CONFIG_ARM64_64K_PAGES
-#define NR_FIX_BTMAPS		4
-#else
-#define NR_FIX_BTMAPS		64
-#endif
+#define NR_FIX_BTMAPS		(SZ_256K / PAGE_SIZE)
 #define FIX_BTMAPS_SLOTS	7
 #define TOTAL_FIX_BTMAPS	(NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
 
-- 
cgit v1.2.3-58-ga151


From db488be354bc85724d7b9523e94435fdaa761a35 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:19:34 +0100
Subject: arm64: Kconfig: Fix help text about AArch32 support with 64K pages

Update the help text for ARM64_64K_PAGES to reflect the reality
about AArch32 support.

Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4654c2761b7c..13b51a0c4e7f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -368,8 +368,8 @@ config ARM64_64K_PAGES
 	help
 	  This feature enables 64KB pages support (4KB by default)
 	  allowing only two levels of page tables and faster TLB
-	  look-up. AArch32 emulation is not available when this feature
-	  is enabled.
+	  look-up. AArch32 emulation requires applications compiled
+	  with 64K aligned segments.
 
 endchoice
 
-- 
cgit v1.2.3-58-ga151


From 4bf8b96ed3f7e11422d8b4f58cf43896ed02d1f6 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:19:35 +0100
Subject: arm64: Check for selected granule support

Ensure that the selected page size is supported by the CPU(s). If it doesn't
park it.

Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/sysreg.h | 20 ++++++++++++++++++++
 arch/arm64/kernel/head.S        | 17 +++++++++++++++--
 2 files changed, 35 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index a7f3d4b2514d..d59cb231a673 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -44,6 +44,26 @@
 #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
 				     (!!x)<<8 | 0x1f)
 
+
+#define ID_AA64MMFR0_TGRAN4_SHIFT	28
+#define ID_AA64MMFR0_TGRAN64_SHIFT	24
+#define ID_AA64MMFR0_TGRAN16_SHIFT	20
+
+#define ID_AA64MMFR0_TGRAN4_NI		0xf
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED	0x0
+#define ID_AA64MMFR0_TGRAN64_NI		0xf
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED	0x0
+#define ID_AA64MMFR0_TGRAN16_NI		0x0
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED	0x1
+
+#if defined(CONFIG_ARM64_4K_PAGES)
+#define ID_AA64MMFR0_TGRAN_SHIFT	ID_AA64MMFR0_TGRAN4_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED	ID_AA64MMFR0_TGRAN4_SUPPORTED
+#elif defined(CONFIG_ARM64_64K_PAGES)
+#define ID_AA64MMFR0_TGRAN_SHIFT	ID_AA64MMFR0_TGRAN64_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED	ID_AA64MMFR0_TGRAN64_SUPPORTED
+#endif
+
 #ifdef __ASSEMBLY__
 
 	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 7ace955b0f31..514c1cc9fdc5 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -31,10 +31,11 @@
 #include <asm/cputype.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
-#include <asm/thread_info.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
 #include <asm/page.h>
+#include <asm/sysreg.h>
+#include <asm/thread_info.h>
 #include <asm/virt.h>
 
 #define __PHYS_OFFSET	(KERNEL_START - TEXT_OFFSET)
@@ -613,10 +614,17 @@ ENDPROC(__secondary_switched)
  *  x0  = SCTLR_EL1 value for turning on the MMU.
  *  x27 = *virtual* address to jump to upon completion
  *
- * other registers depend on the function called upon completion
+ * Other registers depend on the function called upon completion.
+ *
+ * Checks if the selected granule size is supported by the CPU.
+ * If it isn't, park the CPU
  */
 	.section	".idmap.text", "ax"
 __enable_mmu:
+	mrs	x1, ID_AA64MMFR0_EL1
+	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
+	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
+	b.ne	__no_granule_support
 	ldr	x5, =vectors
 	msr	vbar_el1, x5
 	msr	ttbr0_el1, x25			// load TTBR0
@@ -634,3 +642,8 @@ __enable_mmu:
 	isb
 	br	x27
 ENDPROC(__enable_mmu)
+
+__no_granule_support:
+	wfe
+	b __no_granule_support
+ENDPROC(__no_granule_support)
-- 
cgit v1.2.3-58-ga151


From 9d372c9fab34cd8803141871195141995f85c7f7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 19 Oct 2015 14:19:36 +0100
Subject: arm64: Add page size to the kernel image header

This patch adds the page size to the arm64 kernel image header
so that one can infer the PAGESIZE used by the kernel. This will
be helpful to diagnose failures to boot the kernel with page size
not supported by the CPU.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/arm64/booting.txt | 7 ++++++-
 arch/arm64/kernel/image.h       | 5 ++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
index 7d9d3c2286b2..aaf6d77e4148 100644
--- a/Documentation/arm64/booting.txt
+++ b/Documentation/arm64/booting.txt
@@ -104,7 +104,12 @@ Header notes:
 - The flags field (introduced in v3.17) is a little-endian 64-bit field
   composed as follows:
   Bit 0:	Kernel endianness.  1 if BE, 0 if LE.
-  Bits 1-63:	Reserved.
+  Bit 1-2:	Kernel Page size.
+			0 - Unspecified.
+			1 - 4K
+			2 - 16K
+			3 - 64K
+  Bits 3-63:	Reserved.
 
 - When image_size is zero, a bootloader should attempt to keep as much
   memory as possible free for use by the kernel immediately after the
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 6eb8fee93321..bc2abb8b1599 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -47,7 +47,10 @@
 #define __HEAD_FLAG_BE	0
 #endif
 
-#define __HEAD_FLAGS	(__HEAD_FLAG_BE << 0)
+#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
+
+#define __HEAD_FLAGS	((__HEAD_FLAG_BE << 0) |	\
+			 (__HEAD_FLAG_PAGE_SIZE << 1))
 
 /*
  * These will output as part of the Image header, which should be little-endian
-- 
cgit v1.2.3-58-ga151


From 44eaacf1b8999b15cec89bd9d9cd989da4798d53 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:19:37 +0100
Subject: arm64: Add 16K page size support

This patch turns on the 16K page support in the kernel. We
support 48bit VA (4 level page tables) and 47bit VA (3 level
page tables).

With 16K we can map 128 entries using contiguous bit hint
at level 3 to map 2M using single TLB entry.

TODO: 16K supports 32 contiguous entries at level 2 to get us
1G(which is not yet supported by the infrastructure). That should
be a separate patch altogether.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Jeremy Linton <jeremy.linton@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Steve Capper <steve.capper@linaro.org>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig                   | 42 +++++++++++++++++++++++++++++++-----
 arch/arm64/include/asm/page.h        |  3 +++
 arch/arm64/include/asm/sysreg.h      |  3 +++
 arch/arm64/include/asm/thread_info.h |  2 ++
 arch/arm64/kvm/Kconfig               |  3 +++
 arch/arm64/mm/proc.S                 |  4 +++-
 6 files changed, 51 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 13b51a0c4e7f..854166422c42 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -173,7 +173,8 @@ config PGTABLE_LEVELS
 	default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
 	default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
 	default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
-	default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48
+	default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47
+	default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48
 
 source "init/Kconfig"
 
@@ -363,6 +364,13 @@ config ARM64_4K_PAGES
 	help
 	  This feature enables 4KB pages support.
 
+config ARM64_16K_PAGES
+	bool "16KB"
+	help
+	  The system will use 16KB pages support. AArch32 emulation
+	  requires applications compiled with 16K (or a multiple of 16K)
+	  aligned segments.
+
 config ARM64_64K_PAGES
 	bool "64KB"
 	help
@@ -376,6 +384,7 @@ endchoice
 choice
 	prompt "Virtual address space size"
 	default ARM64_VA_BITS_39 if ARM64_4K_PAGES
+	default ARM64_VA_BITS_47 if ARM64_16K_PAGES
 	default ARM64_VA_BITS_42 if ARM64_64K_PAGES
 	help
 	  Allows choosing one of multiple possible virtual address
@@ -390,6 +399,10 @@ config ARM64_VA_BITS_42
 	bool "42-bit"
 	depends on ARM64_64K_PAGES
 
+config ARM64_VA_BITS_47
+	bool "47-bit"
+	depends on ARM64_16K_PAGES
+
 config ARM64_VA_BITS_48
 	bool "48-bit"
 
@@ -399,6 +412,7 @@ config ARM64_VA_BITS
 	int
 	default 39 if ARM64_VA_BITS_39
 	default 42 if ARM64_VA_BITS_42
+	default 47 if ARM64_VA_BITS_47
 	default 48 if ARM64_VA_BITS_48
 
 config CPU_BIG_ENDIAN
@@ -466,7 +480,7 @@ config ARCH_WANT_GENERAL_HUGETLB
 	def_bool y
 
 config ARCH_WANT_HUGE_PMD_SHARE
-	def_bool y if ARM64_4K_PAGES
+	def_bool y if ARM64_4K_PAGES || ARM64_16K_PAGES
 
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	def_bool y
@@ -503,7 +517,25 @@ config XEN
 config FORCE_MAX_ZONEORDER
 	int
 	default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
+	default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE)
 	default "11"
+	help
+	  The kernel memory allocator divides physically contiguous memory
+	  blocks into "zones", where each zone is a power of two number of
+	  pages.  This option selects the largest power of two that the kernel
+	  keeps in the memory allocator.  If you need to allocate very large
+	  blocks of physically contiguous memory, then you may need to
+	  increase this value.
+
+	  This config option is actually maximum order plus one. For example,
+	  a value of 11 means that the largest free memory block is 2^10 pages.
+
+	  We make sure that we can allocate upto a HugePage size for each configuration.
+	  Hence we have :
+		MAX_ORDER = (PMD_SHIFT - PAGE_SHIFT) + 1 => PAGE_SHIFT - 2
+
+	  However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
+	  4M allocations matching the default size used by generic code.
 
 menuconfig ARMV8_DEPRECATED
 	bool "Emulate deprecated/obsolete ARMv8 instructions"
@@ -689,9 +721,9 @@ config COMPAT
 	  the user helper functions, VFP support and the ptrace interface are
 	  handled appropriately by the kernel.
 
-	  If you also enabled CONFIG_ARM64_64K_PAGES, please be aware that you
-	  will only be able to execute AArch32 binaries that were compiled with
-	  64k aligned segments.
+	  If you use a page size other than 4KB (i.e, 16KB or 64KB), please be aware
+	  that you will only be able to execute AArch32 binaries that were compiled
+	  with page size aligned segments.
 
 	  If you want to execute 32-bit userspace applications, say Y.
 
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index da3235494ffd..9b2f5a9d019d 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -24,6 +24,9 @@
 #ifdef CONFIG_ARM64_64K_PAGES
 #define PAGE_SHIFT		16
 #define CONT_SHIFT		5
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define PAGE_SHIFT		14
+#define CONT_SHIFT		7
 #else
 #define PAGE_SHIFT		12
 #define CONT_SHIFT		4
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d59cb231a673..4b57e1080538 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -59,6 +59,9 @@
 #if defined(CONFIG_ARM64_4K_PAGES)
 #define ID_AA64MMFR0_TGRAN_SHIFT	ID_AA64MMFR0_TGRAN4_SHIFT
 #define ID_AA64MMFR0_TGRAN_SUPPORTED	ID_AA64MMFR0_TGRAN4_SUPPORTED
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define ID_AA64MMFR0_TGRAN_SHIFT	ID_AA64MMFR0_TGRAN16_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED	ID_AA64MMFR0_TGRAN16_SUPPORTED
 #elif defined(CONFIG_ARM64_64K_PAGES)
 #define ID_AA64MMFR0_TGRAN_SHIFT	ID_AA64MMFR0_TGRAN64_SHIFT
 #define ID_AA64MMFR0_TGRAN_SUPPORTED	ID_AA64MMFR0_TGRAN64_SUPPORTED
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 5eac6a2300af..90c7ff233735 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -25,6 +25,8 @@
 
 #ifdef CONFIG_ARM64_4K_PAGES
 #define THREAD_SIZE_ORDER	2
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define THREAD_SIZE_ORDER	0
 #endif
 
 #define THREAD_SIZE		16384
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 5c7e920e4861..6a7d5cd772e6 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -19,6 +19,7 @@ if VIRTUALIZATION
 config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
 	depends on OF
+	depends on !ARM64_16K_PAGES
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
@@ -33,6 +34,8 @@ config KVM
 	select HAVE_KVM_IRQFD
 	---help---
 	  Support hosting virtualized guest machines.
+	  We don't support KVM with 16K page tables yet, due to the multiple
+	  levels of fake page tables.
 
 	  If unsure, say N.
 
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 91cb2eaac256..3a4b8b19978b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -30,7 +30,9 @@
 
 #ifdef CONFIG_ARM64_64K_PAGES
 #define TCR_TG_FLAGS	TCR_TG0_64K | TCR_TG1_64K
-#else
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define TCR_TG_FLAGS	TCR_TG0_16K | TCR_TG1_16K
+#else /* CONFIG_ARM64_4K_PAGES */
 #define TCR_TG_FLAGS	TCR_TG0_4K | TCR_TG1_4K
 #endif
 
-- 
cgit v1.2.3-58-ga151


From 215399392fe40f33880ea9de49a1ed8ee26edd10 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:19:38 +0100
Subject: arm64: 36 bit VA

36bit VA lets us use 2 level page tables while limiting the
available address space to 64GB.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Steve Capper <steve.capper@linaro.org>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 854166422c42..a346671d474e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -170,6 +170,7 @@ config FIX_EARLYCON_MEM
 
 config PGTABLE_LEVELS
 	int
+	default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36
 	default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42
 	default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48
 	default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39
@@ -391,6 +392,10 @@ choice
 	  space sizes. The level of translation table is determined by
 	  a combination of page size and virtual address space size.
 
+config ARM64_VA_BITS_36
+	bool "36-bit"
+	depends on ARM64_16K_PAGES
+
 config ARM64_VA_BITS_39
 	bool "39-bit"
 	depends on ARM64_4K_PAGES
@@ -410,6 +415,7 @@ endchoice
 
 config ARM64_VA_BITS
 	int
+	default 36 if ARM64_VA_BITS_36
 	default 39 if ARM64_VA_BITS_39
 	default 42 if ARM64_VA_BITS_42
 	default 47 if ARM64_VA_BITS_47
@@ -480,7 +486,7 @@ config ARCH_WANT_GENERAL_HUGETLB
 	def_bool y
 
 config ARCH_WANT_HUGE_PMD_SHARE
-	def_bool y if ARM64_4K_PAGES || ARM64_16K_PAGES
+	def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
 
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	def_bool y
-- 
cgit v1.2.3-58-ga151


From 096b3224d5e7239ec3e5033bbc7612ac2d5dec3a Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@marvell.com>
Date: Wed, 16 Sep 2015 22:23:21 +0800
Subject: arm64: add cpu_idle tracepoints to arch_cpu_idle

Currently, if cpuidle is disabled or not supported, powertop reports
zero wakeups and zero events. This is due to the cpu_idle tracepoints
are missing.

This patch is to make cpu_idle tracepoints always available even if
cpuidle is disabled or not supported.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/process.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 223b093c9440..f75b540bc3b4 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -44,6 +44,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/personality.h>
 #include <linux/notifier.h>
+#include <trace/events/power.h>
 
 #include <asm/compat.h>
 #include <asm/cacheflush.h>
@@ -75,8 +76,10 @@ void arch_cpu_idle(void)
 	 * This should do all the clock switching and wait for interrupt
 	 * tricks
 	 */
+	trace_cpu_idle_rcuidle(1, smp_processor_id());
 	cpu_do_idle();
 	local_irq_enable();
+	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-- 
cgit v1.2.3-58-ga151


From 9f93f3e9461a30f425fdba15784db67ce878ce00 Mon Sep 17 00:00:00 2001
From: Jungseok Lee <jungseoklee85@gmail.com>
Date: Sat, 17 Oct 2015 14:28:11 +0000
Subject: arm64: Synchronise dump_backtrace() with perf callchain

Unlike perf callchain relying on walk_stackframe(), dump_backtrace()
has its own backtrace logic. A major difference between them is the
moment a symbol is recorded. Perf writes down a symbol *before*
calling unwind_frame(), but dump_backtrace() prints it out *after*
unwind_frame(). As a result, the last valid symbol cannot be hooked
in case of dump_backtrace(). This patch addresses the issue as
synchronising dump_backtrace() with perf callchain.

A simple test and its results are as follows:

- crash trigger

 $ sudo echo c > /proc/sysrq-trigger

- current status

 Call trace:
 [<fffffe00003dc738>] sysrq_handle_crash+0x24/0x30
 [<fffffe00003dd2ac>] __handle_sysrq+0x128/0x19c
 [<fffffe00003dd730>] write_sysrq_trigger+0x60/0x74
 [<fffffe0000249fc4>] proc_reg_write+0x84/0xc0
 [<fffffe00001f2638>] __vfs_write+0x44/0x104
 [<fffffe00001f2e60>] vfs_write+0x98/0x1a8
 [<fffffe00001f3730>] SyS_write+0x50/0xb0

- with this change

 Call trace:
 [<fffffe00003dc738>] sysrq_handle_crash+0x24/0x30
 [<fffffe00003dd2ac>] __handle_sysrq+0x128/0x19c
 [<fffffe00003dd730>] write_sysrq_trigger+0x60/0x74
 [<fffffe0000249fc4>] proc_reg_write+0x84/0xc0
 [<fffffe00001f2638>] __vfs_write+0x44/0x104
 [<fffffe00001f2e60>] vfs_write+0x98/0x1a8
 [<fffffe00001f3730>] SyS_write+0x50/0xb0
 [<fffffe00000939ec>] el0_svc_naked+0x20/0x28

Note that this patch does not cover a case where MMU is disabled. The
last stack frame of swapper, for example, has PC in a form of physical
address. Unfortunately, a simple conversion using phys_to_virt() cannot
cover all scenarios since PC is retrieved from LR - 4, not LR. It is
a big tradeoff to change both head.S and unwind_frame() for only a few
of symbols in *.S. Thus, this hunk does not take care of the case.

Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: James Morse <james.morse@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Jungseok Lee <jungseoklee85@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/traps.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index f93aae5e4307..e9b9b5364393 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -103,12 +103,12 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
 	set_fs(fs);
 }
 
-static void dump_backtrace_entry(unsigned long where, unsigned long stack)
+static void dump_backtrace_entry(unsigned long where)
 {
+	/*
+	 * Note that 'where' can have a physical address, but it's not handled.
+	 */
 	print_ip_sym(where);
-	if (in_exception_text(where))
-		dump_mem("", "Exception stack", stack,
-			 stack + sizeof(struct pt_regs), false);
 }
 
 static void dump_instr(const char *lvl, struct pt_regs *regs)
@@ -172,12 +172,17 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 	pr_emerg("Call trace:\n");
 	while (1) {
 		unsigned long where = frame.pc;
+		unsigned long stack;
 		int ret;
 
+		dump_backtrace_entry(where);
 		ret = unwind_frame(&frame);
 		if (ret < 0)
 			break;
-		dump_backtrace_entry(where, frame.sp);
+		stack = frame.sp;
+		if (in_exception_text(where))
+			dump_mem("", "Exception stack", stack,
+				 stack + sizeof(struct pt_regs), false);
 	}
 }
 
-- 
cgit v1.2.3-58-ga151


From 56a3f30e029396948989b96716f27b87e3510e0f Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 20 Oct 2015 14:59:20 +0100
Subject: arm64: Make 36-bit VA depend on EXPERT

Commit 215399392fe4 (arm64: 36 bit VA) introduced 36-bit VA support for
the arm64 kernel when the 16KB page configuration is enabled. While this
is a valid hardware configuration, it's not something we want to
encourage since it reduces the memory (and I/O) range that the kernel
can access. Make this depend on EXPERT to avoid complaints of Linux not
mapping the whole RAM, especially on platforms following the ARM
recommended memory map.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a346671d474e..c62fb0393567 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -393,7 +393,7 @@ choice
 	  a combination of page size and virtual address space size.
 
 config ARM64_VA_BITS_36
-	bool "36-bit"
+	bool "36-bit" if EXPERT
 	depends on ARM64_16K_PAGES
 
 config ARM64_VA_BITS_39
-- 
cgit v1.2.3-58-ga151


From 64f17818977d0989f7d05347670777611b295799 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:38 +0100
Subject: arm64: Make the CPU information more clear

At early boot, we print the CPU version/revision. On a heterogeneous
system, we could have different types of CPUs. Print the CPU info for
all active cpus. Also, the secondary CPUs prints the message only when
they turn online.

Also, remove the redundant 'revision' information which doesn't
make any sense without the 'variant' field.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/setup.c | 3 +--
 arch/arm64/kernel/smp.c   | 3 ++-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 79df79a2ea61..1d503e2d6957 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -203,8 +203,7 @@ static void __init setup_processor(void)
 	u32 cwg;
 	int cls;
 
-	printk("CPU: AArch64 Processor [%08x] revision %d\n",
-	       read_cpuid_id(), read_cpuid_id() & 15);
+	pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
 
 	sprintf(init_utsname()->machine, ELF_PLATFORM);
 	elf_hwcap = 0;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b7a973d6861e..c1d044b52f42 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -144,7 +144,6 @@ asmlinkage void secondary_start_kernel(void)
 	current->active_mm = mm;
 
 	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
-	printk("CPU%u: Booted secondary processor\n", cpu);
 
 	/*
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
@@ -177,6 +176,8 @@ asmlinkage void secondary_start_kernel(void)
 	 * the CPU migration code to notice that the CPU is online
 	 * before we continue.
 	 */
+	pr_info("CPU%u: Booted secondary processor [%08x]\n",
+					 cpu, read_cpuid_id());
 	set_cpu_online(cpu, true);
 	complete(&cpu_running);
 
-- 
cgit v1.2.3-58-ga151


From 3a75578efae64b94d76eacbf8adf2a3ab13c6aa1 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:39 +0100
Subject: arm64: Delay ELF HWCAP initialisation until all CPUs are up

Delay the ELF HWCAP initialisation until all the (enabled) CPUs are
up, i.e, smp_cpus_done(). This is in preparation for detecting the
common features across the CPUS and creating a consistent ELF HWCAP
for the system.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpufeature.h |  1 +
 arch/arm64/kernel/setup.c           | 16 ++++++++--------
 arch/arm64/kernel/smp.c             |  1 +
 3 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 171570702bb8..b7769f698a75 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -81,6 +81,7 @@ static inline int __attribute_const__ cpuid_feature_extract_field(u64 features,
 	return (s64)(features << (64 - 4 - field)) >> (64 - 4);
 }
 
+void __init setup_cpu_features(void);
 
 void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			    const char *info);
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 1d503e2d6957..4f0408e1df0d 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -196,20 +196,13 @@ static void __init smp_build_mpidr_hash(void)
 	__flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
 }
 
-static void __init setup_processor(void)
+void __init setup_cpu_features(void)
 {
 	u64 features;
 	s64 block;
 	u32 cwg;
 	int cls;
 
-	pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
-
-	sprintf(init_utsname()->machine, ELF_PLATFORM);
-	elf_hwcap = 0;
-
-	cpuinfo_store_boot_cpu();
-
 	/*
 	 * Check for sane CTR_EL0.CWG value.
 	 */
@@ -293,6 +286,13 @@ static void __init setup_processor(void)
 #endif
 }
 
+static void __init setup_processor(void)
+{
+	pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
+	sprintf(init_utsname()->machine, ELF_PLATFORM);
+	cpuinfo_store_boot_cpu();
+}
+
 static void __init setup_machine_fdt(phys_addr_t dt_phys)
 {
 	void *dt_virt = fixmap_remap_fdt(dt_phys);
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index c1d044b52f42..77763d9cb510 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -320,6 +320,7 @@ static void __init hyp_mode_check(void)
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
+	setup_cpu_features();
 	hyp_mode_check();
 	apply_alternatives_all();
 }
-- 
cgit v1.2.3-58-ga151


From 4b998ff1885eecd3dc330bf057e24667c1db84a4 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:40 +0100
Subject: arm64: Delay cpuinfo_store_boot_cpu

At the moment the boot CPU stores the cpuinfo long before the
PERCPU areas are initialised by the kernel. This could be problematic
as the non-boot CPU data structures might get copied with the data
from the boot CPU, giving us no chance to detect if a particular CPU
updated its cpuinfo. This patch delays the boot cpu store to
smp_prepare_boot_cpu().

Also kills the setup_processor() which no longer does meaningful
work.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/setup.c | 10 ++--------
 arch/arm64/kernel/smp.c   |  1 +
 2 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 4f0408e1df0d..47e005141598 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -286,13 +286,6 @@ void __init setup_cpu_features(void)
 #endif
 }
 
-static void __init setup_processor(void)
-{
-	pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
-	sprintf(init_utsname()->machine, ELF_PLATFORM);
-	cpuinfo_store_boot_cpu();
-}
-
 static void __init setup_machine_fdt(phys_addr_t dt_phys)
 {
 	void *dt_virt = fixmap_remap_fdt(dt_phys);
@@ -404,8 +397,9 @@ u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
 
 void __init setup_arch(char **cmdline_p)
 {
-	setup_processor();
+	pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
 
+	sprintf(init_utsname()->machine, ELF_PLATFORM);
 	init_mm.start_code = (unsigned long) _text;
 	init_mm.end_code   = (unsigned long) _etext;
 	init_mm.end_data   = (unsigned long) _edata;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 77763d9cb510..d1d00499f0cf 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -327,6 +327,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 void __init smp_prepare_boot_cpu(void)
 {
+	cpuinfo_store_boot_cpu();
 	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
 }
 
-- 
cgit v1.2.3-58-ga151


From 9cdf8ec4a86b9310111f741bbaf11df9120e0482 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:41 +0100
Subject: arm64: Move cpu feature detection code

This patch moves the CPU feature detection code from
 arch/arm64/kernel/{setup.c to cpufeature.c}

The plan is to consolidate all the CPU feature handling
in cpufeature.c.

Apart from changing pr_fmt from "alternatives" to "cpu features",
there are no functional changes.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/cpufeature.c | 110 ++++++++++++++++++++++++++++++++++++++++-
 arch/arm64/kernel/setup.c      | 107 ---------------------------------------
 2 files changed, 109 insertions(+), 108 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 3c9aed32f70b..e49be15c7ff9 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -16,13 +16,31 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#define pr_fmt(fmt) "alternatives: " fmt
+#define pr_fmt(fmt) "CPU features: " fmt
 
 #include <linux/types.h>
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
 
+unsigned long elf_hwcap __read_mostly;
+EXPORT_SYMBOL_GPL(elf_hwcap);
+
+#ifdef CONFIG_COMPAT
+#define COMPAT_ELF_HWCAP_DEFAULT	\
+				(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
+				 COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
+				 COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
+				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
+				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
+				 COMPAT_HWCAP_LPAE)
+unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
+unsigned int compat_elf_hwcap2 __read_mostly;
+#endif
+
+DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+
+
 static bool
 feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 {
@@ -100,3 +118,93 @@ void check_local_cpu_features(void)
 {
 	check_cpu_capabilities(arm64_features, "detected feature:");
 }
+
+void __init setup_cpu_features(void)
+{
+	u64 features;
+	s64 block;
+	u32 cwg;
+	int cls;
+
+	/*
+	 * Check for sane CTR_EL0.CWG value.
+	 */
+	cwg = cache_type_cwg();
+	cls = cache_line_size();
+	if (!cwg)
+		pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
+			cls);
+	if (L1_CACHE_BYTES < cls)
+		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
+			L1_CACHE_BYTES, cls);
+
+	/*
+	 * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
+	 * The blocks we test below represent incremental functionality
+	 * for non-negative values. Negative values are reserved.
+	 */
+	features = read_cpuid(ID_AA64ISAR0_EL1);
+	block = cpuid_feature_extract_field(features, 4);
+	if (block > 0) {
+		switch (block) {
+		default:
+		case 2:
+			elf_hwcap |= HWCAP_PMULL;
+		case 1:
+			elf_hwcap |= HWCAP_AES;
+		case 0:
+			break;
+		}
+	}
+
+	if (cpuid_feature_extract_field(features, 8) > 0)
+		elf_hwcap |= HWCAP_SHA1;
+
+	if (cpuid_feature_extract_field(features, 12) > 0)
+		elf_hwcap |= HWCAP_SHA2;
+
+	if (cpuid_feature_extract_field(features, 16) > 0)
+		elf_hwcap |= HWCAP_CRC32;
+
+	block = cpuid_feature_extract_field(features, 20);
+	if (block > 0) {
+		switch (block) {
+		default:
+		case 2:
+			elf_hwcap |= HWCAP_ATOMICS;
+		case 1:
+			/* RESERVED */
+		case 0:
+			break;
+		}
+	}
+
+#ifdef CONFIG_COMPAT
+	/*
+	 * ID_ISAR5_EL1 carries similar information as above, but pertaining to
+	 * the AArch32 32-bit execution state.
+	 */
+	features = read_cpuid(ID_ISAR5_EL1);
+	block = cpuid_feature_extract_field(features, 4);
+	if (block > 0) {
+		switch (block) {
+		default:
+		case 2:
+			compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
+		case 1:
+			compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
+		case 0:
+			break;
+		}
+	}
+
+	if (cpuid_feature_extract_field(features, 8) > 0)
+		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
+
+	if (cpuid_feature_extract_field(features, 12) > 0)
+		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
+
+	if (cpuid_feature_extract_field(features, 16) > 0)
+		compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
+#endif
+}
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 47e005141598..baf0da8c7d8a 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -65,23 +65,6 @@
 #include <asm/efi.h>
 #include <asm/xen/hypervisor.h>
 
-unsigned long elf_hwcap __read_mostly;
-EXPORT_SYMBOL_GPL(elf_hwcap);
-
-#ifdef CONFIG_COMPAT
-#define COMPAT_ELF_HWCAP_DEFAULT	\
-				(COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\
-				 COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\
-				 COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\
-				 COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\
-				 COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\
-				 COMPAT_HWCAP_LPAE)
-unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
-unsigned int compat_elf_hwcap2 __read_mostly;
-#endif
-
-DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
-
 phys_addr_t __fdt_pointer __initdata;
 
 /*
@@ -196,96 +179,6 @@ static void __init smp_build_mpidr_hash(void)
 	__flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
 }
 
-void __init setup_cpu_features(void)
-{
-	u64 features;
-	s64 block;
-	u32 cwg;
-	int cls;
-
-	/*
-	 * Check for sane CTR_EL0.CWG value.
-	 */
-	cwg = cache_type_cwg();
-	cls = cache_line_size();
-	if (!cwg)
-		pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n",
-			cls);
-	if (L1_CACHE_BYTES < cls)
-		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
-			L1_CACHE_BYTES, cls);
-
-	/*
-	 * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
-	 * The blocks we test below represent incremental functionality
-	 * for non-negative values. Negative values are reserved.
-	 */
-	features = read_cpuid(ID_AA64ISAR0_EL1);
-	block = cpuid_feature_extract_field(features, 4);
-	if (block > 0) {
-		switch (block) {
-		default:
-		case 2:
-			elf_hwcap |= HWCAP_PMULL;
-		case 1:
-			elf_hwcap |= HWCAP_AES;
-		case 0:
-			break;
-		}
-	}
-
-	if (cpuid_feature_extract_field(features, 8) > 0)
-		elf_hwcap |= HWCAP_SHA1;
-
-	if (cpuid_feature_extract_field(features, 12) > 0)
-		elf_hwcap |= HWCAP_SHA2;
-
-	if (cpuid_feature_extract_field(features, 16) > 0)
-		elf_hwcap |= HWCAP_CRC32;
-
-	block = cpuid_feature_extract_field(features, 20);
-	if (block > 0) {
-		switch (block) {
-		default:
-		case 2:
-			elf_hwcap |= HWCAP_ATOMICS;
-		case 1:
-			/* RESERVED */
-		case 0:
-			break;
-		}
-	}
-
-#ifdef CONFIG_COMPAT
-	/*
-	 * ID_ISAR5_EL1 carries similar information as above, but pertaining to
-	 * the AArch32 32-bit execution state.
-	 */
-	features = read_cpuid(ID_ISAR5_EL1);
-	block = cpuid_feature_extract_field(features, 4);
-	if (block > 0) {
-		switch (block) {
-		default:
-		case 2:
-			compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
-		case 1:
-			compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
-		case 0:
-			break;
-		}
-	}
-
-	if (cpuid_feature_extract_field(features, 8) > 0)
-		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
-
-	if (cpuid_feature_extract_field(features, 12) > 0)
-		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
-
-	if (cpuid_feature_extract_field(features, 16) > 0)
-		compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
-#endif
-}
-
 static void __init setup_machine_fdt(phys_addr_t dt_phys)
 {
 	void *dt_virt = fixmap_remap_fdt(dt_phys);
-- 
cgit v1.2.3-58-ga151


From cdcf817b7e4b62b935d8797f7d07ea0b97760884 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:42 +0100
Subject: arm64: Move mixed endian support detection

Move the mixed endian support detection code to cpufeature.c
from cpuinfo.c. This also moves the update_cpu_features()
used by mixed endian detection code, which will get more
functionality.

Also moves the ID register field shifts to asm/sysreg.h,
where all the useful definitions will end up in later patches.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpu.h        |  2 ++
 arch/arm64/include/asm/cpufeature.h |  7 +++++++
 arch/arm64/include/asm/cputype.h    | 15 ---------------
 arch/arm64/include/asm/sysreg.h     |  2 ++
 arch/arm64/kernel/cpufeature.c      | 22 ++++++++++++++++++++++
 arch/arm64/kernel/cpuinfo.c         | 21 ---------------------
 6 files changed, 33 insertions(+), 36 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 8e797b2fcc01..30db691b0b7e 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -63,4 +63,6 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
 void cpuinfo_store_cpu(void);
 void __init cpuinfo_store_boot_cpu(void);
 
+void update_cpu_features(struct cpuinfo_arm64 *info);
+
 #endif /* __ASM_CPU_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index b7769f698a75..b5f313d42d79 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -10,6 +10,7 @@
 #define __ASM_CPUFEATURE_H
 
 #include <asm/hwcap.h>
+#include <asm/sysreg.h>
 
 /*
  * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
@@ -81,6 +82,12 @@ static inline int __attribute_const__ cpuid_feature_extract_field(u64 features,
 	return (s64)(features << (64 - 4 - field)) >> (64 - 4);
 }
 
+static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
+{
+	return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
+		cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1;
+}
+
 void __init setup_cpu_features(void);
 
 void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index ee6403df9fe4..31678b2f295f 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -72,15 +72,6 @@
 
 #define APM_CPU_PART_POTENZA	0x000
 
-#define ID_AA64MMFR0_BIGENDEL0_SHIFT	16
-#define ID_AA64MMFR0_BIGENDEL0_MASK	(0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT)
-#define ID_AA64MMFR0_BIGENDEL0(mmfr0)	\
-	(((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT)
-#define ID_AA64MMFR0_BIGEND_SHIFT	8
-#define ID_AA64MMFR0_BIGEND_MASK	(0xf << ID_AA64MMFR0_BIGEND_SHIFT)
-#define ID_AA64MMFR0_BIGEND(mmfr0)	\
-	(((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT)
-
 #ifndef __ASSEMBLY__
 
 /*
@@ -112,12 +103,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void)
 {
 	return read_cpuid(CTR_EL0);
 }
-
-static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
-{
-	return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) ||
-		(ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1);
-}
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 4b57e1080538..061a2792179a 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -44,6 +44,8 @@
 #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
 				     (!!x)<<8 | 0x1f)
 
+#define ID_AA64MMFR0_BIGENDEL0_SHIFT	16
+#define ID_AA64MMFR0_BIGENDEL_SHIFT	8
 
 #define ID_AA64MMFR0_TGRAN4_SHIFT	28
 #define ID_AA64MMFR0_TGRAN64_SHIFT	24
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e49be15c7ff9..1ae8b24f334a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -22,7 +22,9 @@
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
 #include <asm/processor.h>
+#include <asm/sysreg.h>
 
+static bool mixed_endian_el0 = true;
 unsigned long elf_hwcap __read_mostly;
 EXPORT_SYMBOL_GPL(elf_hwcap);
 
@@ -41,6 +43,26 @@ unsigned int compat_elf_hwcap2 __read_mostly;
 DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 
 
+bool cpu_supports_mixed_endian_el0(void)
+{
+	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
+}
+
+bool system_supports_mixed_endian_el0(void)
+{
+	return mixed_endian_el0;
+}
+
+static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info)
+{
+	mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0);
+}
+
+void update_cpu_features(struct cpuinfo_arm64 *info)
+{
+	update_mixed_endian_el0_support(info);
+}
+
 static bool
 feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 {
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 75d5a867e7fb..8307b336dc1f 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -35,7 +35,6 @@
  */
 DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data);
 static struct cpuinfo_arm64 boot_cpu_data;
-static bool mixed_endian_el0 = true;
 
 static char *icache_policy_str[] = {
 	[ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN",
@@ -69,26 +68,6 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
 	pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
 }
 
-bool cpu_supports_mixed_endian_el0(void)
-{
-	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
-}
-
-bool system_supports_mixed_endian_el0(void)
-{
-	return mixed_endian_el0;
-}
-
-static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info)
-{
-	mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0);
-}
-
-static void update_cpu_features(struct cpuinfo_arm64 *info)
-{
-	update_mixed_endian_el0_support(info);
-}
-
 static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu)
 {
 	if ((boot & mask) == (cur & mask))
-- 
cgit v1.2.3-58-ga151


From 12d11817eaafa414eeb47af684093eb2165ebe37 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:43 +0100
Subject: arm64: Move /proc/cpuinfo handling code

This patch moves the /proc/cpuinfo handling code:

arch/arm64/kernel/{setup.c to cpuinfo.c}

No functional changes

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/cpuinfo.c | 124 ++++++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/setup.c   | 123 -------------------------------------------
 2 files changed, 124 insertions(+), 123 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 8307b336dc1f..0dadb6922ab3 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -24,8 +24,11 @@
 #include <linux/bug.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/personality.h>
 #include <linux/preempt.h>
 #include <linux/printk.h>
+#include <linux/seq_file.h>
+#include <linux/sched.h>
 #include <linux/smp.h>
 
 /*
@@ -45,6 +48,127 @@ static char *icache_policy_str[] = {
 
 unsigned long __icache_flags;
 
+static const char *hwcap_str[] = {
+	"fp",
+	"asimd",
+	"evtstrm",
+	"aes",
+	"pmull",
+	"sha1",
+	"sha2",
+	"crc32",
+	"atomics",
+	NULL
+};
+
+#ifdef CONFIG_COMPAT
+static const char *compat_hwcap_str[] = {
+	"swp",
+	"half",
+	"thumb",
+	"26bit",
+	"fastmult",
+	"fpa",
+	"vfp",
+	"edsp",
+	"java",
+	"iwmmxt",
+	"crunch",
+	"thumbee",
+	"neon",
+	"vfpv3",
+	"vfpv3d16",
+	"tls",
+	"vfpv4",
+	"idiva",
+	"idivt",
+	"vfpd32",
+	"lpae",
+	"evtstrm"
+};
+
+static const char *compat_hwcap2_str[] = {
+	"aes",
+	"pmull",
+	"sha1",
+	"sha2",
+	"crc32",
+	NULL
+};
+#endif /* CONFIG_COMPAT */
+
+static int c_show(struct seq_file *m, void *v)
+{
+	int i, j;
+
+	for_each_online_cpu(i) {
+		struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
+		u32 midr = cpuinfo->reg_midr;
+
+		/*
+		 * glibc reads /proc/cpuinfo to determine the number of
+		 * online processors, looking for lines beginning with
+		 * "processor".  Give glibc what it expects.
+		 */
+		seq_printf(m, "processor\t: %d\n", i);
+
+		/*
+		 * Dump out the common processor features in a single line.
+		 * Userspace should read the hwcaps with getauxval(AT_HWCAP)
+		 * rather than attempting to parse this, but there's a body of
+		 * software which does already (at least for 32-bit).
+		 */
+		seq_puts(m, "Features\t:");
+		if (personality(current->personality) == PER_LINUX32) {
+#ifdef CONFIG_COMPAT
+			for (j = 0; compat_hwcap_str[j]; j++)
+				if (compat_elf_hwcap & (1 << j))
+					seq_printf(m, " %s", compat_hwcap_str[j]);
+
+			for (j = 0; compat_hwcap2_str[j]; j++)
+				if (compat_elf_hwcap2 & (1 << j))
+					seq_printf(m, " %s", compat_hwcap2_str[j]);
+#endif /* CONFIG_COMPAT */
+		} else {
+			for (j = 0; hwcap_str[j]; j++)
+				if (elf_hwcap & (1 << j))
+					seq_printf(m, " %s", hwcap_str[j]);
+		}
+		seq_puts(m, "\n");
+
+		seq_printf(m, "CPU implementer\t: 0x%02x\n",
+			   MIDR_IMPLEMENTOR(midr));
+		seq_printf(m, "CPU architecture: 8\n");
+		seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
+		seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
+		seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
+	}
+
+	return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	return *pos < 1 ? (void *)1 : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return NULL;
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= c_show
+};
+
 static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
 {
 	unsigned int cpu = smp_processor_id();
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index baf0da8c7d8a..556301fbeeef 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -28,7 +28,6 @@
 #include <linux/console.h>
 #include <linux/cache.h>
 #include <linux/bootmem.h>
-#include <linux/seq_file.h>
 #include <linux/screen_info.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
@@ -44,7 +43,6 @@
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
 #include <linux/efi.h>
-#include <linux/personality.h>
 #include <linux/psci.h>
 
 #include <asm/acpi.h>
@@ -381,124 +379,3 @@ static int __init topology_init(void)
 	return 0;
 }
 subsys_initcall(topology_init);
-
-static const char *hwcap_str[] = {
-	"fp",
-	"asimd",
-	"evtstrm",
-	"aes",
-	"pmull",
-	"sha1",
-	"sha2",
-	"crc32",
-	"atomics",
-	NULL
-};
-
-#ifdef CONFIG_COMPAT
-static const char *compat_hwcap_str[] = {
-	"swp",
-	"half",
-	"thumb",
-	"26bit",
-	"fastmult",
-	"fpa",
-	"vfp",
-	"edsp",
-	"java",
-	"iwmmxt",
-	"crunch",
-	"thumbee",
-	"neon",
-	"vfpv3",
-	"vfpv3d16",
-	"tls",
-	"vfpv4",
-	"idiva",
-	"idivt",
-	"vfpd32",
-	"lpae",
-	"evtstrm"
-};
-
-static const char *compat_hwcap2_str[] = {
-	"aes",
-	"pmull",
-	"sha1",
-	"sha2",
-	"crc32",
-	NULL
-};
-#endif /* CONFIG_COMPAT */
-
-static int c_show(struct seq_file *m, void *v)
-{
-	int i, j;
-
-	for_each_online_cpu(i) {
-		struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i);
-		u32 midr = cpuinfo->reg_midr;
-
-		/*
-		 * glibc reads /proc/cpuinfo to determine the number of
-		 * online processors, looking for lines beginning with
-		 * "processor".  Give glibc what it expects.
-		 */
-		seq_printf(m, "processor\t: %d\n", i);
-
-		/*
-		 * Dump out the common processor features in a single line.
-		 * Userspace should read the hwcaps with getauxval(AT_HWCAP)
-		 * rather than attempting to parse this, but there's a body of
-		 * software which does already (at least for 32-bit).
-		 */
-		seq_puts(m, "Features\t:");
-		if (personality(current->personality) == PER_LINUX32) {
-#ifdef CONFIG_COMPAT
-			for (j = 0; compat_hwcap_str[j]; j++)
-				if (compat_elf_hwcap & (1 << j))
-					seq_printf(m, " %s", compat_hwcap_str[j]);
-
-			for (j = 0; compat_hwcap2_str[j]; j++)
-				if (compat_elf_hwcap2 & (1 << j))
-					seq_printf(m, " %s", compat_hwcap2_str[j]);
-#endif /* CONFIG_COMPAT */
-		} else {
-			for (j = 0; hwcap_str[j]; j++)
-				if (elf_hwcap & (1 << j))
-					seq_printf(m, " %s", hwcap_str[j]);
-		}
-		seq_puts(m, "\n");
-
-		seq_printf(m, "CPU implementer\t: 0x%02x\n",
-			   MIDR_IMPLEMENTOR(midr));
-		seq_printf(m, "CPU architecture: 8\n");
-		seq_printf(m, "CPU variant\t: 0x%x\n", MIDR_VARIANT(midr));
-		seq_printf(m, "CPU part\t: 0x%03x\n", MIDR_PARTNUM(midr));
-		seq_printf(m, "CPU revision\t: %d\n\n", MIDR_REVISION(midr));
-	}
-
-	return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
-	return *pos < 1 ? (void *)1 : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	++*pos;
-	return NULL;
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
-	.start	= c_start,
-	.next	= c_next,
-	.stop	= c_stop,
-	.show	= c_show
-};
-- 
cgit v1.2.3-58-ga151


From ce98a677d897dbaac86905652292fab1eeeb2b93 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:44 +0100
Subject: arm64: Handle width of a cpuid feature

Introduce a helper to extract cpuid feature for any given
width.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpufeature.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index b5f313d42d79..85507fecf287 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -76,10 +76,16 @@ static inline void cpus_set_cap(unsigned int num)
 		__set_bit(num, cpu_hwcaps);
 }
 
-static inline int __attribute_const__ cpuid_feature_extract_field(u64 features,
-								  int field)
+static inline int __attribute_const__
+cpuid_feature_extract_field_width(u64 features, int field, int width)
 {
-	return (s64)(features << (64 - 4 - field)) >> (64 - 4);
+	return (s64)(features << (64 - width - field)) >> (64 - width);
+}
+
+static inline int __attribute_const__
+cpuid_feature_extract_field(u64 features, int field)
+{
+	return cpuid_feature_extract_field_width(features, field, 4);
 }
 
 static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
-- 
cgit v1.2.3-58-ga151


From 3c739b57108436211c7f798ba3de0bb0cd8ef469 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:45 +0100
Subject: arm64: Keep track of CPU feature registers

This patch adds an infrastructure to keep track of the CPU feature
registers on the system. For each register, the infrastructure keeps
track of the system wide safe value of the feature bits. Also, tracks
the which fields of a register should be matched strictly across all
the CPUs on the system for the SANITY check infrastructure.

The feature bits are classified into following 3 types depending on
the implication of the possible values. This information is used to
decide the safe value for a feature.

LOWER_SAFE  - The smaller value is safer
HIGHER_SAFE - The bigger value is safer
EXACT       - We can't decide between the two, so a predefined safe_value is used.

This infrastructure will be later used to make better decisions for:

 - Kernel features (e.g, KVM, Debug)
 - SANITY Check
 - CPU capability
 - ELF HWCAP
 - Exposing CPU Feature register to userspace.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
[catalin.marinas@arm.com: whitespace fix]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpu.h        |   1 +
 arch/arm64/include/asm/cpufeature.h |  41 ++++
 arch/arm64/include/asm/sysreg.h     | 132 ++++++++++-
 arch/arm64/kernel/cpufeature.c      | 429 ++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/cpuinfo.c         |   3 +-
 5 files changed, 600 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 30db691b0b7e..704c17ba3ab0 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -63,6 +63,7 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data);
 void cpuinfo_store_cpu(void);
 void __init cpuinfo_store_boot_cpu(void);
 
+void __init init_cpu_features(struct cpuinfo_arm64 *info);
 void update_cpu_features(struct cpuinfo_arm64 *info);
 
 #endif /* __ASM_CPU_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 85507fecf287..01bb5cf995af 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -35,6 +35,37 @@
 
 #include <linux/kernel.h>
 
+/* CPU feature register tracking */
+enum ftr_type {
+	FTR_EXACT,	/* Use a predefined safe value */
+	FTR_LOWER_SAFE,	/* Smaller value is safe */
+	FTR_HIGHER_SAFE,/* Bigger value is safe */
+};
+
+#define FTR_STRICT	true	/* SANITY check strict matching required */
+#define FTR_NONSTRICT	false	/* SANITY check ignored */
+
+struct arm64_ftr_bits {
+	bool		strict;	  /* CPU Sanity check: strict matching required ? */
+	enum ftr_type	type;
+	u8		shift;
+	u8		width;
+	s64		safe_val; /* safe value for discrete features */
+};
+
+/*
+ * @arm64_ftr_reg - Feature register
+ * @strict_mask		Bits which should match across all CPUs for sanity.
+ * @sys_val		Safe value across the CPUs (system view)
+ */
+struct arm64_ftr_reg {
+	u32			sys_id;
+	const char		*name;
+	u64			strict_mask;
+	u64			sys_val;
+	struct arm64_ftr_bits	*ftr_bits;
+};
+
 struct arm64_cpu_capabilities {
 	const char *desc;
 	u16 capability;
@@ -88,6 +119,16 @@ cpuid_feature_extract_field(u64 features, int field)
 	return cpuid_feature_extract_field_width(features, field, 4);
 }
 
+static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
+{
+	return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
+}
+
+static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
+{
+	return cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width);
+}
+
 static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
 {
 	return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 ||
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 061a2792179a..d48ab5b41f52 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -22,9 +22,6 @@
 
 #include <asm/opcodes.h>
 
-#define SCTLR_EL1_CP15BEN	(0x1 << 5)
-#define SCTLR_EL1_SED		(0x1 << 8)
-
 /*
  * ARMv8 ARM reserves the following encoding for system registers:
  * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
@@ -38,14 +35,139 @@
 #define sys_reg(op0, op1, crn, crm, op2) \
 	((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
 
-#define REG_PSTATE_PAN_IMM                     sys_reg(0, 0, 4, 0, 4)
-#define SCTLR_EL1_SPAN                         (1 << 23)
+#define SYS_MIDR_EL1			sys_reg(3, 0, 0, 0, 0)
+#define SYS_MPIDR_EL1			sys_reg(3, 0, 0, 0, 5)
+#define SYS_REVIDR_EL1			sys_reg(3, 0, 0, 0, 6)
+
+#define SYS_ID_PFR0_EL1			sys_reg(3, 0, 0, 1, 0)
+#define SYS_ID_PFR1_EL1			sys_reg(3, 0, 0, 1, 1)
+#define SYS_ID_DFR0_EL1			sys_reg(3, 0, 0, 1, 2)
+#define SYS_ID_MMFR0_EL1		sys_reg(3, 0, 0, 1, 4)
+#define SYS_ID_MMFR1_EL1		sys_reg(3, 0, 0, 1, 5)
+#define SYS_ID_MMFR2_EL1		sys_reg(3, 0, 0, 1, 6)
+#define SYS_ID_MMFR3_EL1		sys_reg(3, 0, 0, 1, 7)
+
+#define SYS_ID_ISAR0_EL1		sys_reg(3, 0, 0, 2, 0)
+#define SYS_ID_ISAR1_EL1		sys_reg(3, 0, 0, 2, 1)
+#define SYS_ID_ISAR2_EL1		sys_reg(3, 0, 0, 2, 2)
+#define SYS_ID_ISAR3_EL1		sys_reg(3, 0, 0, 2, 3)
+#define SYS_ID_ISAR4_EL1		sys_reg(3, 0, 0, 2, 4)
+#define SYS_ID_ISAR5_EL1		sys_reg(3, 0, 0, 2, 5)
+#define SYS_ID_MMFR4_EL1		sys_reg(3, 0, 0, 2, 6)
+
+#define SYS_MVFR0_EL1			sys_reg(3, 0, 0, 3, 0)
+#define SYS_MVFR1_EL1			sys_reg(3, 0, 0, 3, 1)
+#define SYS_MVFR2_EL1			sys_reg(3, 0, 0, 3, 2)
+
+#define SYS_ID_AA64PFR0_EL1		sys_reg(3, 0, 0, 4, 0)
+#define SYS_ID_AA64PFR1_EL1		sys_reg(3, 0, 0, 4, 1)
+
+#define SYS_ID_AA64DFR0_EL1		sys_reg(3, 0, 0, 5, 0)
+#define SYS_ID_AA64DFR1_EL1		sys_reg(3, 0, 0, 5, 1)
+
+#define SYS_ID_AA64ISAR0_EL1		sys_reg(3, 0, 0, 6, 0)
+#define SYS_ID_AA64ISAR1_EL1		sys_reg(3, 0, 0, 6, 1)
+
+#define SYS_ID_AA64MMFR0_EL1		sys_reg(3, 0, 0, 7, 0)
+#define SYS_ID_AA64MMFR1_EL1		sys_reg(3, 0, 0, 7, 1)
+
+#define SYS_CNTFRQ_EL0			sys_reg(3, 3, 14, 0, 0)
+#define SYS_CTR_EL0			sys_reg(3, 3, 0, 0, 1)
+#define SYS_DCZID_EL0			sys_reg(3, 3, 0, 0, 7)
+
+#define REG_PSTATE_PAN_IMM		sys_reg(0, 0, 4, 0, 4)
 
 #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
 				     (!!x)<<8 | 0x1f)
 
+/* SCTLR_EL1 */
+#define SCTLR_EL1_CP15BEN	(0x1 << 5)
+#define SCTLR_EL1_SED		(0x1 << 8)
+#define SCTLR_EL1_SPAN		(0x1 << 23)
+
+
+/* id_aa64isar0 */
+#define ID_AA64ISAR0_RDM_SHIFT		28
+#define ID_AA64ISAR0_ATOMICS_SHIFT	20
+#define ID_AA64ISAR0_CRC32_SHIFT	16
+#define ID_AA64ISAR0_SHA2_SHIFT		12
+#define ID_AA64ISAR0_SHA1_SHIFT		8
+#define ID_AA64ISAR0_AES_SHIFT		4
+
+/* id_aa64pfr0 */
+#define ID_AA64PFR0_GIC_SHIFT		24
+#define ID_AA64PFR0_ASIMD_SHIFT		20
+#define ID_AA64PFR0_FP_SHIFT		16
+#define ID_AA64PFR0_EL3_SHIFT		12
+#define ID_AA64PFR0_EL2_SHIFT		8
+#define ID_AA64PFR0_EL1_SHIFT		4
+#define ID_AA64PFR0_EL0_SHIFT		0
+
+#define ID_AA64PFR0_FP_NI		0xf
+#define ID_AA64PFR0_FP_SUPPORTED	0x0
+#define ID_AA64PFR0_ASIMD_NI		0xf
+#define ID_AA64PFR0_ASIMD_SUPPORTED	0x0
+#define ID_AA64PFR0_EL1_64BIT_ONLY	0x1
+#define ID_AA64PFR0_EL0_64BIT_ONLY	0x1
+
+/* id_aa64mmfr0 */
+#define ID_AA64MMFR0_TGRAN4_SHIFT	28
+#define ID_AA64MMFR0_TGRAN64_SHIFT	24
+#define ID_AA64MMFR0_TGRAN16_SHIFT	20
 #define ID_AA64MMFR0_BIGENDEL0_SHIFT	16
+#define ID_AA64MMFR0_SNSMEM_SHIFT	12
 #define ID_AA64MMFR0_BIGENDEL_SHIFT	8
+#define ID_AA64MMFR0_ASID_SHIFT		4
+#define ID_AA64MMFR0_PARANGE_SHIFT	0
+
+#define ID_AA64MMFR0_TGRAN4_NI		0xf
+#define ID_AA64MMFR0_TGRAN4_SUPPORTED	0x0
+#define ID_AA64MMFR0_TGRAN64_NI		0xf
+#define ID_AA64MMFR0_TGRAN64_SUPPORTED	0x0
+#define ID_AA64MMFR0_TGRAN16_NI		0x0
+#define ID_AA64MMFR0_TGRAN16_SUPPORTED	0x1
+
+/* id_aa64mmfr1 */
+#define ID_AA64MMFR1_PAN_SHIFT		20
+#define ID_AA64MMFR1_LOR_SHIFT		16
+#define ID_AA64MMFR1_HPD_SHIFT		12
+#define ID_AA64MMFR1_VHE_SHIFT		8
+#define ID_AA64MMFR1_VMIDBITS_SHIFT	4
+#define ID_AA64MMFR1_HADBS_SHIFT	0
+
+/* id_aa64dfr0 */
+#define ID_AA64DFR0_CTX_CMPS_SHIFT	28
+#define ID_AA64DFR0_WRPS_SHIFT		20
+#define ID_AA64DFR0_BRPS_SHIFT		12
+#define ID_AA64DFR0_PMUVER_SHIFT	8
+#define ID_AA64DFR0_TRACEVER_SHIFT	4
+#define ID_AA64DFR0_DEBUGVER_SHIFT	0
+
+#define ID_ISAR5_RDM_SHIFT		24
+#define ID_ISAR5_CRC32_SHIFT		16
+#define ID_ISAR5_SHA2_SHIFT		12
+#define ID_ISAR5_SHA1_SHIFT		8
+#define ID_ISAR5_AES_SHIFT		4
+#define ID_ISAR5_SEVL_SHIFT		0
+
+#define MVFR0_FPROUND_SHIFT		28
+#define MVFR0_FPSHVEC_SHIFT		24
+#define MVFR0_FPSQRT_SHIFT		20
+#define MVFR0_FPDIVIDE_SHIFT		16
+#define MVFR0_FPTRAP_SHIFT		12
+#define MVFR0_FPDP_SHIFT		8
+#define MVFR0_FPSP_SHIFT		4
+#define MVFR0_SIMD_SHIFT		0
+
+#define MVFR1_SIMDFMAC_SHIFT		28
+#define MVFR1_FPHP_SHIFT		24
+#define MVFR1_SIMDHP_SHIFT		20
+#define MVFR1_SIMDSP_SHIFT		16
+#define MVFR1_SIMDINT_SHIFT		12
+#define MVFR1_SIMDLS_SHIFT		8
+#define MVFR1_FPDNAN_SHIFT		4
+#define MVFR1_FPFTZ_SHIFT		0
+
 
 #define ID_AA64MMFR0_TGRAN4_SHIFT	28
 #define ID_AA64MMFR0_TGRAN64_SHIFT	24
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 1ae8b24f334a..8d7abaa3a7f9 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -18,6 +18,8 @@
 
 #define pr_fmt(fmt) "CPU features: " fmt
 
+#include <linux/bsearch.h>
+#include <linux/sort.h>
 #include <linux/types.h>
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
@@ -58,8 +60,435 @@ static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info)
 	mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0);
 }
 
+#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
+	{						\
+		.strict = STRICT,			\
+		.type = TYPE,				\
+		.shift = SHIFT,				\
+		.width = WIDTH,				\
+		.safe_val = SAFE_VAL,			\
+	}
+
+#define ARM64_FTR_END					\
+	{						\
+		.width = 0,				\
+	}
+
+static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),	/* RAZ */
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
+	/* Linux doesn't care about the EL3 */
+	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
+	/* Linux shouldn't care about secure memory */
+	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0),
+	/*
+	 * Differing PARange is fine as long as all peripherals and memory are mapped
+	 * within the minimum PARange of all CPUs
+	 */
+	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_ctr[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1),	/* RAO */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),	/* CWG */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* ERG */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),	/* DminLine */
+	/*
+	 * Linux can handle differing I-cache policies. Userspace JITs will
+	 * make use of *minLine
+	 */
+	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0),	/* L1Ip */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* IminLine */
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_mmfr0[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),	/* InnerShr */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),	/* FCSE */
+	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* AuxReg */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0),	/* TCM */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0),	/* ShareLvl */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0),	/* OuterShr */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),	/* PMSA */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),	/* VMSA */
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_mvfr2[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* FPMisc */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* SIMDMisc */
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_dczid[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1),		/* DZP */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* BS */
+	ARM64_FTR_END,
+};
+
+
+static struct arm64_ftr_bits ftr_id_isar5[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_mmfr4[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* ac2 */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* RAZ */
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_id_pfr0[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0),	/* RAZ */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0),	/* State3 */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0),		/* State2 */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* State1 */
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* State0 */
+	ARM64_FTR_END,
+};
+
+/*
+ * Common ftr bits for a 32bit register with all hidden, strict
+ * attributes, with 4bit feature fields and a default safe value of
+ * 0. Covers the following 32bit registers:
+ * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
+ */
+static struct arm64_ftr_bits ftr_generic_32bits[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0),
+	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_generic[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_generic32[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0),
+	ARM64_FTR_END,
+};
+
+static struct arm64_ftr_bits ftr_aa64raz[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
+	ARM64_FTR_END,
+};
+
+#define ARM64_FTR_REG(id, table)		\
+	{					\
+		.sys_id = id,			\
+		.name = #id,			\
+		.ftr_bits = &((table)[0]),	\
+	}
+
+static struct arm64_ftr_reg arm64_ftr_regs[] = {
+
+	/* Op1 = 0, CRn = 0, CRm = 1 */
+	ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
+	ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0),
+	ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits),
+
+	/* Op1 = 0, CRn = 0, CRm = 2 */
+	ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5),
+	ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4),
+
+	/* Op1 = 0, CRn = 0, CRm = 3 */
+	ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits),
+	ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2),
+
+	/* Op1 = 0, CRn = 0, CRm = 4 */
+	ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
+	ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_aa64raz),
+
+	/* Op1 = 0, CRn = 0, CRm = 5 */
+	ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
+	ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_generic),
+
+	/* Op1 = 0, CRn = 0, CRm = 6 */
+	ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
+	ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_aa64raz),
+
+	/* Op1 = 0, CRn = 0, CRm = 7 */
+	ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
+	ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
+
+	/* Op1 = 3, CRn = 0, CRm = 0 */
+	ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr),
+	ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
+
+	/* Op1 = 3, CRn = 14, CRm = 0 */
+	ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_generic32),
+};
+
+static int search_cmp_ftr_reg(const void *id, const void *regp)
+{
+	return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id;
+}
+
+/*
+ * get_arm64_ftr_reg - Lookup a feature register entry using its
+ * sys_reg() encoding. With the array arm64_ftr_regs sorted in the
+ * ascending order of sys_id , we use binary search to find a matching
+ * entry.
+ *
+ * returns - Upon success,  matching ftr_reg entry for id.
+ *         - NULL on failure. It is upto the caller to decide
+ *	     the impact of a failure.
+ */
+static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
+{
+	return bsearch((const void *)(unsigned long)sys_id,
+			arm64_ftr_regs,
+			ARRAY_SIZE(arm64_ftr_regs),
+			sizeof(arm64_ftr_regs[0]),
+			search_cmp_ftr_reg);
+}
+
+static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val)
+{
+	u64 mask = arm64_ftr_mask(ftrp);
+
+	reg &= ~mask;
+	reg |= (ftr_val << ftrp->shift) & mask;
+	return reg;
+}
+
+static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur)
+{
+	s64 ret = 0;
+
+	switch (ftrp->type) {
+	case FTR_EXACT:
+		ret = ftrp->safe_val;
+		break;
+	case FTR_LOWER_SAFE:
+		ret = new < cur ? new : cur;
+		break;
+	case FTR_HIGHER_SAFE:
+		ret = new > cur ? new : cur;
+		break;
+	default:
+		BUG();
+	}
+
+	return ret;
+}
+
+static int __init sort_cmp_ftr_regs(const void *a, const void *b)
+{
+	return ((const struct arm64_ftr_reg *)a)->sys_id -
+		 ((const struct arm64_ftr_reg *)b)->sys_id;
+}
+
+static void __init swap_ftr_regs(void *a, void *b, int size)
+{
+	struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a;
+	*(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b;
+	*(struct arm64_ftr_reg *)b = tmp;
+}
+
+static void __init sort_ftr_regs(void)
+{
+	/* Keep the array sorted so that we can do the binary search */
+	sort(arm64_ftr_regs,
+		ARRAY_SIZE(arm64_ftr_regs),
+		sizeof(arm64_ftr_regs[0]),
+		sort_cmp_ftr_regs,
+		swap_ftr_regs);
+}
+
+/*
+ * Initialise the CPU feature register from Boot CPU values.
+ * Also initiliases the strict_mask for the register.
+ */
+static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
+{
+	u64 val = 0;
+	u64 strict_mask = ~0x0ULL;
+	struct arm64_ftr_bits *ftrp;
+	struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
+
+	BUG_ON(!reg);
+
+	for (ftrp  = reg->ftr_bits; ftrp->width; ftrp++) {
+		s64 ftr_new = arm64_ftr_value(ftrp, new);
+
+		val = arm64_ftr_set_value(ftrp, val, ftr_new);
+		if (!ftrp->strict)
+			strict_mask &= ~arm64_ftr_mask(ftrp);
+	}
+	reg->sys_val = val;
+	reg->strict_mask = strict_mask;
+}
+
+void __init init_cpu_features(struct cpuinfo_arm64 *info)
+{
+	/* Before we start using the tables, make sure it is sorted */
+	sort_ftr_regs();
+
+	init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr);
+	init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid);
+	init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq);
+	init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0);
+	init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1);
+	init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
+	init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
+	init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
+	init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
+	init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
+	init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
+	init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+	init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
+	init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
+	init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
+	init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
+	init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
+	init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+	init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
+	init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
+	init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
+	init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+	init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
+	init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+	init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
+	init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
+	init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+
+	/* This will be removed later, once we start using the infrastructure */
+	update_mixed_endian_el0_support(info);
+}
+
+static void update_cpu_ftr_reg(u32 sys_reg, u64 new)
+{
+	struct arm64_ftr_bits *ftrp;
+	struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
+
+	BUG_ON(!reg);
+
+	for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
+		s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val);
+		s64 ftr_new = arm64_ftr_value(ftrp, new);
+
+		if (ftr_cur == ftr_new)
+			continue;
+		/* Find a safe value */
+		ftr_new = arm64_ftr_safe_value(ftrp, ftr_new, ftr_cur);
+		reg->sys_val = arm64_ftr_set_value(ftrp, reg->sys_val, ftr_new);
+	}
+
+}
+
+/* Update CPU feature register from non-boot CPU */
 void update_cpu_features(struct cpuinfo_arm64 *info)
 {
+	update_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr);
+	update_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid);
+	update_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq);
+	update_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0);
+	update_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1);
+	update_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
+	update_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
+	update_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
+	update_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
+	update_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
+	update_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
+	update_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+	update_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
+	update_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
+	update_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
+	update_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
+	update_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
+	update_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+	update_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
+	update_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
+	update_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
+	update_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+	update_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
+	update_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+	update_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
+	update_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
+	update_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+
 	update_mixed_endian_el0_support(info);
 }
 
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 0dadb6922ab3..857aaf05a143 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -340,7 +340,6 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 
 	check_local_cpu_errata();
 	check_local_cpu_features();
-	update_cpu_features(info);
 }
 
 void cpuinfo_store_cpu(void)
@@ -348,6 +347,7 @@ void cpuinfo_store_cpu(void)
 	struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data);
 	__cpuinfo_store_cpu(info);
 	cpuinfo_sanity_check(info);
+	update_cpu_features(info);
 }
 
 void __init cpuinfo_store_boot_cpu(void)
@@ -356,4 +356,5 @@ void __init cpuinfo_store_boot_cpu(void)
 	__cpuinfo_store_cpu(info);
 
 	boot_cpu_data = *info;
+	init_cpu_features(&boot_cpu_data);
 }
-- 
cgit v1.2.3-58-ga151


From 3086d391f992984def0aa6aa4a36f54853c58536 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:46 +0100
Subject: arm64: Consolidate CPU Sanity check to CPU Feature infrastructure

This patch consolidates the CPU Sanity check to the new infrastructure.

Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpu.h   |   3 +-
 arch/arm64/kernel/cpufeature.c | 164 ++++++++++++++++++++++++++++++++---------
 arch/arm64/kernel/cpuinfo.c    | 113 +---------------------------
 3 files changed, 134 insertions(+), 146 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 704c17ba3ab0..b5e9cee4b5f8 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -64,6 +64,7 @@ void cpuinfo_store_cpu(void);
 void __init cpuinfo_store_boot_cpu(void);
 
 void __init init_cpu_features(struct cpuinfo_arm64 *info);
-void update_cpu_features(struct cpuinfo_arm64 *info);
+void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
+				 struct cpuinfo_arm64 *boot);
 
 #endif /* __ASM_CPU_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 8d7abaa3a7f9..aae181760e1f 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -438,12 +438,9 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 	update_mixed_endian_el0_support(info);
 }
 
-static void update_cpu_ftr_reg(u32 sys_reg, u64 new)
+static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
 {
 	struct arm64_ftr_bits *ftrp;
-	struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
-
-	BUG_ON(!reg);
 
 	for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
 		s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val);
@@ -458,36 +455,137 @@ static void update_cpu_ftr_reg(u32 sys_reg, u64 new)
 
 }
 
-/* Update CPU feature register from non-boot CPU */
-void update_cpu_features(struct cpuinfo_arm64 *info)
+static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot)
 {
-	update_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr);
-	update_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid);
-	update_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq);
-	update_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0);
-	update_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1);
-	update_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0);
-	update_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
-	update_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
-	update_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
-	update_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
-	update_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
-	update_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
-	update_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
-	update_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
-	update_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
-	update_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
-	update_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
-	update_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
-	update_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
-	update_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
-	update_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
-	update_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
-	update_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
-	update_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
-	update_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
-	update_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
-	update_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+	struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id);
+
+	BUG_ON(!regp);
+	update_cpu_ftr_reg(regp, val);
+	if ((boot & regp->strict_mask) == (val & regp->strict_mask))
+		return 0;
+	pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016llx, CPU%d: %#016llx\n",
+			regp->name, boot, cpu, val);
+	return 1;
+}
+
+/*
+ * Update system wide CPU feature registers with the values from a
+ * non-boot CPU. Also performs SANITY checks to make sure that there
+ * aren't any insane variations from that of the boot CPU.
+ */
+void update_cpu_features(int cpu,
+			 struct cpuinfo_arm64 *info,
+			 struct cpuinfo_arm64 *boot)
+{
+	int taint = 0;
+
+	/*
+	 * The kernel can handle differing I-cache policies, but otherwise
+	 * caches should look identical. Userspace JITs will make use of
+	 * *minLine.
+	 */
+	taint |= check_update_ftr_reg(SYS_CTR_EL0, cpu,
+				      info->reg_ctr, boot->reg_ctr);
+
+	/*
+	 * Userspace may perform DC ZVA instructions. Mismatched block sizes
+	 * could result in too much or too little memory being zeroed if a
+	 * process is preempted and migrated between CPUs.
+	 */
+	taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu,
+				      info->reg_dczid, boot->reg_dczid);
+
+	/* If different, timekeeping will be broken (especially with KVM) */
+	taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu,
+				      info->reg_cntfrq, boot->reg_cntfrq);
+
+	/*
+	 * The kernel uses self-hosted debug features and expects CPUs to
+	 * support identical debug features. We presently need CTX_CMPs, WRPs,
+	 * and BRPs to be identical.
+	 * ID_AA64DFR1 is currently RES0.
+	 */
+	taint |= check_update_ftr_reg(SYS_ID_AA64DFR0_EL1, cpu,
+				      info->reg_id_aa64dfr0, boot->reg_id_aa64dfr0);
+	taint |= check_update_ftr_reg(SYS_ID_AA64DFR1_EL1, cpu,
+				      info->reg_id_aa64dfr1, boot->reg_id_aa64dfr1);
+	/*
+	 * Even in big.LITTLE, processors should be identical instruction-set
+	 * wise.
+	 */
+	taint |= check_update_ftr_reg(SYS_ID_AA64ISAR0_EL1, cpu,
+				      info->reg_id_aa64isar0, boot->reg_id_aa64isar0);
+	taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu,
+				      info->reg_id_aa64isar1, boot->reg_id_aa64isar1);
+
+	/*
+	 * Differing PARange support is fine as long as all peripherals and
+	 * memory are mapped within the minimum PARange of all CPUs.
+	 * Linux should not care about secure memory.
+	 */
+	taint |= check_update_ftr_reg(SYS_ID_AA64MMFR0_EL1, cpu,
+				      info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0);
+	taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu,
+				      info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1);
+
+	/*
+	 * EL3 is not our concern.
+	 * ID_AA64PFR1 is currently RES0.
+	 */
+	taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu,
+				      info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0);
+	taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu,
+				      info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1);
+
+	/*
+	 * If we have AArch32, we care about 32-bit features for compat. These
+	 * registers should be RES0 otherwise.
+	 */
+	taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu,
+					info->reg_id_dfr0, boot->reg_id_dfr0);
+	taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu,
+					info->reg_id_isar0, boot->reg_id_isar0);
+	taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu,
+					info->reg_id_isar1, boot->reg_id_isar1);
+	taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu,
+					info->reg_id_isar2, boot->reg_id_isar2);
+	taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu,
+					info->reg_id_isar3, boot->reg_id_isar3);
+	taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu,
+					info->reg_id_isar4, boot->reg_id_isar4);
+	taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu,
+					info->reg_id_isar5, boot->reg_id_isar5);
+
+	/*
+	 * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
+	 * ACTLR formats could differ across CPUs and therefore would have to
+	 * be trapped for virtualization anyway.
+	 */
+	taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu,
+					info->reg_id_mmfr0, boot->reg_id_mmfr0);
+	taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu,
+					info->reg_id_mmfr1, boot->reg_id_mmfr1);
+	taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu,
+					info->reg_id_mmfr2, boot->reg_id_mmfr2);
+	taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu,
+					info->reg_id_mmfr3, boot->reg_id_mmfr3);
+	taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu,
+					info->reg_id_pfr0, boot->reg_id_pfr0);
+	taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu,
+					info->reg_id_pfr1, boot->reg_id_pfr1);
+	taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu,
+					info->reg_mvfr0, boot->reg_mvfr0);
+	taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu,
+					info->reg_mvfr1, boot->reg_mvfr1);
+	taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu,
+					info->reg_mvfr2, boot->reg_mvfr2);
+
+	/*
+	 * Mismatched CPU features are a recipe for disaster. Don't even
+	 * pretend to support them.
+	 */
+	WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC,
+			"Unsupported CPU feature variation.\n");
 
 	update_mixed_endian_el0_support(info);
 }
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 857aaf05a143..f25869ea2646 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -192,116 +192,6 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
 	pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
 }
 
-static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu)
-{
-	if ((boot & mask) == (cur & mask))
-		return 0;
-
-	pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n",
-		name, (unsigned long)boot, cpu, (unsigned long)cur);
-
-	return 1;
-}
-
-#define CHECK_MASK(field, mask, boot, cur, cpu) \
-	check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu)
-
-#define CHECK(field, boot, cur, cpu) \
-	CHECK_MASK(field, ~0ULL, boot, cur, cpu)
-
-/*
- * Verify that CPUs don't have unexpected differences that will cause problems.
- */
-static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur)
-{
-	unsigned int cpu = smp_processor_id();
-	struct cpuinfo_arm64 *boot = &boot_cpu_data;
-	unsigned int diff = 0;
-
-	/*
-	 * The kernel can handle differing I-cache policies, but otherwise
-	 * caches should look identical. Userspace JITs will make use of
-	 * *minLine.
-	 */
-	diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu);
-
-	/*
-	 * Userspace may perform DC ZVA instructions. Mismatched block sizes
-	 * could result in too much or too little memory being zeroed if a
-	 * process is preempted and migrated between CPUs.
-	 */
-	diff |= CHECK(dczid, boot, cur, cpu);
-
-	/* If different, timekeeping will be broken (especially with KVM) */
-	diff |= CHECK(cntfrq, boot, cur, cpu);
-
-	/*
-	 * The kernel uses self-hosted debug features and expects CPUs to
-	 * support identical debug features. We presently need CTX_CMPs, WRPs,
-	 * and BRPs to be identical.
-	 * ID_AA64DFR1 is currently RES0.
-	 */
-	diff |= CHECK(id_aa64dfr0, boot, cur, cpu);
-	diff |= CHECK(id_aa64dfr1, boot, cur, cpu);
-
-	/*
-	 * Even in big.LITTLE, processors should be identical instruction-set
-	 * wise.
-	 */
-	diff |= CHECK(id_aa64isar0, boot, cur, cpu);
-	diff |= CHECK(id_aa64isar1, boot, cur, cpu);
-
-	/*
-	 * Differing PARange support is fine as long as all peripherals and
-	 * memory are mapped within the minimum PARange of all CPUs.
-	 * Linux should not care about secure memory.
-	 * ID_AA64MMFR1 is currently RES0.
-	 */
-	diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu);
-	diff |= CHECK(id_aa64mmfr1, boot, cur, cpu);
-
-	/*
-	 * EL3 is not our concern.
-	 * ID_AA64PFR1 is currently RES0.
-	 */
-	diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu);
-	diff |= CHECK(id_aa64pfr1, boot, cur, cpu);
-
-	/*
-	 * If we have AArch32, we care about 32-bit features for compat. These
-	 * registers should be RES0 otherwise.
-	 */
-	diff |= CHECK(id_dfr0, boot, cur, cpu);
-	diff |= CHECK(id_isar0, boot, cur, cpu);
-	diff |= CHECK(id_isar1, boot, cur, cpu);
-	diff |= CHECK(id_isar2, boot, cur, cpu);
-	diff |= CHECK(id_isar3, boot, cur, cpu);
-	diff |= CHECK(id_isar4, boot, cur, cpu);
-	diff |= CHECK(id_isar5, boot, cur, cpu);
-	/*
-	 * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and
-	 * ACTLR formats could differ across CPUs and therefore would have to
-	 * be trapped for virtualization anyway.
-	 */
-	diff |= CHECK_MASK(id_mmfr0, 0xff0fffff, boot, cur, cpu);
-	diff |= CHECK(id_mmfr1, boot, cur, cpu);
-	diff |= CHECK(id_mmfr2, boot, cur, cpu);
-	diff |= CHECK(id_mmfr3, boot, cur, cpu);
-	diff |= CHECK(id_pfr0, boot, cur, cpu);
-	diff |= CHECK(id_pfr1, boot, cur, cpu);
-
-	diff |= CHECK(mvfr0, boot, cur, cpu);
-	diff |= CHECK(mvfr1, boot, cur, cpu);
-	diff |= CHECK(mvfr2, boot, cur, cpu);
-
-	/*
-	 * Mismatched CPU features are a recipe for disaster. Don't even
-	 * pretend to support them.
-	 */
-	WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC,
-			"Unsupported CPU feature variation.\n");
-}
-
 static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 {
 	info->reg_cntfrq = arch_timer_get_cntfrq();
@@ -346,8 +236,7 @@ void cpuinfo_store_cpu(void)
 {
 	struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data);
 	__cpuinfo_store_cpu(info);
-	cpuinfo_sanity_check(info);
-	update_cpu_features(info);
+	update_cpu_features(smp_processor_id(), info, &boot_cpu_data);
 }
 
 void __init cpuinfo_store_boot_cpu(void)
-- 
cgit v1.2.3-58-ga151


From b3f1537893b54d0f42f52e0f4cde5e17e21f564c Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:47 +0100
Subject: arm64: Read system wide CPUID value

Add an API for reading the safe CPUID value across the
system from the new infrastructure.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpufeature.h | 2 ++
 arch/arm64/kernel/cpufeature.c      | 9 +++++++++
 2 files changed, 11 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 01bb5cf995af..81217220eb92 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -144,6 +144,8 @@ void check_local_cpu_features(void);
 bool cpu_supports_mixed_endian_el0(void);
 bool system_supports_mixed_endian_el0(void);
 
+u64 read_system_reg(u32 id);
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index aae181760e1f..b4656cabf87f 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -590,6 +590,15 @@ void update_cpu_features(int cpu,
 	update_mixed_endian_el0_support(info);
 }
 
+u64 read_system_reg(u32 id)
+{
+	struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id);
+
+	/* We shouldn't get a request for an unsupported register */
+	BUG_ON(!regp);
+	return regp->sys_val;
+}
+
 static bool
 feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 {
-- 
cgit v1.2.3-58-ga151


From c1e8656cbae139c8aaf34d7b802edecbc8a1cf58 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:48 +0100
Subject: arm64: Cleanup mixed endian support detection

Make use of the system wide safe register to decide the support
for mixed endian.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpufeature.h | 12 ++++++++++--
 arch/arm64/kernel/cpufeature.c      | 22 ----------------------
 2 files changed, 10 insertions(+), 24 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 81217220eb92..1e281b27732b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -141,11 +141,19 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			    const char *info);
 void check_local_cpu_errata(void);
 void check_local_cpu_features(void);
-bool cpu_supports_mixed_endian_el0(void);
-bool system_supports_mixed_endian_el0(void);
 
 u64 read_system_reg(u32 id);
 
+static inline bool cpu_supports_mixed_endian_el0(void)
+{
+	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
+}
+
+static inline bool system_supports_mixed_endian_el0(void)
+{
+	return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index b4656cabf87f..a90a64dd905a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -26,7 +26,6 @@
 #include <asm/processor.h>
 #include <asm/sysreg.h>
 
-static bool mixed_endian_el0 = true;
 unsigned long elf_hwcap __read_mostly;
 EXPORT_SYMBOL_GPL(elf_hwcap);
 
@@ -44,22 +43,6 @@ unsigned int compat_elf_hwcap2 __read_mostly;
 
 DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 
-
-bool cpu_supports_mixed_endian_el0(void)
-{
-	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
-}
-
-bool system_supports_mixed_endian_el0(void)
-{
-	return mixed_endian_el0;
-}
-
-static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info)
-{
-	mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0);
-}
-
 #define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
 	{						\
 		.strict = STRICT,			\
@@ -433,9 +416,6 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 	init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
 	init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
 	init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
-
-	/* This will be removed later, once we start using the infrastructure */
-	update_mixed_endian_el0_support(info);
 }
 
 static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
@@ -586,8 +566,6 @@ void update_cpu_features(int cpu,
 	 */
 	WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC,
 			"Unsupported CPU feature variation.\n");
-
-	update_mixed_endian_el0_support(info);
 }
 
 u64 read_system_reg(u32 id)
-- 
cgit v1.2.3-58-ga151


From ce8b602c694c9482e0ffb7432cd59fa2276673fe Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:49 +0100
Subject: arm64: Refactor check_cpu_capabilities

check_cpu_capabilities runs through a given list of caps and
checks if the system has the cap, updates the system capability
bitmap and also runs any enable() methods associated with them.
All of this is not quite obvious from the name 'check'. This
patch splits the check_cpu_capabilities into two parts :

1) update_cpu_capabilities
 => Runs through the given list and updates the system
    wide capability map.
2) enable_cpu_capabilities
 => Runs through the given list and invokes enable() (if any)
    for the caps enabled on the system.

Cc: Andre Przywara <andre.przywara@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Suggested-by: Catalin Marinas <catalin.marinsa@arm.com>
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpufeature.h |  2 +-
 arch/arm64/kernel/cpu_errata.c      |  2 +-
 arch/arm64/kernel/cpufeature.c      | 14 +++++++++++---
 3 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 1e281b27732b..12d883a067c4 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -137,7 +137,7 @@ static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0)
 
 void __init setup_cpu_features(void);
 
-void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			    const char *info);
 void check_local_cpu_errata(void);
 void check_local_cpu_features(void);
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 6ffd91438560..09eab326ef93 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -88,5 +88,5 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 
 void check_local_cpu_errata(void)
 {
-	check_cpu_capabilities(arm64_errata, "enabling workaround for");
+	update_cpu_capabilities(arm64_errata, "enabling workaround for");
 }
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a90a64dd905a..78c4cb7af20e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -629,7 +629,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 	{},
 };
 
-void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			    const char *info)
 {
 	int i;
@@ -642,8 +642,15 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			pr_info("%s %s\n", info, caps[i].desc);
 		cpus_set_cap(caps[i].capability);
 	}
+}
+
+/*
+ * Run through the enabled capabilities and enable() it on the CPUs
+ */
+void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+{
+	int i;
 
-	/* second pass allows enable() to consider interacting capabilities */
 	for (i = 0; caps[i].desc; i++) {
 		if (cpus_have_cap(caps[i].capability) && caps[i].enable)
 			caps[i].enable();
@@ -652,7 +659,8 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 
 void check_local_cpu_features(void)
 {
-	check_cpu_capabilities(arm64_features, "detected feature:");
+	update_cpu_capabilities(arm64_features, "detected feature:");
+	enable_cpu_capabilities(arm64_features);
 }
 
 void __init setup_cpu_features(void)
-- 
cgit v1.2.3-58-ga151


From dbb4e152b8da1f977d9d8cd7e494ab4ee3622f72 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:50 +0100
Subject: arm64: Delay cpu feature capability checks

At the moment we run through the arm64_features capability list for
each CPU and set the capability if one of the CPU supports it. This
could be problematic in a heterogeneous system with differing capabilities.
Delay the CPU feature checks until all the enabled CPUs are up(i.e,
smp_cpus_done(), so that we can make better decisions based on the
overall system capability. Once we decide and advertise the capabilities
the alternatives can be applied. From this state, we cannot roll back
a feature to disabled based on the values from a new hotplugged CPU,
due to the runtime patching and other reasons. So, for all new CPUs,
we need to make sure that they have the established system capabilities.
Failing which, we bring the CPU down, preventing it from turning online.
Once the capabilities are decided, any new CPU booting up goes through
verification to ensure that it has all the enabled capabilities and also
invokes the respective enable() method on the CPU.

The CPU errata checks are not delayed and is still executed per-CPU
to detect the respective capabilities. If we ever come across a non-errata
capability that needs to be checked on each-CPU, we could introduce them via
a new capability table(or introduce a flag), which can be processed per CPU.

The next patch will make the feature checks use the system wide
safe value of a feature register.

NOTE: The enable() methods associated with the capability is scheduled
on all the CPUs (which is the only use case at the moment). If we need
a different type of 'enable()' which only needs to be run once on any CPU,
we should be able to handle that when needed.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
[catalin.marinas@arm.com: static variable and coding style fixes]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpufeature.h | 11 ++++-
 arch/arm64/include/asm/processor.h  |  2 +-
 arch/arm64/kernel/cpufeature.c      | 97 +++++++++++++++++++++++++++++++++++--
 arch/arm64/kernel/cpuinfo.c         |  1 -
 arch/arm64/kernel/smp.c             |  7 +++
 arch/arm64/mm/fault.c               |  2 +-
 6 files changed, 111 insertions(+), 9 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 12d883a067c4..1603ae84ea45 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -70,7 +70,7 @@ struct arm64_cpu_capabilities {
 	const char *desc;
 	u16 capability;
 	bool (*matches)(const struct arm64_cpu_capabilities *);
-	void (*enable)(void);
+	void (*enable)(void *);		/* Called on all active CPUs */
 	union {
 		struct {	/* To be used for erratum handling only */
 			u32 midr_model;
@@ -140,7 +140,14 @@ void __init setup_cpu_features(void);
 void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			    const char *info);
 void check_local_cpu_errata(void);
-void check_local_cpu_features(void);
+
+#ifdef CONFIG_HOTPLUG_CPU
+void verify_local_cpu_capabilities(void);
+#else
+static inline void verify_local_cpu_capabilities(void)
+{
+}
+#endif
 
 u64 read_system_reg(u32 id);
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 98f32355dc97..4acb7ca94fcd 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -186,6 +186,6 @@ static inline void spin_lock_prefetch(const void *x)
 
 #endif
 
-void cpu_enable_pan(void);
+void cpu_enable_pan(void *__unused);
 
 #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 78c4cb7af20e..e002cadf84bf 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
+#include <asm/cpu_ops.h>
 #include <asm/processor.h>
 #include <asm/sysreg.h>
 
@@ -645,19 +646,101 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 }
 
 /*
- * Run through the enabled capabilities and enable() it on the CPUs
+ * Run through the enabled capabilities and enable() it on all active
+ * CPUs
  */
 void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
 	int i;
 
+	for (i = 0; caps[i].desc; i++)
+		if (caps[i].enable && cpus_have_cap(caps[i].capability))
+			on_each_cpu(caps[i].enable, NULL, true);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Flag to indicate if we have computed the system wide
+ * capabilities based on the boot time active CPUs. This
+ * will be used to determine if a new booting CPU should
+ * go through the verification process to make sure that it
+ * supports the system capabilities, without using a hotplug
+ * notifier.
+ */
+static bool sys_caps_initialised;
+
+static inline void set_sys_caps_initialised(void)
+{
+	sys_caps_initialised = true;
+}
+
+/*
+ * Park the CPU which doesn't have the capability as advertised
+ * by the system.
+ */
+static void fail_incapable_cpu(char *cap_type,
+				 const struct arm64_cpu_capabilities *cap)
+{
+	int cpu = smp_processor_id();
+
+	pr_crit("CPU%d: missing %s : %s\n", cpu, cap_type, cap->desc);
+	/* Mark this CPU absent */
+	set_cpu_present(cpu, 0);
+
+	/* Check if we can park ourselves */
+	if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die)
+		cpu_ops[cpu]->cpu_die(cpu);
+	asm(
+	"1:	wfe\n"
+	"	wfi\n"
+	"	b	1b");
+}
+
+/*
+ * Run through the enabled system capabilities and enable() it on this CPU.
+ * The capabilities were decided based on the available CPUs at the boot time.
+ * Any new CPU should match the system wide status of the capability. If the
+ * new CPU doesn't have a capability which the system now has enabled, we
+ * cannot do anything to fix it up and could cause unexpected failures. So
+ * we park the CPU.
+ */
+void verify_local_cpu_capabilities(void)
+{
+	int i;
+	const struct arm64_cpu_capabilities *caps;
+
+	/*
+	 * If we haven't computed the system capabilities, there is nothing
+	 * to verify.
+	 */
+	if (!sys_caps_initialised)
+		return;
+
+	caps = arm64_features;
 	for (i = 0; caps[i].desc; i++) {
-		if (cpus_have_cap(caps[i].capability) && caps[i].enable)
-			caps[i].enable();
+		if (!cpus_have_cap(caps[i].capability))
+			continue;
+		/*
+		 * If the new CPU misses an advertised feature, we cannot proceed
+		 * further, park the cpu.
+		 */
+		if (!caps[i].matches(&caps[i]))
+			fail_incapable_cpu("arm64_features", &caps[i]);
+		if (caps[i].enable)
+			caps[i].enable(NULL);
 	}
 }
 
-void check_local_cpu_features(void)
+#else	/* !CONFIG_HOTPLUG_CPU */
+
+static inline void set_sys_caps_initialised(void)
+{
+}
+
+#endif	/* CONFIG_HOTPLUG_CPU */
+
+static void setup_feature_capabilities(void)
 {
 	update_cpu_capabilities(arm64_features, "detected feature:");
 	enable_cpu_capabilities(arm64_features);
@@ -670,6 +753,12 @@ void __init setup_cpu_features(void)
 	u32 cwg;
 	int cls;
 
+	/* Set the CPU feature capabilies */
+	setup_feature_capabilities();
+
+	/* Advertise that we have computed the system capabilities */
+	set_sys_caps_initialised();
+
 	/*
 	 * Check for sane CTR_EL0.CWG value.
 	 */
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index f25869ea2646..789fbea92b79 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -229,7 +229,6 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	cpuinfo_detect_icache_policy(info);
 
 	check_local_cpu_errata();
-	check_local_cpu_features();
 }
 
 void cpuinfo_store_cpu(void)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d1d00499f0cf..2bbdc0e4fd14 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -156,6 +156,13 @@ asmlinkage void secondary_start_kernel(void)
 	preempt_disable();
 	trace_hardirqs_off();
 
+	/*
+	 * If the system has established the capabilities, make sure
+	 * this CPU ticks all of those. If it doesn't, the CPU will
+	 * fail to come online.
+	 */
+	verify_local_cpu_capabilities();
+
 	if (cpu_ops[cpu]->cpu_postboot)
 		cpu_ops[cpu]->cpu_postboot();
 
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index aba9ead1384c..066d5aea16c6 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -555,7 +555,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
 }
 
 #ifdef CONFIG_ARM64_PAN
-void cpu_enable_pan(void)
+void cpu_enable_pan(void *__unused)
 {
 	config_sctlr_el1(SCTLR_EL1_SPAN, 0);
 }
-- 
cgit v1.2.3-58-ga151


From da8d02d19ffdd201af632c755a473b6df4b3e4cc Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:51 +0100
Subject: arm64/capabilities: Make use of system wide safe value

Now that we can reliably read the system wide safe value for a
feature register, use that to compute the system capability.
This patch also replaces the 'feature-register-specific'
methods with a generic routine to check the capability.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpufeature.h |  1 +
 arch/arm64/kernel/cpufeature.c      | 79 +++++++++++++++++++++++++++----------
 2 files changed, 60 insertions(+), 20 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 1603ae84ea45..eec584e7c701 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -78,6 +78,7 @@ struct arm64_cpu_capabilities {
 		};
 
 		struct {	/* Feature register checking */
+			u32 sys_reg;
 			int field_pos;
 			int min_field_value;
 		};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e002cadf84bf..8aff8fcbeb96 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -586,34 +586,31 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry)
 	return val >= entry->min_field_value;
 }
 
-#define __ID_FEAT_CHK(reg)						\
-static bool __maybe_unused						\
-has_##reg##_feature(const struct arm64_cpu_capabilities *entry)		\
-{									\
-	u64 val;							\
-									\
-	val = read_cpuid(reg##_el1);					\
-	return feature_matches(val, entry);				\
-}
+static bool
+has_cpuid_feature(const struct arm64_cpu_capabilities *entry)
+{
+	u64 val;
 
-__ID_FEAT_CHK(id_aa64pfr0);
-__ID_FEAT_CHK(id_aa64mmfr1);
-__ID_FEAT_CHK(id_aa64isar0);
+	val = read_system_reg(entry->sys_reg);
+	return feature_matches(val, entry);
+}
 
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
 		.capability = ARM64_HAS_SYSREG_GIC_CPUIF,
-		.matches = has_id_aa64pfr0_feature,
-		.field_pos = 24,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64PFR0_EL1,
+		.field_pos = ID_AA64PFR0_GIC_SHIFT,
 		.min_field_value = 1,
 	},
 #ifdef CONFIG_ARM64_PAN
 	{
 		.desc = "Privileged Access Never",
 		.capability = ARM64_HAS_PAN,
-		.matches = has_id_aa64mmfr1_feature,
-		.field_pos = 20,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64MMFR1_EL1,
+		.field_pos = ID_AA64MMFR1_PAN_SHIFT,
 		.min_field_value = 1,
 		.enable = cpu_enable_pan,
 	},
@@ -622,8 +619,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "LSE atomic instructions",
 		.capability = ARM64_HAS_LSE_ATOMICS,
-		.matches = has_id_aa64isar0_feature,
-		.field_pos = 20,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64ISAR0_EL1,
+		.field_pos = ID_AA64ISAR0_ATOMICS_SHIFT,
 		.min_field_value = 2,
 	},
 #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
@@ -675,6 +673,47 @@ static inline void set_sys_caps_initialised(void)
 	sys_caps_initialised = true;
 }
 
+/*
+ * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated.
+ */
+static u64 __raw_read_system_reg(u32 sys_id)
+{
+	switch (sys_id) {
+	case SYS_ID_PFR0_EL1:		return (u64)read_cpuid(ID_PFR0_EL1);
+	case SYS_ID_PFR1_EL1:		return (u64)read_cpuid(ID_PFR1_EL1);
+	case SYS_ID_DFR0_EL1:		return (u64)read_cpuid(ID_DFR0_EL1);
+	case SYS_ID_MMFR0_EL1:		return (u64)read_cpuid(ID_MMFR0_EL1);
+	case SYS_ID_MMFR1_EL1:		return (u64)read_cpuid(ID_MMFR1_EL1);
+	case SYS_ID_MMFR2_EL1:		return (u64)read_cpuid(ID_MMFR2_EL1);
+	case SYS_ID_MMFR3_EL1:		return (u64)read_cpuid(ID_MMFR3_EL1);
+	case SYS_ID_ISAR0_EL1:		return (u64)read_cpuid(ID_ISAR0_EL1);
+	case SYS_ID_ISAR1_EL1:		return (u64)read_cpuid(ID_ISAR1_EL1);
+	case SYS_ID_ISAR2_EL1:		return (u64)read_cpuid(ID_ISAR2_EL1);
+	case SYS_ID_ISAR3_EL1:		return (u64)read_cpuid(ID_ISAR3_EL1);
+	case SYS_ID_ISAR4_EL1:		return (u64)read_cpuid(ID_ISAR4_EL1);
+	case SYS_ID_ISAR5_EL1:		return (u64)read_cpuid(ID_ISAR4_EL1);
+	case SYS_MVFR0_EL1:		return (u64)read_cpuid(MVFR0_EL1);
+	case SYS_MVFR1_EL1:		return (u64)read_cpuid(MVFR1_EL1);
+	case SYS_MVFR2_EL1:		return (u64)read_cpuid(MVFR2_EL1);
+
+	case SYS_ID_AA64PFR0_EL1:	return (u64)read_cpuid(ID_AA64PFR0_EL1);
+	case SYS_ID_AA64PFR1_EL1:	return (u64)read_cpuid(ID_AA64PFR0_EL1);
+	case SYS_ID_AA64DFR0_EL1:	return (u64)read_cpuid(ID_AA64DFR0_EL1);
+	case SYS_ID_AA64DFR1_EL1:	return (u64)read_cpuid(ID_AA64DFR0_EL1);
+	case SYS_ID_AA64MMFR0_EL1:	return (u64)read_cpuid(ID_AA64MMFR0_EL1);
+	case SYS_ID_AA64MMFR1_EL1:	return (u64)read_cpuid(ID_AA64MMFR1_EL1);
+	case SYS_ID_AA64ISAR0_EL1:	return (u64)read_cpuid(ID_AA64ISAR0_EL1);
+	case SYS_ID_AA64ISAR1_EL1:	return (u64)read_cpuid(ID_AA64ISAR1_EL1);
+
+	case SYS_CNTFRQ_EL0:		return (u64)read_cpuid(CNTFRQ_EL0);
+	case SYS_CTR_EL0:		return (u64)read_cpuid(CTR_EL0);
+	case SYS_DCZID_EL0:		return (u64)read_cpuid(DCZID_EL0);
+	default:
+		BUG();
+		return 0;
+	}
+}
+
 /*
  * Park the CPU which doesn't have the capability as advertised
  * by the system.
@@ -719,13 +758,13 @@ void verify_local_cpu_capabilities(void)
 
 	caps = arm64_features;
 	for (i = 0; caps[i].desc; i++) {
-		if (!cpus_have_cap(caps[i].capability))
+		if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
 			continue;
 		/*
 		 * If the new CPU misses an advertised feature, we cannot proceed
 		 * further, park the cpu.
 		 */
-		if (!caps[i].matches(&caps[i]))
+		if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
 			fail_incapable_cpu("arm64_features", &caps[i]);
 		if (caps[i].enable)
 			caps[i].enable(NULL);
-- 
cgit v1.2.3-58-ga151


From 37b01d53ceefa390d6eee7a82f3c156b64951bf3 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:52 +0100
Subject: arm64/HWCAP: Use system wide safe values

Extend struct arm64_cpu_capabilities to handle the HWCAP detection
and make use of the system wide value of the feature registers for
a reliable set of HWCAPs.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cpufeature.h |   2 +
 arch/arm64/include/asm/hwcap.h      |   8 ++
 arch/arm64/kernel/cpufeature.c      | 163 ++++++++++++++++++++----------------
 3 files changed, 101 insertions(+), 72 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index eec584e7c701..ca4621033ca0 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -81,6 +81,8 @@ struct arm64_cpu_capabilities {
 			u32 sys_reg;
 			int field_pos;
 			int min_field_value;
+			int hwcap_type;
+			unsigned long hwcap;
 		};
 	};
 };
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 0ad735166d9f..400b80b49595 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -52,6 +52,14 @@
 extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
 #endif
 
+enum {
+	CAP_HWCAP = 1,
+#ifdef CONFIG_COMPAT
+	CAP_COMPAT_HWCAP,
+	CAP_COMPAT_HWCAP2,
+#endif
+};
+
 extern unsigned long elf_hwcap;
 #endif
 #endif
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 8aff8fcbeb96..dd6997eed61f 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -628,6 +628,89 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 	{},
 };
 
+#define HWCAP_CAP(reg, field, min_value, type, cap)		\
+	{							\
+		.desc = #cap,					\
+		.matches = has_cpuid_feature,			\
+		.sys_reg = reg,					\
+		.field_pos = field,				\
+		.min_field_value = min_value,			\
+		.hwcap_type = type,				\
+		.hwcap = cap,					\
+	}
+
+static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 2, CAP_HWCAP, HWCAP_PMULL),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 1, CAP_HWCAP, HWCAP_AES),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 1, CAP_HWCAP, HWCAP_SHA1),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32),
+	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS),
+#ifdef CONFIG_COMPAT
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2),
+	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32),
+#endif
+	{},
+};
+
+static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
+{
+	switch (cap->hwcap_type) {
+	case CAP_HWCAP:
+		elf_hwcap |= cap->hwcap;
+		break;
+#ifdef CONFIG_COMPAT
+	case CAP_COMPAT_HWCAP:
+		compat_elf_hwcap |= (u32)cap->hwcap;
+		break;
+	case CAP_COMPAT_HWCAP2:
+		compat_elf_hwcap2 |= (u32)cap->hwcap;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+		break;
+	}
+}
+
+/* Check if we have a particular HWCAP enabled */
+static bool cpus_have_hwcap(const struct arm64_cpu_capabilities *cap)
+{
+	bool rc;
+
+	switch (cap->hwcap_type) {
+	case CAP_HWCAP:
+		rc = (elf_hwcap & cap->hwcap) != 0;
+		break;
+#ifdef CONFIG_COMPAT
+	case CAP_COMPAT_HWCAP:
+		rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0;
+		break;
+	case CAP_COMPAT_HWCAP2:
+		rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0;
+		break;
+#endif
+	default:
+		WARN_ON(1);
+		rc = false;
+	}
+
+	return rc;
+}
+
+static void setup_cpu_hwcaps(void)
+{
+	int i;
+	const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
+
+	for (i = 0; hwcaps[i].desc; i++)
+		if (hwcaps[i].matches(&hwcaps[i]))
+			cap_set_hwcap(&hwcaps[i]);
+}
+
 void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			    const char *info)
 {
@@ -769,6 +852,13 @@ void verify_local_cpu_capabilities(void)
 		if (caps[i].enable)
 			caps[i].enable(NULL);
 	}
+
+	for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) {
+		if (!cpus_have_hwcap(&caps[i]))
+			continue;
+		if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))
+			fail_incapable_cpu("arm64_hwcaps", &caps[i]);
+	}
 }
 
 #else	/* !CONFIG_HOTPLUG_CPU */
@@ -787,13 +877,12 @@ static void setup_feature_capabilities(void)
 
 void __init setup_cpu_features(void)
 {
-	u64 features;
-	s64 block;
 	u32 cwg;
 	int cls;
 
 	/* Set the CPU feature capabilies */
 	setup_feature_capabilities();
+	setup_cpu_hwcaps();
 
 	/* Advertise that we have computed the system capabilities */
 	set_sys_caps_initialised();
@@ -809,74 +898,4 @@ void __init setup_cpu_features(void)
 	if (L1_CACHE_BYTES < cls)
 		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
 			L1_CACHE_BYTES, cls);
-
-	/*
-	 * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks.
-	 * The blocks we test below represent incremental functionality
-	 * for non-negative values. Negative values are reserved.
-	 */
-	features = read_cpuid(ID_AA64ISAR0_EL1);
-	block = cpuid_feature_extract_field(features, 4);
-	if (block > 0) {
-		switch (block) {
-		default:
-		case 2:
-			elf_hwcap |= HWCAP_PMULL;
-		case 1:
-			elf_hwcap |= HWCAP_AES;
-		case 0:
-			break;
-		}
-	}
-
-	if (cpuid_feature_extract_field(features, 8) > 0)
-		elf_hwcap |= HWCAP_SHA1;
-
-	if (cpuid_feature_extract_field(features, 12) > 0)
-		elf_hwcap |= HWCAP_SHA2;
-
-	if (cpuid_feature_extract_field(features, 16) > 0)
-		elf_hwcap |= HWCAP_CRC32;
-
-	block = cpuid_feature_extract_field(features, 20);
-	if (block > 0) {
-		switch (block) {
-		default:
-		case 2:
-			elf_hwcap |= HWCAP_ATOMICS;
-		case 1:
-			/* RESERVED */
-		case 0:
-			break;
-		}
-	}
-
-#ifdef CONFIG_COMPAT
-	/*
-	 * ID_ISAR5_EL1 carries similar information as above, but pertaining to
-	 * the AArch32 32-bit execution state.
-	 */
-	features = read_cpuid(ID_ISAR5_EL1);
-	block = cpuid_feature_extract_field(features, 4);
-	if (block > 0) {
-		switch (block) {
-		default:
-		case 2:
-			compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL;
-		case 1:
-			compat_elf_hwcap2 |= COMPAT_HWCAP2_AES;
-		case 0:
-			break;
-		}
-	}
-
-	if (cpuid_feature_extract_field(features, 8) > 0)
-		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1;
-
-	if (cpuid_feature_extract_field(features, 12) > 0)
-		compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2;
-
-	if (cpuid_feature_extract_field(features, 16) > 0)
-		compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32;
-#endif
 }
-- 
cgit v1.2.3-58-ga151


From fe80f9f2da1006a4308c2bc018ee1d67f10dd8d0 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:53 +0100
Subject: arm64: Move FP/ASIMD hwcap handling to common code

The FP/ASIMD is detected in fpsimd_init(), which is built-in
unconditionally. Lets move the hwcap handling to the central place.

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/cpufeature.c |  2 ++
 arch/arm64/kernel/fpsimd.c     | 16 +++++-----------
 2 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index dd6997eed61f..d0d607452e1d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -646,6 +646,8 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 1, CAP_HWCAP, HWCAP_SHA2),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 1, CAP_HWCAP, HWCAP_CRC32),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 2, CAP_HWCAP, HWCAP_ATOMICS),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 0, CAP_HWCAP, HWCAP_FP),
+	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 0, CAP_HWCAP, HWCAP_ASIMD),
 #ifdef CONFIG_COMPAT
 	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL),
 	HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES),
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index c56956a16d3f..4c46c54a3ad7 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -332,21 +332,15 @@ static inline void fpsimd_hotplug_init(void) { }
  */
 static int __init fpsimd_init(void)
 {
-	u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
-
-	if (pfr & (0xf << 16)) {
+	if (elf_hwcap & HWCAP_FP) {
+		fpsimd_pm_init();
+		fpsimd_hotplug_init();
+	} else {
 		pr_notice("Floating-point is not implemented\n");
-		return 0;
 	}
-	elf_hwcap |= HWCAP_FP;
 
-	if (pfr & (0xf << 20))
+	if (!(elf_hwcap & HWCAP_ASIMD))
 		pr_notice("Advanced SIMD is not implemented\n");
-	else
-		elf_hwcap |= HWCAP_ASIMD;
-
-	fpsimd_pm_init();
-	fpsimd_hotplug_init();
 
 	return 0;
 }
-- 
cgit v1.2.3-58-ga151


From 3085bb01b40676d946a13064483ab2819ae3b010 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:54 +0100
Subject: arm64/debug: Make use of the system wide safe value

Use the system wide value of ID_AA64DFR0 to make safer decisions

Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/hw_breakpoint.h | 9 +++++++--
 arch/arm64/kernel/debug-monitors.c     | 6 ++++--
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 4c47cb2fbb52..e54415ec6935 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -17,6 +17,7 @@
 #define __ASM_HW_BREAKPOINT_H
 
 #include <asm/cputype.h>
+#include <asm/cpufeature.h>
 
 #ifdef __KERNEL__
 
@@ -137,13 +138,17 @@ extern struct pmu perf_ops_bp;
 /* Determine number of BRP registers available. */
 static inline int get_num_brps(void)
 {
-	return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
+	return 1 +
+		cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+						ID_AA64DFR0_BRPS_SHIFT);
 }
 
 /* Determine number of WRP registers available. */
 static inline int get_num_wrps(void)
 {
-	return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
+	return 1 +
+		cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+						ID_AA64DFR0_WRPS_SHIFT);
 }
 
 #endif	/* __KERNEL__ */
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 8eef30f92651..1429044b143c 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -26,14 +26,16 @@
 #include <linux/stat.h>
 #include <linux/uaccess.h>
 
-#include <asm/debug-monitors.h>
+#include <asm/cpufeature.h>
 #include <asm/cputype.h>
+#include <asm/debug-monitors.h>
 #include <asm/system_misc.h>
 
 /* Determine debug architecture. */
 u8 debug_monitors_arch(void)
 {
-	return read_cpuid(ID_AA64DFR0_EL1) & 0xf;
+	return cpuid_feature_extract_field(read_system_reg(SYS_ID_AA64DFR0_EL1),
+						ID_AA64DFR0_DEBUGVER_SHIFT);
 }
 
 /*
-- 
cgit v1.2.3-58-ga151


From 4db8e5ea6b07ae83e4361aee3f967f7126e01fa4 Mon Sep 17 00:00:00 2001
From: "Suzuki K. Poulose" <suzuki.poulose@arm.com>
Date: Mon, 19 Oct 2015 14:24:55 +0100
Subject: arm64/kvm: Make use of the system wide safe values

Use the system wide safe value from the new API for safer
decisions

Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: kvmarm@lists.cs.columbia.edu
Signed-off-by: Suzuki K. Poulose <suzuki.poulose@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Tested-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kvm/reset.c    |  2 +-
 arch/arm64/kvm/sys_regs.c | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 91cf5350b328..f34745cb3d23 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -53,7 +53,7 @@ static bool cpu_has_32bit_el1(void)
 {
 	u64 pfr0;
 
-	pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+	pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
 	return !!(pfr0 & 0x20);
 }
 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index d03d3af17e7e..87a64e8db04c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -693,13 +693,13 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu,
 	if (p->is_write) {
 		return ignore_write(vcpu, p);
 	} else {
-		u64 dfr = read_cpuid(ID_AA64DFR0_EL1);
-		u64 pfr = read_cpuid(ID_AA64PFR0_EL1);
-		u32 el3 = !!((pfr >> 12) & 0xf);
+		u64 dfr = read_system_reg(SYS_ID_AA64DFR0_EL1);
+		u64 pfr = read_system_reg(SYS_ID_AA64PFR0_EL1);
+		u32 el3 = !!cpuid_feature_extract_field(pfr, ID_AA64PFR0_EL3_SHIFT);
 
-		*vcpu_reg(vcpu, p->Rt) = ((((dfr >> 20) & 0xf) << 28) |
-					  (((dfr >> 12) & 0xf) << 24) |
-					  (((dfr >> 28) & 0xf) << 20) |
+		*vcpu_reg(vcpu, p->Rt) = ((((dfr >> ID_AA64DFR0_WRPS_SHIFT) & 0xf) << 28) |
+					  (((dfr >> ID_AA64DFR0_BRPS_SHIFT) & 0xf) << 24) |
+					  (((dfr >> ID_AA64DFR0_CTX_CMPS_SHIFT) & 0xf) << 20) |
 					  (6 << 16) | (el3 << 14) | (el3 << 12));
 		return true;
 	}
-- 
cgit v1.2.3-58-ga151


From 9299b24712400d1cfe3dcf3e5dcb6cac2940df56 Mon Sep 17 00:00:00 2001
From: Dave Martin <Dave.Martin@arm.com>
Date: Thu, 30 Jul 2015 16:36:25 +0100
Subject: arm64: Constify hwcap name string arrays

The hwcap string arrays used for generating the contents of
/proc/cpuinfo are currently arrays of non-const pointers.

There's no need for these pointers to be mutable, so this patch makes
them const so that they can be moved to .rodata.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/cpuinfo.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 789fbea92b79..706679d0a0b4 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -48,7 +48,7 @@ static char *icache_policy_str[] = {
 
 unsigned long __icache_flags;
 
-static const char *hwcap_str[] = {
+static const char *const hwcap_str[] = {
 	"fp",
 	"asimd",
 	"evtstrm",
@@ -62,7 +62,7 @@ static const char *hwcap_str[] = {
 };
 
 #ifdef CONFIG_COMPAT
-static const char *compat_hwcap_str[] = {
+static const char *const compat_hwcap_str[] = {
 	"swp",
 	"half",
 	"thumb",
@@ -87,7 +87,7 @@ static const char *compat_hwcap_str[] = {
 	"evtstrm"
 };
 
-static const char *compat_hwcap2_str[] = {
+static const char *const compat_hwcap2_str[] = {
 	"aes",
 	"pmull",
 	"sha1",
-- 
cgit v1.2.3-58-ga151


From fde4a59fc1c55709b96d0f07110895f20015b6cc Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 27 Oct 2015 12:05:54 +0000
Subject: arm64: cpufeature: declare enable_cpu_capabilities as static

enable_cpu_capabilities is only called from within cpufeature.c, so it
can be declared static.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/cpufeature.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d0d607452e1d..504526fa8129 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -732,7 +732,7 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
  * Run through the enabled capabilities and enable() it on all active
  * CPUs
  */
-void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
 	int i;
 
-- 
cgit v1.2.3-58-ga151


From 59f2413573e4ee1e76062beef4a359156500db94 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 27 Oct 2015 12:05:55 +0000
Subject: arm64: cachetype: fix definitions of ICACHEF_* flags

test_bit and set_bit take the bit number to operate on, rather than a
mask. This patch fixes the ICACHEF_* definitions so that they represent
the bit index in __icache_flags as opposed to the mask returned by the
BIT macro.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cachetype.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h
index da2fc9e3cedd..f5588692f1d4 100644
--- a/arch/arm64/include/asm/cachetype.h
+++ b/arch/arm64/include/asm/cachetype.h
@@ -34,8 +34,8 @@
 
 #define CTR_L1IP(ctr)	(((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
 
-#define ICACHEF_ALIASING	BIT(0)
-#define ICACHEF_AIVIVT		BIT(1)
+#define ICACHEF_ALIASING	0
+#define ICACHEF_AIVIVT		1
 
 extern unsigned long __icache_flags;
 
-- 
cgit v1.2.3-58-ga151


From f23bef34d34b6325916daddc4cfdeee53d5139e6 Mon Sep 17 00:00:00 2001
From: Alexander Kuleshov <kuleshovmail@gmail.com>
Date: Mon, 26 Oct 2015 17:26:57 +0600
Subject: arm64/mm: use PAGE_ALIGNED instead of IS_ALIGNED

The <linux/mm.h> already provides the PAGE_ALIGNED macro. Let's
use this macro instead of IS_ALIGNED and passing PAGE_SIZE directly.

Signed-off-by: Alexander Kuleshov <kuleshovmail@gmail.com>
Acked-by: Laura Abbott <laura@labbott.name>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/pageattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index e47ed1c5dce1..3571c7309c5e 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -45,7 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages,
 	int ret;
 	struct page_change_data data;
 
-	if (!IS_ALIGNED(addr, PAGE_SIZE)) {
+	if (!PAGE_ALIGNED(addr)) {
 		start &= PAGE_MASK;
 		end = start + size;
 		WARN_ON_ONCE(1);
-- 
cgit v1.2.3-58-ga151


From f90df5e27d978c492c4d911476622a7413621213 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Mon, 26 Oct 2015 11:48:16 +0800
Subject: arm64: make Timer Interrupt Frequency selectable

It allows a selectable timer interrupt frequency of 100, 250, 300 and 1000 HZ.
We will get better performance when choose a suitable frequency in some scene.

Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Kconfig | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c62fb0393567..4d8a5b256f65 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -454,10 +454,7 @@ config HOTPLUG_CPU
 	  can be controlled through /sys/devices/system/cpu.
 
 source kernel/Kconfig.preempt
-
-config HZ
-	int
-	default 100
+source kernel/Kconfig.hz
 
 config ARCH_HAS_HOLES_MEMORYMODEL
 	def_bool y if SPARSEMEM
-- 
cgit v1.2.3-58-ga151


From 6e4a0f2b5c56af6be43b546df16b1ece7df537d8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 26 Oct 2015 12:53:17 +0900
Subject: arm64: remove bogus TASK_SIZE_64 check

The comparison between TASK_SIZE_64 and MODULES_VADDR does not
make any sense on arm64, it is simply something that has been
carried over from the ARM port which arm64 is based on. So drop it.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/memory.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 11ccf6c09533..37f3538a2924 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -70,10 +70,6 @@
 
 #define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 4))
 
-#if TASK_SIZE_64 > MODULES_VADDR
-#error Top of 64-bit user space clashes with start of module space
-#endif
-
 /*
  * Physical vs virtual RAM address space conversion.  These are
  * private definitions which should NOT be used outside memory.h
-- 
cgit v1.2.3-58-ga151


From 97303480753e48fb313dc0e15daaf11b0451cdb8 Mon Sep 17 00:00:00 2001
From: Tirumalesh Chalamarla <tchalamarla@cavium.com>
Date: Tue, 22 Sep 2015 19:59:48 +0200
Subject: arm64: Increase the max granular size

Increase the standard cacheline size to avoid having locks in the same
cacheline.

Cavium's ThunderX core implements cache lines of 128 byte size. With
current granulare size of 64 bytes (L1_CACHE_SHIFT=6) two locks could
share the same cache line leading a performance degradation.
Increasing the size fixes that.

Increasing the size has no negative impact to cache invalidation on
systems with a smaller cache line. There is an impact on memory usage,
but that's not too important for arm64 use cases.

Signed-off-by: Tirumalesh Chalamarla <tchalamarla@cavium.com>
Signed-off-by: Robert Richter <rrichter@cavium.com>
Acked-by: Timur Tabi <timur@codeaurora.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/cache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index bde449936e2f..5082b30bc2c0 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -18,7 +18,7 @@
 
 #include <asm/cachetype.h>
 
-#define L1_CACHE_SHIFT		6
+#define L1_CACHE_SHIFT		7
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
 /*
-- 
cgit v1.2.3-58-ga151


From 5accd17d0eb523350c9ef754d655e379c9bb93b3 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 22 Oct 2015 15:41:52 +0100
Subject: arm64: Fix compat register mappings

For reasons not entirely apparent, but now enshrined in history, the
architectural mapping of AArch32 banked registers to AArch64 registers
actually orders SP_<mode> and LR_<mode> backwards compared to the
intuitive r13/r14 order, for all modes except FIQ.

Fix the compat_<reg>_<mode> macros accordingly, in the hope of avoiding
subtle bugs with KVM and AArch32 guests.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/ptrace.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 536274ed292e..e9e5467e0bf4 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -83,14 +83,14 @@
 #define compat_sp	regs[13]
 #define compat_lr	regs[14]
 #define compat_sp_hyp	regs[15]
-#define compat_sp_irq	regs[16]
-#define compat_lr_irq	regs[17]
-#define compat_sp_svc	regs[18]
-#define compat_lr_svc	regs[19]
-#define compat_sp_abt	regs[20]
-#define compat_lr_abt	regs[21]
-#define compat_sp_und	regs[22]
-#define compat_lr_und	regs[23]
+#define compat_lr_irq	regs[16]
+#define compat_sp_irq	regs[17]
+#define compat_lr_svc	regs[18]
+#define compat_sp_svc	regs[19]
+#define compat_lr_abt	regs[20]
+#define compat_sp_abt	regs[21]
+#define compat_lr_und	regs[22]
+#define compat_sp_und	regs[23]
 #define compat_r8_fiq	regs[24]
 #define compat_r9_fiq	regs[25]
 #define compat_r10_fiq	regs[26]
-- 
cgit v1.2.3-58-ga151


From 86a5906e4d1df1ec160fa9e18b6f2277a5216c60 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 27 Oct 2015 17:40:26 +0000
Subject: arm64: Fix build with CONFIG_ZONE_DMA=n

Trying to build with CONFIG_ZONE_DMA=n leaves visible references
to the now-undefined ZONE_DMA, resulting in a syntax error.

Hide the references behind an #ifdef instead of using IS_ENABLED.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/init.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 7a1f9a05dc82..17bf39ac83ba 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -86,10 +86,10 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 	memset(zone_size, 0, sizeof(zone_size));
 
 	/* 4GB maximum for 32-bit only capable devices */
-	if (IS_ENABLED(CONFIG_ZONE_DMA)) {
-		max_dma = PFN_DOWN(arm64_dma_phys_limit);
-		zone_size[ZONE_DMA] = max_dma - min;
-	}
+#ifdef CONFIG_ZONE_DMA
+	max_dma = PFN_DOWN(arm64_dma_phys_limit);
+	zone_size[ZONE_DMA] = max_dma - min;
+#endif
 	zone_size[ZONE_NORMAL] = max - max_dma;
 
 	memcpy(zhole_size, zone_size, sizeof(zhole_size));
@@ -101,11 +101,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 		if (start >= max)
 			continue;
 
-		if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) {
+#ifdef CONFIG_ZONE_DMA
+		if (start < max_dma) {
 			unsigned long dma_end = min(end, max_dma);
 			zhole_size[ZONE_DMA] -= dma_end - start;
 		}
-
+#endif
 		if (end > max_dma) {
 			unsigned long normal_end = min(end, max);
 			unsigned long normal_start = max(start, max_dma);
-- 
cgit v1.2.3-58-ga151


From cb083816ab5ac3d10a9417527f07fc5962cc3808 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 26 Oct 2015 21:42:33 +0000
Subject: arm64: page-align sections for DEBUG_RODATA

A kernel built with DEBUG_RO_DATA && !CONFIG_DEBUG_ALIGN_RODATA doesn't
have .text aligned to a page boundary, though fixup_executable works at
page-granularity thanks to its use of create_mapping. If .text is not
page-aligned, the first page it exists in may be marked non-executable,
leading to failures when an attempt is made to execute code in said
page.

This patch upgrades ALIGN_DEBUG_RO and ALIGN_DEBUG_RO_MIN to force page
alignment for DEBUG_RO_DATA && !CONFIG_DEBUG_ALIGN_RODATA kernels,
ensuring that all sections with specific RWX permission requirements are
mapped with the correct permissions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Jeremy Linton <jeremy.linton@arm.com>
Reviewed-by: Laura Abbott <laura@labbott.name>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Fixes: da141706aea52c1a ("arm64: add better page protections to arm64")
Cc: <stable@vger.kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/vmlinux.lds.S | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 8a5d97b461c0..1ee2c3937d4e 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -61,9 +61,12 @@ PECOFF_FILE_ALIGNMENT = 0x200;
 #define PECOFF_EDATA_PADDING
 #endif
 
-#ifdef CONFIG_DEBUG_ALIGN_RODATA
+#if defined(CONFIG_DEBUG_ALIGN_RODATA)
 #define ALIGN_DEBUG_RO			. = ALIGN(1<<SECTION_SHIFT);
 #define ALIGN_DEBUG_RO_MIN(min)		ALIGN_DEBUG_RO
+#elif defined(CONFIG_DEBUG_RODATA)
+#define ALIGN_DEBUG_RO			. = ALIGN(1<<PAGE_SHIFT);
+#define ALIGN_DEBUG_RO_MIN(min)		ALIGN_DEBUG_RO
 #else
 #define ALIGN_DEBUG_RO
 #define ALIGN_DEBUG_RO_MIN(min)		. = ALIGN(min);
-- 
cgit v1.2.3-58-ga151


From bf457786f569cc480629d7855cac1fd1173ac009 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 23 Oct 2015 16:48:14 +0200
Subject: arm64/efi: move arm64 specific stub C code to libstub

Now that we added special handling to the C files in libstub, move
the one remaining arm64 specific EFI stub C file to libstub as
well, so that it gets the same treatment. This should prevent future
changes from resulting in binaries that may execute incorrectly in
UEFI context.

With efi-entry.S the only remaining EFI stub source file under
arch/arm64, we can also simplify the Makefile logic somewhat.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/Makefile                |  9 +---
 arch/arm64/kernel/efi-stub.c              | 68 -------------------------------
 drivers/firmware/efi/libstub/Makefile     |  3 ++
 drivers/firmware/efi/libstub/arm64-stub.c | 68 +++++++++++++++++++++++++++++++
 4 files changed, 73 insertions(+), 75 deletions(-)
 delete mode 100644 arch/arm64/kernel/efi-stub.c
 create mode 100644 drivers/firmware/efi/libstub/arm64-stub.c

(limited to 'arch')

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 1b6bda2ff102..474691f8b13a 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -4,11 +4,8 @@
 
 CPPFLAGS_vmlinux.lds	:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 AFLAGS_head.o		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
-CFLAGS_efi-stub.o 	:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 CFLAGS_armv8_deprecated.o := -I$(src)
 
-KASAN_SANITIZE_efi-stub.o	:= n
-
 CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_insn.o = -pg
 CFLAGS_REMOVE_return_address.o = -pg
@@ -22,9 +19,7 @@ arm64-obj-y		:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o
 
-stub-obj				:= efi-stub.o efi-entry.o
-extra-y					:= $(stub-obj)
-stub-obj				:= $(patsubst %.o,%.stub.o,$(stub-obj))
+extra-$(CONFIG_EFI)			:= efi-entry.o
 
 OBJCOPYFLAGS := --prefix-symbols=__efistub_
 $(obj)/%.stub.o: $(obj)/%.o FORCE
@@ -42,7 +37,7 @@ arm64-obj-$(CONFIG_CPU_PM)		+= sleep.o suspend.o
 arm64-obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 arm64-obj-$(CONFIG_KGDB)		+= kgdb.o
-arm64-obj-$(CONFIG_EFI)			+= efi.o $(stub-obj)
+arm64-obj-$(CONFIG_EFI)			+= efi.o efi-entry.stub.o
 arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c
deleted file mode 100644
index 816120ece6bc..000000000000
--- a/arch/arm64/kernel/efi-stub.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2013, 2014 Linaro Ltd;  <roy.franz@linaro.org>
- *
- * This file implements the EFI boot stub for the arm64 kernel.
- * Adapted from ARM version by Mark Salter <msalter@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#include <linux/efi.h>
-#include <asm/efi.h>
-#include <asm/sections.h>
-
-efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
-					unsigned long *image_addr,
-					unsigned long *image_size,
-					unsigned long *reserve_addr,
-					unsigned long *reserve_size,
-					unsigned long dram_base,
-					efi_loaded_image_t *image)
-{
-	efi_status_t status;
-	unsigned long kernel_size, kernel_memsize = 0;
-	unsigned long nr_pages;
-	void *old_image_addr = (void *)*image_addr;
-
-	/* Relocate the image, if required. */
-	kernel_size = _edata - _text;
-	if (*image_addr != (dram_base + TEXT_OFFSET)) {
-		kernel_memsize = kernel_size + (_end - _edata);
-
-		/*
-		 * First, try a straight allocation at the preferred offset.
-		 * This will work around the issue where, if dram_base == 0x0,
-		 * efi_low_alloc() refuses to allocate at 0x0 (to prevent the
-		 * address of the allocation to be mistaken for a FAIL return
-		 * value or a NULL pointer). It will also ensure that, on
-		 * platforms where the [dram_base, dram_base + TEXT_OFFSET)
-		 * interval is partially occupied by the firmware (like on APM
-		 * Mustang), we can still place the kernel at the address
-		 * 'dram_base + TEXT_OFFSET'.
-		 */
-		*image_addr = *reserve_addr = dram_base + TEXT_OFFSET;
-		nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) /
-			   EFI_PAGE_SIZE;
-		status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
-					EFI_LOADER_DATA, nr_pages,
-					(efi_physical_addr_t *)reserve_addr);
-		if (status != EFI_SUCCESS) {
-			kernel_memsize += TEXT_OFFSET;
-			status = efi_low_alloc(sys_table_arg, kernel_memsize,
-					       SZ_2M, reserve_addr);
-
-			if (status != EFI_SUCCESS) {
-				pr_efi_err(sys_table_arg, "Failed to relocate kernel\n");
-				return status;
-			}
-			*image_addr = *reserve_addr + TEXT_OFFSET;
-		}
-		memcpy((void *)*image_addr, old_image_addr, kernel_size);
-		*reserve_size = kernel_memsize;
-	}
-
-
-	return EFI_SUCCESS;
-}
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index bca9a76cbd33..92ae557abbbc 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -34,6 +34,9 @@ $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
 lib-$(CONFIG_EFI_ARMSTUB)	+= arm-stub.o fdt.o string.o \
 				   $(patsubst %.c,lib-%.o,$(arm-deps))
 
+lib-$(CONFIG_ARM64)		+= arm64-stub.o
+CFLAGS_arm64-stub.o 		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
+
 #
 # arm64 puts the stub in the kernel proper, which will unnecessarily retain all
 # code indefinitely unless it is annotated as __init/__initdata/__initconst etc.
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
new file mode 100644
index 000000000000..816120ece6bc
--- /dev/null
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2013, 2014 Linaro Ltd;  <roy.franz@linaro.org>
+ *
+ * This file implements the EFI boot stub for the arm64 kernel.
+ * Adapted from ARM version by Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include <asm/sections.h>
+
+efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
+					unsigned long *image_addr,
+					unsigned long *image_size,
+					unsigned long *reserve_addr,
+					unsigned long *reserve_size,
+					unsigned long dram_base,
+					efi_loaded_image_t *image)
+{
+	efi_status_t status;
+	unsigned long kernel_size, kernel_memsize = 0;
+	unsigned long nr_pages;
+	void *old_image_addr = (void *)*image_addr;
+
+	/* Relocate the image, if required. */
+	kernel_size = _edata - _text;
+	if (*image_addr != (dram_base + TEXT_OFFSET)) {
+		kernel_memsize = kernel_size + (_end - _edata);
+
+		/*
+		 * First, try a straight allocation at the preferred offset.
+		 * This will work around the issue where, if dram_base == 0x0,
+		 * efi_low_alloc() refuses to allocate at 0x0 (to prevent the
+		 * address of the allocation to be mistaken for a FAIL return
+		 * value or a NULL pointer). It will also ensure that, on
+		 * platforms where the [dram_base, dram_base + TEXT_OFFSET)
+		 * interval is partially occupied by the firmware (like on APM
+		 * Mustang), we can still place the kernel at the address
+		 * 'dram_base + TEXT_OFFSET'.
+		 */
+		*image_addr = *reserve_addr = dram_base + TEXT_OFFSET;
+		nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) /
+			   EFI_PAGE_SIZE;
+		status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
+					EFI_LOADER_DATA, nr_pages,
+					(efi_physical_addr_t *)reserve_addr);
+		if (status != EFI_SUCCESS) {
+			kernel_memsize += TEXT_OFFSET;
+			status = efi_low_alloc(sys_table_arg, kernel_memsize,
+					       SZ_2M, reserve_addr);
+
+			if (status != EFI_SUCCESS) {
+				pr_efi_err(sys_table_arg, "Failed to relocate kernel\n");
+				return status;
+			}
+			*image_addr = *reserve_addr + TEXT_OFFSET;
+		}
+		memcpy((void *)*image_addr, old_image_addr, kernel_size);
+		*reserve_size = kernel_memsize;
+	}
+
+
+	return EFI_SUCCESS;
+}
-- 
cgit v1.2.3-58-ga151


From aa644fa64c25ab2231a0fa9464892f5579d4e161 Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
Date: Mon, 19 Oct 2015 17:55:49 +0100
Subject: ARM64: Enable multi-core scheduler support by default

Make sure that the task scheduler domain hierarchy is set-up correctly
on systems with single or multi-cluster topology.

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Acked-by: Punit Agrawal <punit.agrawal@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index c8ccda16e079..5f760347aee2 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -51,6 +51,7 @@ CONFIG_PCI=y
 CONFIG_PCI_MSI=y
 CONFIG_PCI_XGENE=y
 CONFIG_SMP=y
+CONFIG_SCHED_MC=y
 CONFIG_PREEMPT=y
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
-- 
cgit v1.2.3-58-ga151