summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-07-15 08:36:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-07-15 08:36:13 -0700
commita5819099f601c1af5b86b1f5921a56859e45b19a (patch)
tree797e26cbdc5d45e97dd3cd92b27c754c8702d9ec /arch
parent0c3836482481200ead7b416ca80c68a29cfdaabd (diff)
parent94a2bc0f611cd9fa4d26e4679bf7ea4b01b12d56 (diff)
Merge branch 'runtime-constants'
Merge runtime constants infrastructure with implementations for x86 and arm64. This is one of four branches that came out of me looking at profiles of my kernel build filesystem load on my 128-core Altra arm64 system, where pathname walking and the user copies (particularly strncpy_from_user() for fetching the pathname from user space) is very hot. This is a very specialized "instruction alternatives" model where the dentry hash pointer and hash count will be constants for the lifetime of the kernel, but the allocation are not static but done early during the kernel boot. In order to avoid the pointer load and dynamic shift, we just rewrite the constants in the instructions in place. We can't use the "generic" alternative instructions infrastructure, because different architectures do it very differently, and it's actually simpler to just have very specific helpers, with a fallback to the generic ("old") model of just using variables for architectures that do not implement the runtime constant patching infrastructure. Link: https://lore.kernel.org/all/CAHk-=widPe38fUNjUOmX11ByDckaeEo9tN4Eiyke9u1SAtu9sA@mail.gmail.com/ * runtime-constants: arm64: add 'runtime constant' support runtime constants: add x86 architecture support runtime constants: add default dummy infrastructure vfs: dcache: move hashlen_hash() from callers into d_hash()
Diffstat (limited to 'arch')
-rw-r--r--arch/arm64/include/asm/runtime-const.h88
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S3
-rw-r--r--arch/x86/include/asm/runtime-const.h61
-rw-r--r--arch/x86/kernel/vmlinux.lds.S3
4 files changed, 155 insertions, 0 deletions
diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h
new file mode 100644
index 000000000000..be5915669d23
--- /dev/null
+++ b/arch/arm64/include/asm/runtime-const.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#include <asm/cacheflush.h>
+
+/* Sigh. You can still run arm64 in BE mode */
+#include <asm/byteorder.h>
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("1:\t" \
+ "movz %0, #0xcdef\n\t" \
+ "movk %0, #0x89ab, lsl #16\n\t" \
+ "movk %0, #0x4567, lsl #32\n\t" \
+ "movk %0, #0x0123, lsl #48\n\t" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret)); \
+ __ret; })
+
+#define runtime_const_shift_right_32(val, sym) ({ \
+ unsigned long __ret; \
+ asm_inline("1:\t" \
+ "lsr %w0,%w1,#12\n\t" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"r" (0u+(val))); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */
+static inline void __runtime_fixup_16(__le32 *p, unsigned int val)
+{
+ u32 insn = le32_to_cpu(*p);
+ insn &= 0xffe0001f;
+ insn |= (val & 0xffff) << 5;
+ *p = cpu_to_le32(insn);
+}
+
+static inline void __runtime_fixup_caches(void *where, unsigned int insns)
+{
+ unsigned long va = (unsigned long)where;
+ caches_clean_inval_pou(va, va + 4*insns);
+}
+
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ __le32 *p = lm_alias(where);
+ __runtime_fixup_16(p, val);
+ __runtime_fixup_16(p+1, val >> 16);
+ __runtime_fixup_16(p+2, val >> 32);
+ __runtime_fixup_16(p+3, val >> 48);
+ __runtime_fixup_caches(where, 4);
+}
+
+/* Immediate value is 6 bits starting at bit #16 */
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ __le32 *p = lm_alias(where);
+ u32 insn = le32_to_cpu(*p);
+ insn &= 0xffc0ffff;
+ insn |= (val & 63) << 16;
+ *p = cpu_to_le32(insn);
+ __runtime_fixup_caches(where, 1);
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 755a22d4f840..55a8e310ea12 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -264,6 +264,9 @@ SECTIONS
EXIT_DATA
}
+ RUNTIME_CONST(shift, d_hash_shift)
+ RUNTIME_CONST(ptr, dentry_hashtable)
+
PERCPU_SECTION(L1_CACHE_BYTES)
HYPERVISOR_PERCPU_SECTION
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
new file mode 100644
index 000000000000..24e3a53ca255
--- /dev/null
+++ b/arch/x86/include/asm/runtime-const.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("mov %1,%0\n1:\n" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - %c2 - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"i" ((unsigned long)0x0123456789abcdefull), \
+ "i" (sizeof(long))); \
+ __ret; })
+
+// The 'typeof' will create at _least_ a 32-bit type, but
+// will happily also take a bigger type and the 'shrl' will
+// clear the upper bits
+#define runtime_const_shift_right_32(val, sym) ({ \
+ typeof(0u+(val)) __ret = (val); \
+ asm_inline("shrl $12,%k0\n1:\n" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - 1 - .\n\t" \
+ ".popsection" \
+ :"+r" (__ret)); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/*
+ * The text patching is trivial - you can only do this at init time,
+ * when the text section hasn't been marked RO, and before the text
+ * has ever been executed.
+ */
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ *(unsigned long *)where = val;
+}
+
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ *(unsigned char *)where = val;
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3509afc6a672..6e73403e874f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -357,6 +357,9 @@ SECTIONS
PERCPU_SECTION(INTERNODE_CACHE_BYTES)
#endif
+ RUNTIME_CONST(shift, d_hash_shift)
+ RUNTIME_CONST(ptr, dentry_hashtable)
+
. = ALIGN(PAGE_SIZE);
/* freed after init ends here */