summaryrefslogtreecommitdiff
path: root/arch/powerpc/include/asm
diff options
context:
space:
mode:
authorNaveen N Rao <naveen@kernel.org>2024-10-30 12:38:45 +0530
committerMichael Ellerman <mpe@ellerman.id.au>2024-10-31 11:00:54 +1100
commiteec37961a56aa4f3fe1c33ffd48eec7d1bb0c009 (patch)
tree23efd34558b2a4646ccec6797d3ffa5d71d08142 /arch/powerpc/include/asm
parent1198c9c689cfdaa2d08eb508c13ff116043f07b7 (diff)
powerpc64/ftrace: Move ftrace sequence out of line
Function profile sequence on powerpc includes two instructions at the beginning of each function: mflr r0 bl ftrace_caller The call to ftrace_caller() gets nop'ed out during kernel boot and is patched in when ftrace is enabled. Given the sequence, we cannot return from ftrace_caller with 'blr' as we need to keep LR and r0 intact. This results in link stack (return address predictor) imbalance when ftrace is enabled. To address that, we would like to use a three instruction sequence: mflr r0 bl ftrace_caller mtlr r0 Further more, to support DYNAMIC_FTRACE_WITH_CALL_OPS, we need to reserve two instruction slots before the function. This results in a total of five instruction slots to be reserved for ftrace use on each function that is traced. Move the function profile sequence out-of-line to minimize its impact. To do this, we reserve a single nop at function entry using -fpatchable-function-entry=1 and add a pass on vmlinux.o to determine the total number of functions that can be traced. This is then used to generate a .S file reserving the appropriate amount of space for use as ftrace stubs, which is built and linked into vmlinux. On bootup, the stub space is split into separate stubs per function and populated with the proper instruction sequence. A pointer to the associated stub is maintained in dyn_arch_ftrace. For modules, space for ftrace stubs is reserved from the generic module stub space. This is restricted to and enabled by default only on 64-bit powerpc, though there are some changes to accommodate 32-bit powerpc. This is done so that 32-bit powerpc could choose to opt into this based on further tests and benchmarks. As an example, after this patch, kernel functions will have a single nop at function entry: <kernel_clone>: addis r2,r12,467 addi r2,r2,-16028 nop mfocrf r11,8 ... When ftrace is enabled, the nop is converted to an unconditional branch to the stub associated with that function: <kernel_clone>: addis r2,r12,467 addi r2,r2,-16028 b ftrace_ool_stub_text_end+0x11b28 mfocrf r11,8 ... The associated stub: <ftrace_ool_stub_text_end+0x11b28>: mflr r0 bl ftrace_caller mtlr r0 b kernel_clone+0xc ... This change showed an improvement of ~10% in null_syscall benchmark on a Power 10 system with ftrace enabled. Signed-off-by: Naveen N Rao <naveen@kernel.org> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://patch.msgid.link/20241030070850.1361304-13-hbathini@linux.ibm.com
Diffstat (limited to 'arch/powerpc/include/asm')
-rw-r--r--arch/powerpc/include/asm/ftrace.h11
-rw-r--r--arch/powerpc/include/asm/module.h5
2 files changed, 16 insertions, 0 deletions
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 278d4548e8f1..bdbafc668b20 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -24,6 +24,10 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
struct module;
struct dyn_ftrace;
struct dyn_arch_ftrace {
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ /* pointer to the associated out-of-line stub */
+ unsigned long ool_stub;
+#endif
};
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -130,6 +134,13 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; }
#ifdef CONFIG_FUNCTION_TRACER
extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+struct ftrace_ool_stub {
+ u32 insn[4];
+};
+extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_inittext[];
+extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_inittext_count;
+#endif
void ftrace_free_init_tramp(void);
unsigned long ftrace_call_adjust(unsigned long addr);
#else
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 300c777cc307..9ee70a4a0fde 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -47,6 +47,11 @@ struct mod_arch_specific {
#ifdef CONFIG_DYNAMIC_FTRACE
unsigned long tramp;
unsigned long tramp_regs;
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ struct ftrace_ool_stub *ool_stubs;
+ unsigned int ool_stub_count;
+ unsigned int ool_stub_index;
+#endif
#endif
};