From 79922b8009c074e30d3a97f5a24519f11814ad03 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)"
Date: Tue, 6 May 2014 21:56:17 -0400
Subject: ftrace: Optimize function graph to be called directly

Function graph tracing is a bit different from the function tracers:
it is processed after either ftrace_caller or ftrace_regs_caller, and
there is only one place to modify the jump to ftrace_graph_caller; the
jump needs to happen after the restore of registers.

The function graph tracer depends on the function tracer: even when
function graph tracing runs by itself, the save and restore of
registers is still done for function tracing, regardless of whether
function tracing is actually happening, before the function graph code
is called.

If no function tracing is happening, it is possible to call the
function graph tracer directly and avoid the wasted effort of saving
and restoring regs for function tracing.

This requires adding new flags to the dyn_ftrace records:

  FTRACE_FL_TRAMP
  FTRACE_FL_TRAMP_EN

The first is set if the count for the record is one, and the
ftrace_ops associated with that record has its own trampoline. That
way the mcount code can call that trampoline directly.

In the future, trampolines can be added to arbitrary ftrace_ops: with
two or more ftrace_ops registered with ftrace (like kprobes and perf)
that do not trace the same functions, instead of looping over all
registered ftrace_ops and checking their hashes, the ftrace_ops
trampoline can be called directly, which in turn calls the registered
ftrace_ops function directly.
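Conceptually, when a call site is updated its new call address resolves
to the ops' own trampoline when FTRACE_FL_TRAMP is set, and falls back
to the regs or plain ftrace caller otherwise. Below is a minimal sketch
of that selection, assuming a hypothetical lookup helper that finds the
single ftrace_ops attached to the record; only the flags, the
ftrace_ops->trampoline field and the FTRACE_*_ADDR macros come from the
header change in this patch:

  /* Sketch only: pick the address a patched mcount call site should jump to. */
  static unsigned long get_new_call_addr(struct dyn_ftrace *rec)
  {
          struct ftrace_ops *ops;

          /* Record is serviced by a single ops that has its own trampoline */
          if (rec->flags & FTRACE_FL_TRAMP) {
                  ops = find_tramp_ops_for(rec);  /* hypothetical lookup */
                  if (ops && ops->trampoline)
                          return ops->trampoline;
          }

          /* Otherwise fall back to the generic callers */
          if (rec->flags & FTRACE_FL_REGS)
                  return FTRACE_REGS_ADDR;        /* ftrace_regs_caller */

          return FTRACE_ADDR;                     /* ftrace_caller */
  }

This is how a function-graph-only setup can end up jumping straight to
ftrace_graph_caller (FTRACE_GRAPH_ADDR below) instead of paying for the
register save and restore in ftrace_caller first.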
Without this patch perf showed:

   0.05%  hackbench  [kernel.kallsyms]  [k] ftrace_caller
   0.05%  hackbench  [kernel.kallsyms]  [k] arch_local_irq_save
   0.05%  hackbench  [kernel.kallsyms]  [k] native_sched_clock
   0.04%  hackbench  [kernel.kallsyms]  [k] __buffer_unlock_commit
   0.04%  hackbench  [kernel.kallsyms]  [k] preempt_trace
   0.04%  hackbench  [kernel.kallsyms]  [k] prepare_ftrace_return
   0.04%  hackbench  [kernel.kallsyms]  [k] __this_cpu_preempt_check
   0.04%  hackbench  [kernel.kallsyms]  [k] ftrace_graph_caller

Note that ftrace_caller took up more time than ftrace_graph_caller did.

With this patch:

   0.05%  hackbench  [kernel.kallsyms]  [k] __buffer_unlock_commit
   0.04%  hackbench  [kernel.kallsyms]  [k] call_filter_check_discard
   0.04%  hackbench  [kernel.kallsyms]  [k] ftrace_graph_caller
   0.04%  hackbench  [kernel.kallsyms]  [k] sched_clock

The ftrace_caller is nowhere to be found and ftrace_graph_caller still
takes up the same percentage.

Signed-off-by: Steven Rostedt
---
 include/linux/ftrace.h | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux/ftrace.h')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e5baa6b2c93f..11e18fd58b1a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -118,12 +118,15 @@ struct ftrace_ops {
 	ftrace_func_t			func;
 	struct ftrace_ops		*next;
 	unsigned long			flags;
-	int __percpu			*disabled;
 	void				*private;
+	int __percpu			*disabled;
 #ifdef CONFIG_DYNAMIC_FTRACE
+	int				trampolines;
 	struct ftrace_hash		*notrace_hash;
 	struct ftrace_hash		*filter_hash;
+	struct ftrace_hash		*tramp_hash;
 	struct mutex			regex_lock;
+	unsigned long			trampoline;
 #endif
 };
 
@@ -317,13 +320,15 @@ extern int ftrace_nr_registered_ops(void);
  * from tracing that function.
  */
 enum {
-	FTRACE_FL_ENABLED	= (1UL << 29),
+	FTRACE_FL_ENABLED	= (1UL << 31),
 	FTRACE_FL_REGS		= (1UL << 30),
-	FTRACE_FL_REGS_EN	= (1UL << 31)
+	FTRACE_FL_REGS_EN	= (1UL << 29),
+	FTRACE_FL_TRAMP		= (1UL << 28),
+	FTRACE_FL_TRAMP_EN	= (1UL << 27),
 };
 
-#define FTRACE_REF_MAX_SHIFT	29
-#define FTRACE_FL_BITS		3
+#define FTRACE_REF_MAX_SHIFT	27
+#define FTRACE_FL_BITS		5
 #define FTRACE_FL_MASKED_BITS	((1UL << FTRACE_FL_BITS) - 1)
 #define FTRACE_FL_MASK		(FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT)
 #define FTRACE_REF_MAX		((1UL << FTRACE_REF_MAX_SHIFT) - 1)
@@ -436,6 +441,10 @@ void ftrace_modify_all_code(int command);
 #define FTRACE_ADDR ((unsigned long)ftrace_caller)
 #endif
 
+#ifndef FTRACE_GRAPH_ADDR
+#define FTRACE_GRAPH_ADDR ((unsigned long)ftrace_graph_caller)
+#endif
+
 #ifndef FTRACE_REGS_ADDR
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 # define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller)
-- 
cgit v1.2.3-58-ga151
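For reference, with FTRACE_FL_BITS now 5 and FTRACE_REF_MAX_SHIFT now
27, the dyn_ftrace flags word splits into five state bits above a
27-bit reference count. A small sketch derived from the macros in the
hunk above (the helper name is illustrative, not part of the patch):

  /*
   * dyn_ftrace->flags layout after this change:
   *
   *   bit 31       FTRACE_FL_ENABLED
   *   bit 30       FTRACE_FL_REGS
   *   bit 29       FTRACE_FL_REGS_EN
   *   bit 28       FTRACE_FL_TRAMP
   *   bit 27       FTRACE_FL_TRAMP_EN
   *   bits 26..0   reference count, capped at FTRACE_REF_MAX ((1UL << 27) - 1)
   */
  static unsigned long rec_ref_count(struct dyn_ftrace *rec)   /* illustrative */
  {
          /* Mask off the five flag bits to recover the reference count */
          return rec->flags & ~FTRACE_FL_MASK;
  }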