diff options
Diffstat (limited to 'kernel/trace/fgraph.c')
-rw-r--r-- | kernel/trace/fgraph.c | 1054 |
1 files changed, 858 insertions, 196 deletions
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index a130b2d898f7..fc205ad167a9 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -7,9 +7,11 @@ * * Highly modified by Steven Rostedt (VMware). */ +#include <linux/bits.h> #include <linux/jump_label.h> #include <linux/suspend.h> #include <linux/ftrace.h> +#include <linux/static_call.h> #include <linux/slab.h> #include <trace/events/sched.h> @@ -17,17 +19,447 @@ #include "ftrace_internal.h" #include "trace.h" -#ifdef CONFIG_DYNAMIC_FTRACE -#define ASSIGN_OPS_HASH(opsname, val) \ - .func_hash = val, \ - .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), -#else -#define ASSIGN_OPS_HASH(opsname, val) -#endif +/* + * FGRAPH_FRAME_SIZE: Size in bytes of the meta data on the shadow stack + * FGRAPH_FRAME_OFFSET: Size in long words of the meta data frame + */ +#define FGRAPH_FRAME_SIZE sizeof(struct ftrace_ret_stack) +#define FGRAPH_FRAME_OFFSET DIV_ROUND_UP(FGRAPH_FRAME_SIZE, sizeof(long)) + +/* + * On entry to a function (via function_graph_enter()), a new fgraph frame + * (ftrace_ret_stack) is pushed onto the stack as well as a word that + * holds a bitmask and a type (called "bitmap"). The bitmap is defined as: + * + * bits: 0 - 9 offset in words from the previous ftrace_ret_stack + * + * bits: 10 - 11 Type of storage + * 0 - reserved + * 1 - bitmap of fgraph_array index + * 2 - reserved data + * + * For type with "bitmap of fgraph_array index" (FGRAPH_TYPE_BITMAP): + * bits: 12 - 27 The bitmap of fgraph_ops fgraph_array index + * That is, it's a bitmask of 0-15 (16 bits) + * where if a corresponding ops in the fgraph_array[] + * expects a callback from the return of the function + * it's corresponding bit will be set. + * + * + * The top of the ret_stack (when not empty) will always have a reference + * word that points to the last fgraph frame that was saved. + * + * For reserved data: + * bits: 12 - 17 The size in words that is stored + * bits: 18 - 23 The index of fgraph_array, which shows who is stored + * + * That is, at the end of function_graph_enter, if the first and forth + * fgraph_ops on the fgraph_array[] (index 0 and 3) needs their retfunc called + * on the return of the function being traced, and the forth fgraph_ops + * stored two words of data, this is what will be on the task's shadow + * ret_stack: (the stack grows upward) + * + * ret_stack[SHADOW_STACK_OFFSET] + * | SHADOW_STACK_TASK_VARS(ret_stack)[15] | + * ... + * | SHADOW_STACK_TASK_VARS(ret_stack)[0] | + * ret_stack[SHADOW_STACK_MAX_OFFSET] + * ... + * | | <- task->curr_ret_stack + * +--------------------------------------------+ + * | (3 << 12) | (3 << 10) | FGRAPH_FRAME_OFFSET| + * | *or put another way* | + * | (3 << FGRAPH_DATA_INDEX_SHIFT)| \ | This is for fgraph_ops[3]. + * | ((2 - 1) << FGRAPH_DATA_SHIFT)| \ | The data size is 2 words. + * | (FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT)| \ | + * | (offset2:FGRAPH_FRAME_OFFSET+3) | <- the offset2 is from here + * +--------------------------------------------+ ( It is 4 words from the ret_stack) + * | STORED DATA WORD 2 | + * | STORED DATA WORD 1 | + * +--------------------------------------------+ + * | (9 << 12) | (1 << 10) | FGRAPH_FRAME_OFFSET| + * | *or put another way* | + * | (BIT(3)|BIT(0)) << FGRAPH_INDEX_SHIFT | \ | + * | FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT| \ | + * | (offset1:FGRAPH_FRAME_OFFSET) | <- the offset1 is from here + * +--------------------------------------------+ + * | struct ftrace_ret_stack | + * | (stores the saved ret pointer) | <- the offset points here + * +--------------------------------------------+ + * | (X) | (N) | ( N words away from + * | | previous ret_stack) + * ... + * ret_stack[0] + * + * If a backtrace is required, and the real return pointer needs to be + * fetched, then it looks at the task's curr_ret_stack offset, if it + * is greater than zero (reserved, or right before popped), it would mask + * the value by FGRAPH_FRAME_OFFSET_MASK to get the offset of the + * ftrace_ret_stack structure stored on the shadow stack. + */ + +/* + * The following is for the top word on the stack: + * + * FGRAPH_FRAME_OFFSET (0-9) holds the offset delta to the fgraph frame + * FGRAPH_TYPE (10-11) holds the type of word this is. + * (RESERVED or BITMAP) + */ +#define FGRAPH_FRAME_OFFSET_BITS 10 +#define FGRAPH_FRAME_OFFSET_MASK GENMASK(FGRAPH_FRAME_OFFSET_BITS - 1, 0) + +#define FGRAPH_TYPE_BITS 2 +#define FGRAPH_TYPE_MASK GENMASK(FGRAPH_TYPE_BITS - 1, 0) +#define FGRAPH_TYPE_SHIFT FGRAPH_FRAME_OFFSET_BITS + +enum { + FGRAPH_TYPE_RESERVED = 0, + FGRAPH_TYPE_BITMAP = 1, + FGRAPH_TYPE_DATA = 2, +}; + +/* + * For BITMAP type: + * FGRAPH_INDEX (12-27) bits holding the gops index wanting return callback called + */ +#define FGRAPH_INDEX_BITS 16 +#define FGRAPH_INDEX_MASK GENMASK(FGRAPH_INDEX_BITS - 1, 0) +#define FGRAPH_INDEX_SHIFT (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS) + +/* + * For DATA type: + * FGRAPH_DATA (12-17) bits hold the size of data (in words) + * FGRAPH_INDEX (18-23) bits hold the index for which gops->idx the data is for + * + * Note: + * data_size == 0 means 1 word, and 31 (=2^5 - 1) means 32 words. + */ +#define FGRAPH_DATA_BITS 5 +#define FGRAPH_DATA_MASK GENMASK(FGRAPH_DATA_BITS - 1, 0) +#define FGRAPH_DATA_SHIFT (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS) +#define FGRAPH_MAX_DATA_SIZE (sizeof(long) * (1 << FGRAPH_DATA_BITS)) + +#define FGRAPH_DATA_INDEX_BITS 4 +#define FGRAPH_DATA_INDEX_MASK GENMASK(FGRAPH_DATA_INDEX_BITS - 1, 0) +#define FGRAPH_DATA_INDEX_SHIFT (FGRAPH_DATA_SHIFT + FGRAPH_DATA_BITS) + +#define FGRAPH_MAX_INDEX \ + ((FGRAPH_INDEX_SIZE << FGRAPH_DATA_BITS) + FGRAPH_RET_INDEX) + +#define FGRAPH_ARRAY_SIZE FGRAPH_INDEX_BITS + +/* + * SHADOW_STACK_SIZE: The size in bytes of the entire shadow stack + * SHADOW_STACK_OFFSET: The size in long words of the shadow stack + * SHADOW_STACK_MAX_OFFSET: The max offset of the stack for a new frame to be added + */ +#define SHADOW_STACK_SIZE (PAGE_SIZE) +#define SHADOW_STACK_OFFSET (SHADOW_STACK_SIZE / sizeof(long)) +/* Leave on a buffer at the end */ +#define SHADOW_STACK_MAX_OFFSET \ + (SHADOW_STACK_OFFSET - (FGRAPH_FRAME_OFFSET + 1 + FGRAPH_ARRAY_SIZE)) + +/* RET_STACK(): Return the frame from a given @offset from task @t */ +#define RET_STACK(t, offset) ((struct ftrace_ret_stack *)(&(t)->ret_stack[offset])) + +/* + * Each fgraph_ops has a reservered unsigned long at the end (top) of the + * ret_stack to store task specific state. + */ +#define SHADOW_STACK_TASK_VARS(ret_stack) \ + ((unsigned long *)(&(ret_stack)[SHADOW_STACK_OFFSET - FGRAPH_ARRAY_SIZE])) DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph); int ftrace_graph_active; +static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE]; +static unsigned long fgraph_array_bitmask; + +/* LRU index table for fgraph_array */ +static int fgraph_lru_table[FGRAPH_ARRAY_SIZE]; +static int fgraph_lru_next; +static int fgraph_lru_last; + +/* Initialize fgraph_lru_table with unused index */ +static void fgraph_lru_init(void) +{ + int i; + + for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) + fgraph_lru_table[i] = i; +} + +/* Release the used index to the LRU table */ +static int fgraph_lru_release_index(int idx) +{ + if (idx < 0 || idx >= FGRAPH_ARRAY_SIZE || + WARN_ON_ONCE(fgraph_lru_table[fgraph_lru_last] != -1)) + return -1; + + fgraph_lru_table[fgraph_lru_last] = idx; + fgraph_lru_last = (fgraph_lru_last + 1) % FGRAPH_ARRAY_SIZE; + + clear_bit(idx, &fgraph_array_bitmask); + return 0; +} + +/* Allocate a new index from LRU table */ +static int fgraph_lru_alloc_index(void) +{ + int idx = fgraph_lru_table[fgraph_lru_next]; + + /* No id is available */ + if (idx == -1) + return -1; + + fgraph_lru_table[fgraph_lru_next] = -1; + fgraph_lru_next = (fgraph_lru_next + 1) % FGRAPH_ARRAY_SIZE; + + set_bit(idx, &fgraph_array_bitmask); + return idx; +} + +/* Get the offset to the fgraph frame from a ret_stack value */ +static inline int __get_offset(unsigned long val) +{ + return val & FGRAPH_FRAME_OFFSET_MASK; +} + +/* Get the type of word from a ret_stack value */ +static inline int __get_type(unsigned long val) +{ + return (val >> FGRAPH_TYPE_SHIFT) & FGRAPH_TYPE_MASK; +} + +/* Get the data_index for a DATA type ret_stack word */ +static inline int __get_data_index(unsigned long val) +{ + return (val >> FGRAPH_DATA_INDEX_SHIFT) & FGRAPH_DATA_INDEX_MASK; +} + +/* Get the data_size for a DATA type ret_stack word */ +static inline int __get_data_size(unsigned long val) +{ + return ((val >> FGRAPH_DATA_SHIFT) & FGRAPH_DATA_MASK) + 1; +} + +/* Get the word from the ret_stack at @offset */ +static inline unsigned long get_fgraph_entry(struct task_struct *t, int offset) +{ + return t->ret_stack[offset]; +} + +/* Get the FRAME_OFFSET from the word from the @offset on ret_stack */ +static inline int get_frame_offset(struct task_struct *t, int offset) +{ + return __get_offset(t->ret_stack[offset]); +} + +/* For BITMAP type: get the bitmask from the @offset at ret_stack */ +static inline unsigned long +get_bitmap_bits(struct task_struct *t, int offset) +{ + return (t->ret_stack[offset] >> FGRAPH_INDEX_SHIFT) & FGRAPH_INDEX_MASK; +} + +/* Write the bitmap to the ret_stack at @offset (does index, offset and bitmask) */ +static inline void +set_bitmap(struct task_struct *t, int offset, unsigned long bitmap) +{ + t->ret_stack[offset] = (bitmap << FGRAPH_INDEX_SHIFT) | + (FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET; +} + +/* For DATA type: get the data saved under the ret_stack word at @offset */ +static inline void *get_data_type_data(struct task_struct *t, int offset) +{ + unsigned long val = t->ret_stack[offset]; + + if (__get_type(val) != FGRAPH_TYPE_DATA) + return NULL; + offset -= __get_data_size(val); + return (void *)&t->ret_stack[offset]; +} + +/* Create the ret_stack word for a DATA type */ +static inline unsigned long make_data_type_val(int idx, int size, int offset) +{ + return (idx << FGRAPH_DATA_INDEX_SHIFT) | + ((size - 1) << FGRAPH_DATA_SHIFT) | + (FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT) | offset; +} + +/* ftrace_graph_entry set to this to tell some archs to run function graph */ +static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops) +{ + return 0; +} + +/* ftrace_graph_return set to this to tell some archs to run function graph */ +static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops) +{ +} + +static void ret_stack_set_task_var(struct task_struct *t, int idx, long val) +{ + unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack); + + gvals[idx] = val; +} + +static unsigned long * +ret_stack_get_task_var(struct task_struct *t, int idx) +{ + unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack); + + return &gvals[idx]; +} + +static void ret_stack_init_task_vars(unsigned long *ret_stack) +{ + unsigned long *gvals = SHADOW_STACK_TASK_VARS(ret_stack); + + memset(gvals, 0, sizeof(*gvals) * FGRAPH_ARRAY_SIZE); +} + +/** + * fgraph_reserve_data - Reserve storage on the task's ret_stack + * @idx: The index of fgraph_array + * @size_bytes: The size in bytes to reserve + * + * Reserves space of up to FGRAPH_MAX_DATA_SIZE bytes on the + * task's ret_stack shadow stack, for a given fgraph_ops during + * the entryfunc() call. If entryfunc() returns zero, the storage + * is discarded. An entryfunc() can only call this once per iteration. + * The fgraph_ops retfunc() can retrieve this stored data with + * fgraph_retrieve_data(). + * + * Returns: On success, a pointer to the data on the stack. + * Otherwise, NULL if there's not enough space left on the + * ret_stack for the data, or if fgraph_reserve_data() was called + * more than once for a single entryfunc() call. + */ +void *fgraph_reserve_data(int idx, int size_bytes) +{ + unsigned long val; + void *data; + int curr_ret_stack = current->curr_ret_stack; + int data_size; + + if (size_bytes > FGRAPH_MAX_DATA_SIZE) + return NULL; + + /* Convert the data size to number of longs. */ + data_size = (size_bytes + sizeof(long) - 1) >> (sizeof(long) == 4 ? 2 : 3); + + val = get_fgraph_entry(current, curr_ret_stack - 1); + data = ¤t->ret_stack[curr_ret_stack]; + + curr_ret_stack += data_size + 1; + if (unlikely(curr_ret_stack >= SHADOW_STACK_MAX_OFFSET)) + return NULL; + + val = make_data_type_val(idx, data_size, __get_offset(val) + data_size + 1); + + /* Set the last word to be reserved */ + current->ret_stack[curr_ret_stack - 1] = val; + + /* Make sure interrupts see this */ + barrier(); + current->curr_ret_stack = curr_ret_stack; + /* Again sync with interrupts, and reset reserve */ + current->ret_stack[curr_ret_stack - 1] = val; + + return data; +} + +/** + * fgraph_retrieve_data - Retrieve stored data from fgraph_reserve_data() + * @idx: the index of fgraph_array (fgraph_ops::idx) + * @size_bytes: pointer to retrieved data size. + * + * This is to be called by a fgraph_ops retfunc(), to retrieve data that + * was stored by the fgraph_ops entryfunc() on the function entry. + * That is, this will retrieve the data that was reserved on the + * entry of the function that corresponds to the exit of the function + * that the fgraph_ops retfunc() is called on. + * + * Returns: The stored data from fgraph_reserve_data() called by the + * matching entryfunc() for the retfunc() this is called from. + * Or NULL if there was nothing stored. + */ +void *fgraph_retrieve_data(int idx, int *size_bytes) +{ + int offset = current->curr_ret_stack - 1; + unsigned long val; + + val = get_fgraph_entry(current, offset); + while (__get_type(val) == FGRAPH_TYPE_DATA) { + if (__get_data_index(val) == idx) + goto found; + offset -= __get_data_size(val) + 1; + val = get_fgraph_entry(current, offset); + } + return NULL; +found: + if (size_bytes) + *size_bytes = __get_data_size(val) * sizeof(long); + return get_data_type_data(current, offset); +} + +/** + * fgraph_get_task_var - retrieve a task specific state variable + * @gops: The ftrace_ops that owns the task specific variable + * + * Every registered fgraph_ops has a task state variable + * reserved on the task's ret_stack. This function returns the + * address to that variable. + * + * Returns the address to the fgraph_ops @gops tasks specific + * unsigned long variable. + */ +unsigned long *fgraph_get_task_var(struct fgraph_ops *gops) +{ + return ret_stack_get_task_var(current, gops->idx); +} + +/* + * @offset: The offset into @t->ret_stack to find the ret_stack entry + * @frame_offset: Where to place the offset into @t->ret_stack of that entry + * + * Returns a pointer to the previous ret_stack below @offset or NULL + * when it reaches the bottom of the stack. + * + * Calling this with: + * + * offset = task->curr_ret_stack; + * do { + * ret_stack = get_ret_stack(task, offset, &offset); + * } while (ret_stack); + * + * Will iterate through all the ret_stack entries from curr_ret_stack + * down to the first one. + */ +static inline struct ftrace_ret_stack * +get_ret_stack(struct task_struct *t, int offset, int *frame_offset) +{ + int offs; + + BUILD_BUG_ON(FGRAPH_FRAME_SIZE % sizeof(long)); + + if (unlikely(offset <= 0)) + return NULL; + + offs = get_frame_offset(t, --offset); + if (WARN_ON_ONCE(offs <= 0 || offs > offset)) + return NULL; + + offset -= offs; + + *frame_offset = offset; + return RET_STACK(t, offset); +} + /* Both enabled by default (can be cleared by function_graph tracer flags */ static bool fgraph_sleep_time = true; @@ -51,6 +483,27 @@ int __weak ftrace_disable_ftrace_graph_caller(void) } #endif +int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) +{ + return 0; +} + +static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) +{ +} + +static struct fgraph_ops fgraph_stub = { + .entryfunc = ftrace_graph_entry_stub, + .retfunc = ftrace_graph_ret_stub, +}; + +static struct fgraph_ops *fgraph_direct_gops = &fgraph_stub; +DEFINE_STATIC_CALL(fgraph_func, ftrace_graph_entry_stub); +DEFINE_STATIC_CALL(fgraph_retfunc, ftrace_graph_ret_stub); +static DEFINE_STATIC_KEY_TRUE(fgraph_do_direct); + /** * ftrace_graph_stop - set to permanently disable function graph tracing * @@ -67,10 +520,13 @@ void ftrace_graph_stop(void) /* Add a function return address to the trace stack on thread info.*/ static int ftrace_push_return_trace(unsigned long ret, unsigned long func, - unsigned long frame_pointer, unsigned long *retp) + unsigned long frame_pointer, unsigned long *retp, + int fgraph_idx) { + struct ftrace_ret_stack *ret_stack; unsigned long long calltime; - int index; + unsigned long val; + int offset; if (unlikely(ftrace_graph_is_dead())) return -EBUSY; @@ -78,32 +534,67 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, if (!current->ret_stack) return -EBUSY; + BUILD_BUG_ON(SHADOW_STACK_SIZE % sizeof(long)); + + /* Set val to "reserved" with the delta to the new fgraph frame */ + val = (FGRAPH_TYPE_RESERVED << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET; + /* * We must make sure the ret_stack is tested before we read * anything else. */ smp_rmb(); - /* The return trace stack is full */ - if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { + /* + * Check if there's room on the shadow stack to fit a fraph frame + * and a bitmap word. + */ + if (current->curr_ret_stack + FGRAPH_FRAME_OFFSET + 1 >= SHADOW_STACK_MAX_OFFSET) { atomic_inc(¤t->trace_overrun); return -EBUSY; } calltime = trace_clock_local(); - index = ++current->curr_ret_stack; + offset = READ_ONCE(current->curr_ret_stack); + ret_stack = RET_STACK(current, offset); + offset += FGRAPH_FRAME_OFFSET; + + /* ret offset = FGRAPH_FRAME_OFFSET ; type = reserved */ + current->ret_stack[offset] = val; + ret_stack->ret = ret; + /* + * The unwinders expect curr_ret_stack to point to either zero + * or an offset where to find the next ret_stack. Even though the + * ret stack might be bogus, we want to write the ret and the + * offset to find the ret_stack before we increment the stack point. + * If an interrupt comes in now before we increment the curr_ret_stack + * it may blow away what we wrote. But that's fine, because the + * offset will still be correct (even though the 'ret' won't be). + * What we worry about is the offset being correct after we increment + * the curr_ret_stack and before we update that offset, as if an + * interrupt comes in and does an unwind stack dump, it will need + * at least a correct offset! + */ + barrier(); + WRITE_ONCE(current->curr_ret_stack, offset + 1); + /* + * This next barrier is to ensure that an interrupt coming in + * will not corrupt what we are about to write. + */ barrier(); - current->ret_stack[index].ret = ret; - current->ret_stack[index].func = func; - current->ret_stack[index].calltime = calltime; + + /* Still keep it reserved even if an interrupt came in */ + current->ret_stack[offset] = val; + + ret_stack->ret = ret; + ret_stack->func = func; + ret_stack->calltime = calltime; #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - current->ret_stack[index].fp = frame_pointer; -#endif -#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR - current->ret_stack[index].retp = retp; + ret_stack->fp = frame_pointer; #endif - return 0; + ret_stack->retp = retp; + return offset; } /* @@ -120,44 +611,85 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, # define MCOUNT_INSN_SIZE 0 #endif +/* If the caller does not use ftrace, call this function. */ int function_graph_enter(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp) { struct ftrace_graph_ent trace; + unsigned long bitmap = 0; + int offset; + int i; trace.func = func; trace.depth = ++current->curr_ret_depth; - if (ftrace_push_return_trace(ret, func, frame_pointer, retp)) + offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0); + if (offset < 0) goto out; - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) +#ifdef CONFIG_HAVE_STATIC_CALL + if (static_branch_likely(&fgraph_do_direct)) { + int save_curr_ret_stack = current->curr_ret_stack; + + if (static_call(fgraph_func)(&trace, fgraph_direct_gops)) + bitmap |= BIT(fgraph_direct_gops->idx); + else + /* Clear out any saved storage */ + current->curr_ret_stack = save_curr_ret_stack; + } else +#endif + { + for_each_set_bit(i, &fgraph_array_bitmask, + sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) { + struct fgraph_ops *gops = READ_ONCE(fgraph_array[i]); + int save_curr_ret_stack; + + if (gops == &fgraph_stub) + continue; + + save_curr_ret_stack = current->curr_ret_stack; + if (ftrace_ops_test(&gops->ops, func, NULL) && + gops->entryfunc(&trace, gops)) + bitmap |= BIT(i); + else + /* Clear out any saved storage */ + current->curr_ret_stack = save_curr_ret_stack; + } + } + + if (!bitmap) goto out_ret; + /* + * Since this function uses fgraph_idx = 0 as a tail-call checking + * flag, set that bit always. + */ + set_bitmap(current, offset, bitmap | BIT(0)); + return 0; out_ret: - current->curr_ret_stack--; + current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1; out: current->curr_ret_depth--; return -EBUSY; } /* Retrieve a function return address to the trace stack on thread info.*/ -static void +static struct ftrace_ret_stack * ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, - unsigned long frame_pointer) + unsigned long frame_pointer, int *offset) { - int index; + struct ftrace_ret_stack *ret_stack; - index = current->curr_ret_stack; + ret_stack = get_ret_stack(current, current->curr_ret_stack, offset); - if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) { + if (unlikely(!ret_stack)) { ftrace_graph_stop(); - WARN_ON(1); + WARN(1, "Bad function graph ret_stack pointer: %d", + current->curr_ret_stack); /* Might as well panic, otherwise we have no where to go */ *ret = (unsigned long)panic; - return; + return NULL; } #ifdef HAVE_FUNCTION_GRAPH_FP_TEST @@ -175,30 +707,33 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, * Note, -mfentry does not use frame pointers, and this test * is not needed if CC_USING_FENTRY is set. */ - if (unlikely(current->ret_stack[index].fp != frame_pointer)) { + if (unlikely(ret_stack->fp != frame_pointer)) { ftrace_graph_stop(); WARN(1, "Bad frame pointer: expected %lx, received %lx\n" " from func %ps return to %lx\n", - current->ret_stack[index].fp, + ret_stack->fp, frame_pointer, - (void *)current->ret_stack[index].func, - current->ret_stack[index].ret); + (void *)ret_stack->func, + ret_stack->ret); *ret = (unsigned long)panic; - return; + return NULL; } #endif - *ret = current->ret_stack[index].ret; - trace->func = current->ret_stack[index].func; - trace->calltime = current->ret_stack[index].calltime; + *offset += FGRAPH_FRAME_OFFSET; + *ret = ret_stack->ret; + trace->func = ret_stack->func; + trace->calltime = ret_stack->calltime; trace->overrun = atomic_read(¤t->trace_overrun); - trace->depth = current->curr_ret_depth--; + trace->depth = current->curr_ret_depth; /* * We still want to trace interrupts coming in if * max_depth is set to 1. Make sure the decrement is * seen before ftrace_graph_return. */ barrier(); + + return ret_stack; } /* @@ -236,30 +771,55 @@ struct fgraph_ret_regs; static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs, unsigned long frame_pointer) { + struct ftrace_ret_stack *ret_stack; struct ftrace_graph_ret trace; + unsigned long bitmap; unsigned long ret; + int offset; + int i; + + ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer, &offset); + + if (unlikely(!ret_stack)) { + ftrace_graph_stop(); + WARN_ON(1); + /* Might as well panic. What else to do? */ + return (unsigned long)panic; + } - ftrace_pop_return_trace(&trace, &ret, frame_pointer); + trace.rettime = trace_clock_local(); #ifdef CONFIG_FUNCTION_GRAPH_RETVAL trace.retval = fgraph_ret_regs_return_value(ret_regs); #endif - trace.rettime = trace_clock_local(); - ftrace_graph_return(&trace); + + bitmap = get_bitmap_bits(current, offset); + +#ifdef CONFIG_HAVE_STATIC_CALL + if (static_branch_likely(&fgraph_do_direct)) { + if (test_bit(fgraph_direct_gops->idx, &bitmap)) + static_call(fgraph_retfunc)(&trace, fgraph_direct_gops); + } else +#endif + { + for_each_set_bit(i, &bitmap, sizeof(bitmap) * BITS_PER_BYTE) { + struct fgraph_ops *gops = fgraph_array[i]; + + if (gops == &fgraph_stub) + continue; + + gops->retfunc(&trace, gops); + } + } + /* * The ftrace_graph_return() may still access the current * ret_stack structure, we need to make sure the update of * curr_ret_stack is after that. */ barrier(); - current->curr_ret_stack--; - - if (unlikely(!ret)) { - ftrace_graph_stop(); - WARN_ON(1); - /* Might as well panic. What else to do? */ - ret = (unsigned long)panic; - } + current->curr_ret_stack = offset - FGRAPH_FRAME_OFFSET; + current->curr_ret_depth--; return ret; } @@ -282,7 +842,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) /** * ftrace_graph_get_ret_stack - return the entry of the shadow stack - * @task: The task to read the shadow stack from + * @task: The task to read the shadow stack from. * @idx: Index down the shadow stack * * Return the ret_struct on the shadow stack of the @task at the @@ -294,104 +854,116 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) struct ftrace_ret_stack * ftrace_graph_get_ret_stack(struct task_struct *task, int idx) { - idx = task->curr_ret_stack - idx; + struct ftrace_ret_stack *ret_stack = NULL; + int offset = task->curr_ret_stack; - if (idx >= 0 && idx <= task->curr_ret_stack) - return &task->ret_stack[idx]; + if (offset < 0) + return NULL; - return NULL; + do { + ret_stack = get_ret_stack(task, offset, &offset); + } while (ret_stack && --idx >= 0); + + return ret_stack; } /** - * ftrace_graph_ret_addr - convert a potentially modified stack return address - * to its original value + * ftrace_graph_ret_addr - return the original value of the return address + * @task: The task the unwinder is being executed on + * @idx: An initialized pointer to the next stack index to use + * @ret: The current return address (likely pointing to return_handler) + * @retp: The address on the stack of the current return location * * This function can be called by stack unwinding code to convert a found stack - * return address ('ret') to its original value, in case the function graph + * return address (@ret) to its original value, in case the function graph * tracer has modified it to be 'return_to_handler'. If the address hasn't - * been modified, the unchanged value of 'ret' is returned. + * been modified, the unchanged value of @ret is returned. * - * 'idx' is a state variable which should be initialized by the caller to zero - * before the first call. + * @idx holds the last index used to know where to start from. It should be + * initialized to zero for the first iteration as that will mean to start + * at the top of the shadow stack. If the location is found, this pointer + * will be assigned that location so that if called again, it will continue + * where it left off. * - * 'retp' is a pointer to the return address on the stack. It's ignored if - * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined. + * @retp is a pointer to the return address on the stack. */ -#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp) { - int index = task->curr_ret_stack; - int i; + struct ftrace_ret_stack *ret_stack; + unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler); + int i = task->curr_ret_stack; - if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler)) + if (ret != return_handler) return ret; - if (index < 0) + if (!idx) return ret; - for (i = 0; i <= index; i++) - if (task->ret_stack[i].retp == retp) - return task->ret_stack[i].ret; + i = *idx ? : task->curr_ret_stack; + while (i > 0) { + ret_stack = get_ret_stack(current, i, &i); + if (!ret_stack) + break; + /* + * For the tail-call, there would be 2 or more ftrace_ret_stacks on + * the ret_stack, which records "return_to_handler" as the return + * address except for the last one. + * But on the real stack, there should be 1 entry because tail-call + * reuses the return address on the stack and jump to the next function. + * Thus we will continue to find real return address. + */ + if (ret_stack->retp == retp && + ret_stack->ret != return_handler) { + *idx = i; + return ret_stack->ret; + } + } return ret; } -#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ -unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, - unsigned long ret, unsigned long *retp) -{ - int task_idx; - - if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler)) - return ret; - - task_idx = task->curr_ret_stack; - - if (!task->ret_stack || task_idx < *idx) - return ret; - - task_idx -= *idx; - (*idx)++; - - return task->ret_stack[task_idx].ret; -} -#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ static struct ftrace_ops graph_ops = { .func = ftrace_graph_func, - .flags = FTRACE_OPS_FL_INITIALIZED | - FTRACE_OPS_FL_PID | - FTRACE_OPS_GRAPH_STUB, + .flags = FTRACE_OPS_GRAPH_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, /* trampoline_size is only needed for dynamically allocated tramps */ #endif - ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) }; -void ftrace_graph_sleep_time_control(bool enable) +void fgraph_init_ops(struct ftrace_ops *dst_ops, + struct ftrace_ops *src_ops) { - fgraph_sleep_time = enable; + dst_ops->flags = FTRACE_OPS_FL_PID | FTRACE_OPS_GRAPH_STUB; + +#ifdef CONFIG_DYNAMIC_FTRACE + if (src_ops) { + dst_ops->func_hash = &src_ops->local_hash; + mutex_init(&dst_ops->local_hash.regex_lock); + INIT_LIST_HEAD(&dst_ops->subop_list); + dst_ops->flags |= FTRACE_OPS_FL_INITIALIZED; + } +#endif } -int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) +void ftrace_graph_sleep_time_control(bool enable) { - return 0; + fgraph_sleep_time = enable; } /* * Simply points to ftrace_stub, but with the proper protocol. * Defined by the linker script in linux/vmlinux.lds.h */ -extern void ftrace_stub_graph(struct ftrace_graph_ret *); +void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops); /* The callbacks that hook a function */ trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph; trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; -static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ -static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) +static int alloc_retstack_tasklist(unsigned long **ret_stack_list) { int i; int ret = 0; @@ -399,10 +971,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) struct task_struct *g, *t; for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { - ret_stack_list[i] = - kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); + ret_stack_list[i] = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list[i]) { start = 0; end = i; @@ -420,9 +989,10 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) if (t->ret_stack == NULL) { atomic_set(&t->trace_overrun, 0); - t->curr_ret_stack = -1; + ret_stack_init_task_vars(ret_stack_list[start]); + t->curr_ret_stack = 0; t->curr_ret_depth = -1; - /* Make sure the tasks see the -1 first: */ + /* Make sure the tasks see the 0 first: */ smp_wmb(); t->ret_stack = ret_stack_list[start++]; } @@ -442,8 +1012,9 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, struct task_struct *next, unsigned int prev_state) { + struct ftrace_ret_stack *ret_stack; unsigned long long timestamp; - int index; + int offset; /* * Does the user want to count the time a function was asleep. @@ -466,57 +1037,23 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, */ timestamp -= next->ftrace_timestamp; - for (index = next->curr_ret_stack; index >= 0; index--) - next->ret_stack[index].calltime += timestamp; -} - -static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace) -{ - if (!ftrace_ops_test(&global_ops, trace->func, NULL)) - return 0; - return __ftrace_graph_entry(trace); -} - -/* - * The function graph tracer should only trace the functions defined - * by set_ftrace_filter and set_ftrace_notrace. If another function - * tracer ops is registered, the graph tracer requires testing the - * function against the global ops, and not just trace any function - * that any ftrace_ops registered. - */ -void update_function_graph_func(void) -{ - struct ftrace_ops *op; - bool do_test = false; - - /* - * The graph and global ops share the same set of functions - * to test. If any other ops is on the list, then - * the graph tracing needs to test if its the function - * it should call. - */ - do_for_each_ftrace_op(op, ftrace_ops_list) { - if (op != &global_ops && op != &graph_ops && - op != &ftrace_list_end) { - do_test = true; - /* in double loop, break out with goto */ - goto out; - } - } while_for_each_ftrace_op(op); - out: - if (do_test) - ftrace_graph_entry = ftrace_graph_entry_test; - else - ftrace_graph_entry = __ftrace_graph_entry; + for (offset = next->curr_ret_stack; offset > 0; ) { + ret_stack = get_ret_stack(next, offset, &offset); + if (ret_stack) + ret_stack->calltime += timestamp; + } } -static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); +static DEFINE_PER_CPU(unsigned long *, idle_ret_stack); static void -graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) +graph_init_task(struct task_struct *t, unsigned long *ret_stack) { atomic_set(&t->trace_overrun, 0); + ret_stack_init_task_vars(ret_stack); t->ftrace_timestamp = 0; + t->curr_ret_stack = 0; + t->curr_ret_depth = -1; /* make curr_ret_stack visible before we add the ret_stack */ smp_wmb(); t->ret_stack = ret_stack; @@ -528,7 +1065,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) */ void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) { - t->curr_ret_stack = -1; + t->curr_ret_stack = 0; t->curr_ret_depth = -1; /* * The idle task has no parent, it either has its own @@ -538,14 +1075,11 @@ void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu)); if (ftrace_graph_active) { - struct ftrace_ret_stack *ret_stack; + unsigned long *ret_stack; ret_stack = per_cpu(idle_ret_stack, cpu); if (!ret_stack) { - ret_stack = - kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); + ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack) return; per_cpu(idle_ret_stack, cpu) = ret_stack; @@ -559,15 +1093,13 @@ void ftrace_graph_init_task(struct task_struct *t) { /* Make sure we do not use the parent ret_stack */ t->ret_stack = NULL; - t->curr_ret_stack = -1; + t->curr_ret_stack = 0; t->curr_ret_depth = -1; if (ftrace_graph_active) { - struct ftrace_ret_stack *ret_stack; + unsigned long *ret_stack; - ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); + ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack) return; graph_init_task(t, ret_stack); @@ -576,7 +1108,7 @@ void ftrace_graph_init_task(struct task_struct *t) void ftrace_graph_exit_task(struct task_struct *t) { - struct ftrace_ret_stack *ret_stack = t->ret_stack; + unsigned long *ret_stack = t->ret_stack; t->ret_stack = NULL; /* NULL must become visible to IRQs before we free it: */ @@ -585,15 +1117,52 @@ void ftrace_graph_exit_task(struct task_struct *t) kfree(ret_stack); } +#ifdef CONFIG_DYNAMIC_FTRACE +static int fgraph_pid_func(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) +{ + struct trace_array *tr = gops->ops.private; + int pid; + + if (tr) { + pid = this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid); + if (pid == FTRACE_PID_IGNORE) + return 0; + if (pid != FTRACE_PID_TRACE && + pid != current->pid) + return 0; + } + + return gops->saved_func(trace, gops); +} + +void fgraph_update_pid_func(void) +{ + struct fgraph_ops *gops; + struct ftrace_ops *op; + + if (!(graph_ops.flags & FTRACE_OPS_FL_INITIALIZED)) + return; + + list_for_each_entry(op, &graph_ops.subop_list, list) { + if (op->flags & FTRACE_OPS_FL_PID) { + gops = container_of(op, struct fgraph_ops, ops); + gops->entryfunc = ftrace_pids_enabled(op) ? + fgraph_pid_func : gops->saved_func; + if (ftrace_graph_active == 1) + static_call_update(fgraph_func, gops->entryfunc); + } + } +} +#endif + /* Allocate a return stack for each task */ static int start_graph_tracing(void) { - struct ftrace_ret_stack **ret_stack_list; + unsigned long **ret_stack_list; int ret, cpu; - ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE, - sizeof(struct ftrace_ret_stack *), - GFP_KERNEL); + ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list) return -ENOMEM; @@ -619,40 +1188,111 @@ static int start_graph_tracing(void) return ret; } +static void init_task_vars(int idx) +{ + struct task_struct *g, *t; + int cpu; + + for_each_online_cpu(cpu) { + if (idle_task(cpu)->ret_stack) + ret_stack_set_task_var(idle_task(cpu), idx, 0); + } + + read_lock(&tasklist_lock); + for_each_process_thread(g, t) { + if (t->ret_stack) + ret_stack_set_task_var(t, idx, 0); + } + read_unlock(&tasklist_lock); +} + +static void ftrace_graph_enable_direct(bool enable_branch) +{ + trace_func_graph_ent_t func = NULL; + trace_func_graph_ret_t retfunc = NULL; + int i; + + for_each_set_bit(i, &fgraph_array_bitmask, + sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) { + func = fgraph_array[i]->entryfunc; + retfunc = fgraph_array[i]->retfunc; + fgraph_direct_gops = fgraph_array[i]; + } + if (WARN_ON_ONCE(!func)) + return; + + static_call_update(fgraph_func, func); + static_call_update(fgraph_retfunc, retfunc); + if (enable_branch) + static_branch_disable(&fgraph_do_direct); +} + +static void ftrace_graph_disable_direct(bool disable_branch) +{ + if (disable_branch) + static_branch_disable(&fgraph_do_direct); + static_call_update(fgraph_func, ftrace_graph_entry_stub); + static_call_update(fgraph_retfunc, ftrace_graph_ret_stub); + fgraph_direct_gops = &fgraph_stub; +} + int register_ftrace_graph(struct fgraph_ops *gops) { + int command = 0; int ret = 0; + int i = -1; mutex_lock(&ftrace_lock); - /* we currently allow only one tracer registered at a time */ - if (ftrace_graph_active) { - ret = -EBUSY; + if (!fgraph_array[0]) { + /* The array must always have real data on it */ + for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) + fgraph_array[i] = &fgraph_stub; + fgraph_lru_init(); + } + + i = fgraph_lru_alloc_index(); + if (i < 0 || WARN_ON_ONCE(fgraph_array[i] != &fgraph_stub)) { + ret = -ENOSPC; goto out; } - register_pm_notifier(&ftrace_suspend_notifier); + fgraph_array[i] = gops; + gops->idx = i; ftrace_graph_active++; - ret = start_graph_tracing(); - if (ret) { - ftrace_graph_active--; - goto out; - } - ftrace_graph_return = gops->retfunc; + if (ftrace_graph_active == 2) + ftrace_graph_disable_direct(true); - /* - * Update the indirect function to the entryfunc, and the - * function that gets called to the entry_test first. Then - * call the update fgraph entry function to determine if - * the entryfunc should be called directly or not. - */ - __ftrace_graph_entry = gops->entryfunc; - ftrace_graph_entry = ftrace_graph_entry_test; - update_function_graph_func(); + if (ftrace_graph_active == 1) { + ftrace_graph_enable_direct(false); + register_pm_notifier(&ftrace_suspend_notifier); + ret = start_graph_tracing(); + if (ret) + goto error; + /* + * Some archs just test to see if these are not + * the default function + */ + ftrace_graph_return = return_run; + ftrace_graph_entry = entry_run; + command = FTRACE_START_FUNC_RET; + } else { + init_task_vars(gops->idx); + } + + /* Always save the function, and reset at unregistering */ + gops->saved_func = gops->entryfunc; - ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET); + ret = ftrace_startup_subops(&graph_ops, &gops->ops, command); +error: + if (ret) { + fgraph_array[i] = &fgraph_stub; + ftrace_graph_active--; + gops->saved_func = NULL; + fgraph_lru_release_index(i); + } out: mutex_unlock(&ftrace_lock); return ret; @@ -660,19 +1300,41 @@ out: void unregister_ftrace_graph(struct fgraph_ops *gops) { + int command = 0; + mutex_lock(&ftrace_lock); if (unlikely(!ftrace_graph_active)) goto out; + if (unlikely(gops->idx < 0 || gops->idx >= FGRAPH_ARRAY_SIZE || + fgraph_array[gops->idx] != gops)) + goto out; + + if (fgraph_lru_release_index(gops->idx) < 0) + goto out; + + fgraph_array[gops->idx] = &fgraph_stub; + ftrace_graph_active--; - ftrace_graph_return = ftrace_stub_graph; - ftrace_graph_entry = ftrace_graph_entry_stub; - __ftrace_graph_entry = ftrace_graph_entry_stub; - ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET); - unregister_pm_notifier(&ftrace_suspend_notifier); - unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); + if (!ftrace_graph_active) + command = FTRACE_STOP_FUNC_RET; + + ftrace_shutdown_subops(&graph_ops, &gops->ops, command); + + if (ftrace_graph_active == 1) + ftrace_graph_enable_direct(true); + else if (!ftrace_graph_active) + ftrace_graph_disable_direct(false); + + if (!ftrace_graph_active) { + ftrace_graph_return = ftrace_stub_graph; + ftrace_graph_entry = ftrace_graph_entry_stub; + unregister_pm_notifier(&ftrace_suspend_notifier); + unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); + } out: + gops->saved_func = NULL; mutex_unlock(&ftrace_lock); } |