summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/configs/hardening.config | 1
-rw-r--r-- kernel/dma/direct.c | 10
-rw-r--r-- kernel/dma/direct.h | 9
-rw-r--r-- kernel/dma/map_benchmark.c | 16
-rw-r--r-- kernel/dma/swiotlb.c | 68
-rw-r--r-- kernel/events/callchain.c | 43
-rw-r--r-- kernel/events/uprobes.c | 33
-rw-r--r-- kernel/locking/lockdep.c | 9
-rw-r--r-- kernel/sys_ni.c | 2
-rw-r--r-- kernel/trace/fgraph.c | 1054
-rw-r--r-- kernel/trace/ftrace.c | 688
-rw-r--r-- kernel/trace/ftrace_internal.h | 18
-rw-r--r-- kernel/trace/pid_list.c | 5
-rw-r--r-- kernel/trace/trace.h | 93
-rw-r--r-- kernel/trace/trace_functions.c | 15
-rw-r--r-- kernel/trace/trace_functions_graph.c | 96
-rw-r--r-- kernel/trace/trace_irqsoff.c | 10
-rw-r--r-- kernel/trace/trace_kprobe.c | 192
-rw-r--r-- kernel/trace/trace_osnoise.c | 4
-rw-r--r-- kernel/trace/trace_sched_wakeup.c | 10
-rw-r--r-- kernel/trace/trace_selftest.c | 259
21 files changed, 2148 insertions, 487 deletions
diff --git a/kernel/configs/hardening.config b/kernel/configs/hardening.config
index 8a7ce7a6b3ab..3fabb8f55ef6 100644
--- a/kernel/configs/hardening.config
+++ b/kernel/configs/hardening.config
@@ -20,6 +20,7 @@ CONFIG_RANDOMIZE_MEMORY=y
# Randomize allocator freelists, harden metadata.
CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y
+CONFIG_SLAB_BUCKETS=y
CONFIG_SHUFFLE_PAGE_ALLOCATOR=y
CONFIG_RANDOM_KMALLOC_CACHES=y
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 4d543b1e9d57..4480a3cd92e0 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -404,9 +404,7 @@ void dma_direct_sync_sg_for_device(struct device *dev,
for_each_sg(sgl, sg, nents, i) {
phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
- if (unlikely(is_swiotlb_buffer(dev, paddr)))
- swiotlb_sync_single_for_device(dev, paddr, sg->length,
- dir);
+ swiotlb_sync_single_for_device(dev, paddr, sg->length, dir);
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_device(paddr, sg->length,
@@ -430,9 +428,7 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu(paddr, sg->length, dir);
- if (unlikely(is_swiotlb_buffer(dev, paddr)))
- swiotlb_sync_single_for_cpu(dev, paddr, sg->length,
- dir);
+ swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
if (dir == DMA_FROM_DEVICE)
arch_dma_mark_clean(paddr, sg->length);
@@ -640,7 +636,7 @@ size_t dma_direct_max_mapping_size(struct device *dev)
bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
{
return !dev_is_dma_coherent(dev) ||
- is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr));
+ swiotlb_find_pool(dev, dma_to_phys(dev, dma_addr));
}
/**
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index 18d346118fe8..d2c0b7e632fc 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -58,8 +58,7 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
{
phys_addr_t paddr = dma_to_phys(dev, addr);
- if (unlikely(is_swiotlb_buffer(dev, paddr)))
- swiotlb_sync_single_for_device(dev, paddr, size, dir);
+ swiotlb_sync_single_for_device(dev, paddr, size, dir);
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_device(paddr, size, dir);
@@ -75,8 +74,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
arch_sync_dma_for_cpu_all();
}
- if (unlikely(is_swiotlb_buffer(dev, paddr)))
- swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
+ swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
if (dir == DMA_FROM_DEVICE)
arch_dma_mark_clean(paddr, size);
@@ -121,8 +119,7 @@ static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
dma_direct_sync_single_for_cpu(dev, addr, size, dir);
- if (unlikely(is_swiotlb_buffer(dev, phys)))
- swiotlb_tbl_unmap_single(dev, phys, size, dir,
+ swiotlb_tbl_unmap_single(dev, phys, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
}
#endif /* _KERNEL_DMA_DIRECT_H */
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index 4950e0b622b1..cc19a3efea89 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -89,6 +89,22 @@ static int map_benchmark_thread(void *data)
atomic64_add(map_sq, &map->sum_sq_map);
atomic64_add(unmap_sq, &map->sum_sq_unmap);
atomic64_inc(&map->loops);
+
+ /*
+ * We may test for a long time so periodically check whether
+ * we need to schedule to avoid starving the others. Otherwise
+ * we may hangup the kernel in a non-preemptible kernel when
+ * the test kthreads number >= CPU number, the test kthreads
+ * will run endlessly on every CPU since the thread responsible
+ * for notifying the kthread stop (in do_map_benchmark())
+ * could not be scheduled.
+ *
+ * Note this may degrade the test concurrency since the test
+ * threads may need to share the CPU time with other load
+ * in the system. So it's recommended to run this benchmark
+ * on an idle system.
+ */
+ cond_resched();
}
out:
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index fe1ccb53596f..df68d29740a0 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -763,16 +763,18 @@ static void swiotlb_dyn_free(struct rcu_head *rcu)
}
/**
- * swiotlb_find_pool() - find the IO TLB pool for a physical address
+ * __swiotlb_find_pool() - find the IO TLB pool for a physical address
* @dev: Device which has mapped the DMA buffer.
* @paddr: Physical address within the DMA buffer.
*
* Find the IO TLB memory pool descriptor which contains the given physical
- * address, if any.
+ * address, if any. This function is for use only when the dev is known to
+ * be using swiotlb. Use swiotlb_find_pool() for the more general case
+ * when this condition is not met.
*
* Return: Memory pool which contains @paddr, or %NULL if none.
*/
-struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr)
+struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
struct io_tlb_pool *pool;
@@ -855,9 +857,8 @@ static unsigned int swiotlb_align_offset(struct device *dev,
* Bounce: copy the swiotlb buffer from or back to the original dma location
*/
static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size,
- enum dma_data_direction dir)
+ enum dma_data_direction dir, struct io_tlb_pool *mem)
{
- struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
phys_addr_t orig_addr = mem->slots[index].orig_addr;
size_t alloc_size = mem->slots[index].alloc_size;
@@ -1243,7 +1244,7 @@ found:
* that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy
* atomicity).
*
- * See also the comment in is_swiotlb_buffer().
+ * See also the comment in swiotlb_find_pool().
*/
smp_mb();
@@ -1435,13 +1436,13 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
* hardware behavior. Use of swiotlb is supposed to be transparent,
* i.e. swiotlb must not corrupt memory by clobbering unwritten bytes.
*/
- swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE, pool);
return tlb_addr;
}
-static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
+static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr,
+ struct io_tlb_pool *mem)
{
- struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
unsigned long flags;
unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr);
int index, nslots, aindex;
@@ -1499,17 +1500,16 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
* swiotlb_del_transient() - delete a transient memory pool
* @dev: Device which mapped the buffer.
* @tlb_addr: Physical address within a bounce buffer.
+ * @pool: Pointer to the transient memory pool to be checked and deleted.
*
* Check whether the address belongs to a transient SWIOTLB memory pool.
* If yes, then delete the pool.
*
* Return: %true if @tlb_addr belonged to a transient pool that was released.
*/
-static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr)
+static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr,
+ struct io_tlb_pool *pool)
{
- struct io_tlb_pool *pool;
-
- pool = swiotlb_find_pool(dev, tlb_addr);
if (!pool->transient)
return false;
@@ -1522,7 +1522,7 @@ static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr)
#else /* !CONFIG_SWIOTLB_DYNAMIC */
static inline bool swiotlb_del_transient(struct device *dev,
- phys_addr_t tlb_addr)
+ phys_addr_t tlb_addr, struct io_tlb_pool *pool)
{
return false;
}
@@ -1532,36 +1532,39 @@ static inline bool swiotlb_del_transient(struct device *dev,
/*
* tlb_addr is the physical address of the bounce buffer to unmap.
*/
-void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
- size_t mapping_size, enum dma_data_direction dir,
- unsigned long attrs)
+void __swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
+ size_t mapping_size, enum dma_data_direction dir,
+ unsigned long attrs, struct io_tlb_pool *pool)
{
/*
* First, sync the memory before unmapping the entry
*/
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, mapping_size,
+ DMA_FROM_DEVICE, pool);
- if (swiotlb_del_transient(dev, tlb_addr))
+ if (swiotlb_del_transient(dev, tlb_addr, pool))
return;
- swiotlb_release_slots(dev, tlb_addr);
+ swiotlb_release_slots(dev, tlb_addr, pool);
}
-void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
- size_t size, enum dma_data_direction dir)
+void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir,
+ struct io_tlb_pool *pool)
{
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE, pool);
else
BUG_ON(dir != DMA_FROM_DEVICE);
}
-void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
- size_t size, enum dma_data_direction dir)
+void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir,
+ struct io_tlb_pool *pool)
{
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
- swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE, pool);
else
BUG_ON(dir != DMA_TO_DEVICE);
}
@@ -1585,8 +1588,9 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
/* Ensure that the address returned is DMA'ble */
dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
- swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
- attrs | DMA_ATTR_SKIP_CPU_SYNC);
+ __swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
+ attrs | DMA_ATTR_SKIP_CPU_SYNC,
+ swiotlb_find_pool(dev, swiotlb_addr));
dev_WARN_ONCE(dev, 1,
"swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
@@ -1764,7 +1768,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
if (unlikely(!PAGE_ALIGNED(tlb_addr))) {
dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n",
&tlb_addr);
- swiotlb_release_slots(dev, tlb_addr);
+ swiotlb_release_slots(dev, tlb_addr, pool);
return NULL;
}
@@ -1774,11 +1778,13 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
bool swiotlb_free(struct device *dev, struct page *page, size_t size)
{
phys_addr_t tlb_addr = page_to_phys(page);
+ struct io_tlb_pool *pool;
- if (!is_swiotlb_buffer(dev, tlb_addr))
+ pool = swiotlb_find_pool(dev, tlb_addr);
+ if (!pool)
return false;
- swiotlb_release_slots(dev, tlb_addr);
+ swiotlb_release_slots(dev, tlb_addr, pool);
return true;
}
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index ad57944b6c40..8d57255e5b29 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -11,6 +11,7 @@
#include <linux/perf_event.h>
#include <linux/slab.h>
#include <linux/sched/task_stack.h>
+#include <linux/uprobes.h>
#include "internal.h"
@@ -176,13 +177,51 @@ put_callchain_entry(int rctx)
put_recursion_context(this_cpu_ptr(callchain_recursion), rctx);
}
+static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entry,
+ int start_entry_idx)
+{
+#ifdef CONFIG_UPROBES
+ struct uprobe_task *utask = current->utask;
+ struct return_instance *ri;
+ __u64 *cur_ip, *last_ip, tramp_addr;
+
+ if (likely(!utask || !utask->return_instances))
+ return;
+
+ cur_ip = &entry->ip[start_entry_idx];
+ last_ip = &entry->ip[entry->nr - 1];
+ ri = utask->return_instances;
+ tramp_addr = uprobe_get_trampoline_vaddr();
+
+ /*
+ * If there are pending uretprobes for the current thread, they are
+ * recorded in a list inside utask->return_instances; each such
+ * pending uretprobe replaces traced user function's return address on
+ * the stack, so when stack trace is captured, instead of seeing
+ * actual function's return address, we'll have one or many uretprobe
+ * trampoline addresses in the stack trace, which are not helpful and
+ * misleading to users.
+ * So here we go over the pending list of uretprobes, and each
+ * encountered trampoline address is replaced with actual return
+ * address.
+ */
+ while (ri && cur_ip <= last_ip) {
+ if (*cur_ip == tramp_addr) {
+ *cur_ip = ri->orig_ret_vaddr;
+ ri = ri->next;
+ }
+ cur_ip++;
+ }
+#endif
+}
+
struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
u32 max_stack, bool crosstask, bool add_mark)
{
struct perf_callchain_entry *entry;
struct perf_callchain_entry_ctx ctx;
- int rctx;
+ int rctx, start_entry_idx;
entry = get_callchain_entry(&rctx);
if (!entry)
@@ -215,7 +254,9 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
if (add_mark)
perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
+ start_entry_idx = entry->nr;
perf_callchain_user(&ctx, regs);
+ fixup_uretprobe_trampoline_entries(entry, start_entry_idx);
}
}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 2c83ba776fc7..99be2adedbc0 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1474,11 +1474,20 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
return ret;
}
+void * __weak arch_uprobe_trampoline(unsigned long *psize)
+{
+ static uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+
+ *psize = UPROBE_SWBP_INSN_SIZE;
+ return &insn;
+}
+
static struct xol_area *__create_xol_area(unsigned long vaddr)
{
struct mm_struct *mm = current->mm;
- uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+ unsigned long insns_size;
struct xol_area *area;
+ void *insns;
area = kmalloc(sizeof(*area), GFP_KERNEL);
if (unlikely(!area))
@@ -1502,7 +1511,8 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
/* Reserve the 1st slot for get_trampoline_vaddr() */
set_bit(0, area->bitmap);
atomic_set(&area->slot_count, 1);
- arch_uprobe_copy_ixol(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE);
+ insns = arch_uprobe_trampoline(&insns_size);
+ arch_uprobe_copy_ixol(area->pages[0], 0, insns, insns_size);
if (!xol_add_vma(mm, area))
return area;
@@ -1827,7 +1837,7 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags)
*
* Returns -1 in case the xol_area is not allocated.
*/
-static unsigned long get_trampoline_vaddr(void)
+unsigned long uprobe_get_trampoline_vaddr(void)
{
struct xol_area *area;
unsigned long trampoline_vaddr = -1;
@@ -1878,7 +1888,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
if (!ri)
return;
- trampoline_vaddr = get_trampoline_vaddr();
+ trampoline_vaddr = uprobe_get_trampoline_vaddr();
orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
if (orig_ret_vaddr == -1)
goto fail;
@@ -2123,7 +2133,7 @@ static struct return_instance *find_next_ret_chain(struct return_instance *ri)
return ri;
}
-static void handle_trampoline(struct pt_regs *regs)
+void uprobe_handle_trampoline(struct pt_regs *regs)
{
struct uprobe_task *utask;
struct return_instance *ri, *next;
@@ -2149,6 +2159,15 @@ static void handle_trampoline(struct pt_regs *regs)
instruction_pointer_set(regs, ri->orig_ret_vaddr);
do {
+ /* pop current instance from the stack of pending return instances,
+ * as it's not pending anymore: we just fixed up original
+ * instruction pointer in regs and are about to call handlers;
+ * this allows fixup_uretprobe_trampoline_entries() to properly fix up
+ * captured stack traces from uretprobe handlers, in which pending
+ * trampoline addresses on the stack are replaced with correct
+ * original return addresses
+ */
+ utask->return_instances = ri->next;
if (valid)
handle_uretprobe_chain(ri, regs);
ri = free_ret_instance(ri);
@@ -2187,8 +2206,8 @@ static void handle_swbp(struct pt_regs *regs)
int is_swbp;
bp_vaddr = uprobe_get_swbp_addr(regs);
- if (bp_vaddr == get_trampoline_vaddr())
- return handle_trampoline(regs);
+ if (bp_vaddr == uprobe_get_trampoline_vaddr())
+ return uprobe_handle_trampoline(regs);
uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
if (!uprobe) {
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 726b22ce7d0b..58c88220a478 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4917,6 +4917,9 @@ EXPORT_SYMBOL_GPL(lockdep_init_map_type);
struct lock_class_key __lockdep_no_validate__;
EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
+struct lock_class_key __lockdep_no_track__;
+EXPORT_SYMBOL_GPL(__lockdep_no_track__);
+
#ifdef CONFIG_PROVE_LOCKING
void lockdep_set_lock_cmp_fn(struct lockdep_map *lock, lock_cmp_fn cmp_fn,
lock_print_fn print_fn)
@@ -5001,6 +5004,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (unlikely(!debug_locks))
return 0;
+ if (unlikely(lock->key == &__lockdep_no_track__))
+ return 0;
+
if (!prove_locking || lock->key == &__lockdep_no_validate__)
check = 0;
@@ -5763,7 +5769,8 @@ void lock_release(struct lockdep_map *lock, unsigned long ip)
trace_lock_release(lock, ip);
- if (unlikely(!lockdep_enabled()))
+ if (unlikely(!lockdep_enabled() ||
+ lock->key == &__lockdep_no_track__))
return;
raw_local_irq_save(flags);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 2ef820a2d067..c00a86931f8c 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -390,3 +390,5 @@ COND_SYSCALL(setuid16);
/* restartable sequence */
COND_SYSCALL(rseq);
+
+COND_SYSCALL(uretprobe);
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index a130b2d898f7..fc205ad167a9 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -7,9 +7,11 @@
*
* Highly modified by Steven Rostedt (VMware).
*/
+#include <linux/bits.h>
#include <linux/jump_label.h>
#include <linux/suspend.h>
#include <linux/ftrace.h>
+#include <linux/static_call.h>
#include <linux/slab.h>
#include <trace/events/sched.h>
@@ -17,17 +19,447 @@
#include "ftrace_internal.h"
#include "trace.h"
-#ifdef CONFIG_DYNAMIC_FTRACE
-#define ASSIGN_OPS_HASH(opsname, val) \
- .func_hash = val, \
- .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
-#else
-#define ASSIGN_OPS_HASH(opsname, val)
-#endif
+/*
+ * FGRAPH_FRAME_SIZE: Size in bytes of the meta data on the shadow stack
+ * FGRAPH_FRAME_OFFSET: Size in long words of the meta data frame
+ */
+#define FGRAPH_FRAME_SIZE sizeof(struct ftrace_ret_stack)
+#define FGRAPH_FRAME_OFFSET DIV_ROUND_UP(FGRAPH_FRAME_SIZE, sizeof(long))
+
+/*
+ * On entry to a function (via function_graph_enter()), a new fgraph frame
+ * (ftrace_ret_stack) is pushed onto the stack as well as a word that
+ * holds a bitmask and a type (called "bitmap"). The bitmap is defined as:
+ *
+ * bits: 0 - 9 offset in words from the previous ftrace_ret_stack
+ *
+ * bits: 10 - 11 Type of storage
+ * 0 - reserved
+ * 1 - bitmap of fgraph_array index
+ * 2 - reserved data
+ *
+ * For type with "bitmap of fgraph_array index" (FGRAPH_TYPE_BITMAP):
+ * bits: 12 - 27 The bitmap of fgraph_ops fgraph_array index
+ * That is, it's a bitmask of 0-15 (16 bits)
+ * where if a corresponding ops in the fgraph_array[]
+ * expects a callback from the return of the function
+ * its corresponding bit will be set.
+ *
+ *
+ * The top of the ret_stack (when not empty) will always have a reference
+ * word that points to the last fgraph frame that was saved.
+ *
+ * For reserved data:
+ * bits: 12 - 16 The size in words that is stored
+ * bits: 17 - 20 The index of fgraph_array, which shows who is stored
+ *
+ * That is, at the end of function_graph_enter, if the first and fourth
+ * fgraph_ops on the fgraph_array[] (index 0 and 3) need their retfunc called
+ * on the return of the function being traced, and the fourth fgraph_ops
+ * stored two words of data, this is what will be on the task's shadow
+ * ret_stack: (the stack grows upward)
+ *
+ * ret_stack[SHADOW_STACK_OFFSET]
+ * | SHADOW_STACK_TASK_VARS(ret_stack)[15] |
+ * ...
+ * | SHADOW_STACK_TASK_VARS(ret_stack)[0] |
+ * ret_stack[SHADOW_STACK_MAX_OFFSET]
+ * ...
+ * | | <- task->curr_ret_stack
+ * +--------------------------------------------+
+ * | (3 << 12) | (3 << 10) | FGRAPH_FRAME_OFFSET|
+ * | *or put another way* |
+ * | (3 << FGRAPH_DATA_INDEX_SHIFT)| \ | This is for fgraph_ops[3].
+ * | ((2 - 1) << FGRAPH_DATA_SHIFT)| \ | The data size is 2 words.
+ * | (FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT)| \ |
+ * | (offset2:FGRAPH_FRAME_OFFSET+3) | <- the offset2 is from here
+ * +--------------------------------------------+ ( It is 4 words from the ret_stack)
+ * | STORED DATA WORD 2 |
+ * | STORED DATA WORD 1 |
+ * +--------------------------------------------+
+ * | (9 << 12) | (1 << 10) | FGRAPH_FRAME_OFFSET|
+ * | *or put another way* |
+ * | (BIT(3)|BIT(0)) << FGRAPH_INDEX_SHIFT | \ |
+ * | FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT| \ |
+ * | (offset1:FGRAPH_FRAME_OFFSET) | <- the offset1 is from here
+ * +--------------------------------------------+
+ * | struct ftrace_ret_stack |
+ * | (stores the saved ret pointer) | <- the offset points here
+ * +--------------------------------------------+
+ * | (X) | (N) | ( N words away from
+ * | | previous ret_stack)
+ * ...
+ * ret_stack[0]
+ *
+ * If a backtrace is required, and the real return pointer needs to be
+ * fetched, then it looks at the task's curr_ret_stack offset, if it
+ * is greater than zero (reserved, or right before popped), it would mask
+ * the value by FGRAPH_FRAME_OFFSET_MASK to get the offset of the
+ * ftrace_ret_stack structure stored on the shadow stack.
+ */
+
+/*
+ * The following is for the top word on the stack:
+ *
+ * FGRAPH_FRAME_OFFSET (0-9) holds the offset delta to the fgraph frame
+ * FGRAPH_TYPE (10-11) holds the type of word this is.
+ * (RESERVED or BITMAP)
+ */
+#define FGRAPH_FRAME_OFFSET_BITS 10
+#define FGRAPH_FRAME_OFFSET_MASK GENMASK(FGRAPH_FRAME_OFFSET_BITS - 1, 0)
+
+#define FGRAPH_TYPE_BITS 2
+#define FGRAPH_TYPE_MASK GENMASK(FGRAPH_TYPE_BITS - 1, 0)
+#define FGRAPH_TYPE_SHIFT FGRAPH_FRAME_OFFSET_BITS
+
+enum {
+ FGRAPH_TYPE_RESERVED = 0,
+ FGRAPH_TYPE_BITMAP = 1,
+ FGRAPH_TYPE_DATA = 2,
+};
+
+/*
+ * For BITMAP type:
+ * FGRAPH_INDEX (12-27) bits holding the gops index wanting return callback called
+ */
+#define FGRAPH_INDEX_BITS 16
+#define FGRAPH_INDEX_MASK GENMASK(FGRAPH_INDEX_BITS - 1, 0)
+#define FGRAPH_INDEX_SHIFT (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS)
+
+/*
+ * For DATA type:
+ * FGRAPH_DATA (12-16) bits hold the size of data (in words)
+ * FGRAPH_DATA_INDEX (17-20) bits hold the index for which gops->idx the data is for
+ *
+ * Note:
+ * data_size == 0 means 1 word, and 31 (=2^5 - 1) means 32 words.
+ */
+#define FGRAPH_DATA_BITS 5
+#define FGRAPH_DATA_MASK GENMASK(FGRAPH_DATA_BITS - 1, 0)
+#define FGRAPH_DATA_SHIFT (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS)
+#define FGRAPH_MAX_DATA_SIZE (sizeof(long) * (1 << FGRAPH_DATA_BITS))
+
+#define FGRAPH_DATA_INDEX_BITS 4
+#define FGRAPH_DATA_INDEX_MASK GENMASK(FGRAPH_DATA_INDEX_BITS - 1, 0)
+#define FGRAPH_DATA_INDEX_SHIFT (FGRAPH_DATA_SHIFT + FGRAPH_DATA_BITS)
+
+#define FGRAPH_MAX_INDEX \
+ ((FGRAPH_INDEX_SIZE << FGRAPH_DATA_BITS) + FGRAPH_RET_INDEX)
+
+#define FGRAPH_ARRAY_SIZE FGRAPH_INDEX_BITS
+
+/*
+ * SHADOW_STACK_SIZE: The size in bytes of the entire shadow stack
+ * SHADOW_STACK_OFFSET: The size in long words of the shadow stack
+ * SHADOW_STACK_MAX_OFFSET: The max offset of the stack for a new frame to be added
+ */
+#define SHADOW_STACK_SIZE (PAGE_SIZE)
+#define SHADOW_STACK_OFFSET (SHADOW_STACK_SIZE / sizeof(long))
+/* Leave a buffer at the end */
+#define SHADOW_STACK_MAX_OFFSET \
+ (SHADOW_STACK_OFFSET - (FGRAPH_FRAME_OFFSET + 1 + FGRAPH_ARRAY_SIZE))
+
+/* RET_STACK(): Return the frame from a given @offset from task @t */
+#define RET_STACK(t, offset) ((struct ftrace_ret_stack *)(&(t)->ret_stack[offset]))
+
+/*
+ * Each fgraph_ops has a reserved unsigned long at the end (top) of the
+ * ret_stack to store task specific state.
+ */
+#define SHADOW_STACK_TASK_VARS(ret_stack) \
+ ((unsigned long *)(&(ret_stack)[SHADOW_STACK_OFFSET - FGRAPH_ARRAY_SIZE]))
DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph);
int ftrace_graph_active;
+static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE];
+static unsigned long fgraph_array_bitmask;
+
+/* LRU index table for fgraph_array */
+static int fgraph_lru_table[FGRAPH_ARRAY_SIZE];
+static int fgraph_lru_next;
+static int fgraph_lru_last;
+
+/* Initialize fgraph_lru_table with unused index */
+static void fgraph_lru_init(void)
+{
+ int i;
+
+ for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
+ fgraph_lru_table[i] = i;
+}
+
+/* Release the used index to the LRU table */
+static int fgraph_lru_release_index(int idx)
+{
+ if (idx < 0 || idx >= FGRAPH_ARRAY_SIZE ||
+ WARN_ON_ONCE(fgraph_lru_table[fgraph_lru_last] != -1))
+ return -1;
+
+ fgraph_lru_table[fgraph_lru_last] = idx;
+ fgraph_lru_last = (fgraph_lru_last + 1) % FGRAPH_ARRAY_SIZE;
+
+ clear_bit(idx, &fgraph_array_bitmask);
+ return 0;
+}
+
+/* Allocate a new index from LRU table */
+static int fgraph_lru_alloc_index(void)
+{
+ int idx = fgraph_lru_table[fgraph_lru_next];
+
+ /* No id is available */
+ if (idx == -1)
+ return -1;
+
+ fgraph_lru_table[fgraph_lru_next] = -1;
+ fgraph_lru_next = (fgraph_lru_next + 1) % FGRAPH_ARRAY_SIZE;
+
+ set_bit(idx, &fgraph_array_bitmask);
+ return idx;
+}
+
+/* Get the offset to the fgraph frame from a ret_stack value */
+static inline int __get_offset(unsigned long val)
+{
+ return val & FGRAPH_FRAME_OFFSET_MASK;
+}
+
+/* Get the type of word from a ret_stack value */
+static inline int __get_type(unsigned long val)
+{
+ return (val >> FGRAPH_TYPE_SHIFT) & FGRAPH_TYPE_MASK;
+}
+
+/* Get the data_index for a DATA type ret_stack word */
+static inline int __get_data_index(unsigned long val)
+{
+ return (val >> FGRAPH_DATA_INDEX_SHIFT) & FGRAPH_DATA_INDEX_MASK;
+}
+
+/* Get the data_size for a DATA type ret_stack word */
+static inline int __get_data_size(unsigned long val)
+{
+ return ((val >> FGRAPH_DATA_SHIFT) & FGRAPH_DATA_MASK) + 1;
+}
+
+/* Get the word from the ret_stack at @offset */
+static inline unsigned long get_fgraph_entry(struct task_struct *t, int offset)
+{
+ return t->ret_stack[offset];
+}
+
+/* Get the FRAME_OFFSET from the word from the @offset on ret_stack */
+static inline int get_frame_offset(struct task_struct *t, int offset)
+{
+ return __get_offset(t->ret_stack[offset]);
+}
+
+/* For BITMAP type: get the bitmask from the @offset at ret_stack */
+static inline unsigned long
+get_bitmap_bits(struct task_struct *t, int offset)
+{
+ return (t->ret_stack[offset] >> FGRAPH_INDEX_SHIFT) & FGRAPH_INDEX_MASK;
+}
+
+/* Write the bitmap to the ret_stack at @offset (does index, offset and bitmask) */
+static inline void
+set_bitmap(struct task_struct *t, int offset, unsigned long bitmap)
+{
+ t->ret_stack[offset] = (bitmap << FGRAPH_INDEX_SHIFT) |
+ (FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET;
+}
+
+/* For DATA type: get the data saved under the ret_stack word at @offset */
+static inline void *get_data_type_data(struct task_struct *t, int offset)
+{
+ unsigned long val = t->ret_stack[offset];
+
+ if (__get_type(val) != FGRAPH_TYPE_DATA)
+ return NULL;
+ offset -= __get_data_size(val);
+ return (void *)&t->ret_stack[offset];
+}
+
+/* Create the ret_stack word for a DATA type */
+static inline unsigned long make_data_type_val(int idx, int size, int offset)
+{
+ return (idx << FGRAPH_DATA_INDEX_SHIFT) |
+ ((size - 1) << FGRAPH_DATA_SHIFT) |
+ (FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT) | offset;
+}
+
+/* ftrace_graph_entry set to this to tell some archs to run function graph */
+static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops)
+{
+ return 0;
+}
+
+/* ftrace_graph_return set to this to tell some archs to run function graph */
+static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops)
+{
+}
+
+static void ret_stack_set_task_var(struct task_struct *t, int idx, long val)
+{
+ unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack);
+
+ gvals[idx] = val;
+}
+
+static unsigned long *
+ret_stack_get_task_var(struct task_struct *t, int idx)
+{
+ unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack);
+
+ return &gvals[idx];
+}
+
+static void ret_stack_init_task_vars(unsigned long *ret_stack)
+{
+ unsigned long *gvals = SHADOW_STACK_TASK_VARS(ret_stack);
+
+ memset(gvals, 0, sizeof(*gvals) * FGRAPH_ARRAY_SIZE);
+}
+
+/**
+ * fgraph_reserve_data - Reserve storage on the task's ret_stack
+ * @idx: The index of fgraph_array
+ * @size_bytes: The size in bytes to reserve
+ *
+ * Reserves space of up to FGRAPH_MAX_DATA_SIZE bytes on the
+ * task's ret_stack shadow stack, for a given fgraph_ops during
+ * the entryfunc() call. If entryfunc() returns zero, the storage
+ * is discarded. An entryfunc() can only call this once per iteration.
+ * The fgraph_ops retfunc() can retrieve this stored data with
+ * fgraph_retrieve_data().
+ *
+ * Returns: On success, a pointer to the data on the stack.
+ * Otherwise, NULL if there's not enough space left on the
+ * ret_stack for the data, or if fgraph_reserve_data() was called
+ * more than once for a single entryfunc() call.
+ */
+void *fgraph_reserve_data(int idx, int size_bytes)
+{
+ unsigned long val;
+ void *data;
+ int curr_ret_stack = current->curr_ret_stack;
+ int data_size;
+
+ if (size_bytes > FGRAPH_MAX_DATA_SIZE)
+ return NULL;
+
+ /* Convert the data size to number of longs. */
+ data_size = (size_bytes + sizeof(long) - 1) >> (sizeof(long) == 4 ? 2 : 3);
+
+ val = get_fgraph_entry(current, curr_ret_stack - 1);
+ data = &current->ret_stack[curr_ret_stack];
+
+ curr_ret_stack += data_size + 1;
+ if (unlikely(curr_ret_stack >= SHADOW_STACK_MAX_OFFSET))
+ return NULL;
+
+ val = make_data_type_val(idx, data_size, __get_offset(val) + data_size + 1);
+
+ /* Set the last word to be reserved */
+ current->ret_stack[curr_ret_stack - 1] = val;
+
+ /* Make sure interrupts see this */
+ barrier();
+ current->curr_ret_stack = curr_ret_stack;
+ /* Again sync with interrupts, and reset reserve */
+ current->ret_stack[curr_ret_stack - 1] = val;
+
+ return data;
+}
+
+/**
+ * fgraph_retrieve_data - Retrieve stored data from fgraph_reserve_data()
+ * @idx: the index of fgraph_array (fgraph_ops::idx)
+ * @size_bytes: pointer to retrieved data size.
+ *
+ * This is to be called by a fgraph_ops retfunc(), to retrieve data that
+ * was stored by the fgraph_ops entryfunc() on the function entry.
+ * That is, this will retrieve the data that was reserved on the
+ * entry of the function that corresponds to the exit of the function
+ * that the fgraph_ops retfunc() is called on.
+ *
+ * Returns: The stored data from fgraph_reserve_data() called by the
+ * matching entryfunc() for the retfunc() this is called from.
+ * Or NULL if there was nothing stored.
+ */
+void *fgraph_retrieve_data(int idx, int *size_bytes)
+{
+ int offset = current->curr_ret_stack - 1;
+ unsigned long val;
+
+ val = get_fgraph_entry(current, offset);
+ while (__get_type(val) == FGRAPH_TYPE_DATA) {
+ if (__get_data_index(val) == idx)
+ goto found;
+ offset -= __get_data_size(val) + 1;
+ val = get_fgraph_entry(current, offset);
+ }
+ return NULL;
+found:
+ if (size_bytes)
+ *size_bytes = __get_data_size(val) * sizeof(long);
+ return get_data_type_data(current, offset);
+}
+
+/**
+ * fgraph_get_task_var - retrieve a task specific state variable
+ * @gops: The fgraph_ops that owns the task specific variable
+ *
+ * Every registered fgraph_ops has a task state variable
+ * reserved on the task's ret_stack. This function returns the
+ * address to that variable.
+ *
+ * Returns the address of the task-specific unsigned long variable
+ * that belongs to the fgraph_ops @gops.
+ */
+unsigned long *fgraph_get_task_var(struct fgraph_ops *gops)
+{
+ return ret_stack_get_task_var(current, gops->idx);
+}
+
+/*
+ * @offset: The offset into @t->ret_stack to find the ret_stack entry
+ * @frame_offset: Where to place the offset into @t->ret_stack of that entry
+ *
+ * Returns a pointer to the previous ret_stack below @offset or NULL
+ * when it reaches the bottom of the stack.
+ *
+ * Calling this with:
+ *
+ * offset = task->curr_ret_stack;
+ * do {
+ * ret_stack = get_ret_stack(task, offset, &offset);
+ * } while (ret_stack);
+ *
+ * Will iterate through all the ret_stack entries from curr_ret_stack
+ * down to the first one.
+ */
+static inline struct ftrace_ret_stack *
+get_ret_stack(struct task_struct *t, int offset, int *frame_offset)
+{
+ int offs;
+
+ BUILD_BUG_ON(FGRAPH_FRAME_SIZE % sizeof(long));
+
+ if (unlikely(offset <= 0))
+ return NULL;
+
+ offs = get_frame_offset(t, --offset);
+ if (WARN_ON_ONCE(offs <= 0 || offs > offset))
+ return NULL;
+
+ offset -= offs;
+
+ *frame_offset = offset;
+ return RET_STACK(t, offset);
+}
+
/* Both enabled by default (can be cleared by function_graph tracer flags) */
static bool fgraph_sleep_time = true;
@@ -51,6 +483,27 @@ int __weak ftrace_disable_ftrace_graph_caller(void)
}
#endif
+/* Default entry callback: returns 0 for every function */
+int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
+ struct fgraph_ops *gops)
+{
+ return 0;
+}
+
+/* Default return callback: does nothing */
+static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace,
+ struct fgraph_ops *gops)
+{
+}
+
+/* An fgraph_ops built from the two stub callbacks */
+static struct fgraph_ops fgraph_stub = {
+ .entryfunc = ftrace_graph_entry_stub,
+ .retfunc = ftrace_graph_ret_stub,
+};
+
+/*
+ * Initial state of the direct-call path: the direct gops is the stub,
+ * both static calls target the stub callbacks and the fgraph_do_direct
+ * static key starts out true.
+ */
+static struct fgraph_ops *fgraph_direct_gops = &fgraph_stub;
+DEFINE_STATIC_CALL(fgraph_func, ftrace_graph_entry_stub);
+DEFINE_STATIC_CALL(fgraph_retfunc, ftrace_graph_ret_stub);
+static DEFINE_STATIC_KEY_TRUE(fgraph_do_direct);
+
/**
* ftrace_graph_stop - set to permanently disable function graph tracing
*
@@ -67,10 +520,13 @@ void ftrace_graph_stop(void)
/* Add a function return address to the trace stack on thread info.*/
static int
ftrace_push_return_trace(unsigned long ret, unsigned long func,
- unsigned long frame_pointer, unsigned long *retp)
+ unsigned long frame_pointer, unsigned long *retp,
+ int fgraph_idx)
{
+ struct ftrace_ret_stack *ret_stack;
unsigned long long calltime;
- int index;
+ unsigned long val;
+ int offset;
if (unlikely(ftrace_graph_is_dead()))
return -EBUSY;
@@ -78,32 +534,67 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
if (!current->ret_stack)
return -EBUSY;
+ BUILD_BUG_ON(SHADOW_STACK_SIZE % sizeof(long));
+
+ /* Set val to "reserved" with the delta to the new fgraph frame */
+ val = (FGRAPH_TYPE_RESERVED << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET;
+
/*
* We must make sure the ret_stack is tested before we read
* anything else.
*/
smp_rmb();
- /* The return trace stack is full */
- if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+ /*
+ * Check if there's room on the shadow stack to fit a fgraph frame
+ * and a bitmap word.
+ */
+ if (current->curr_ret_stack + FGRAPH_FRAME_OFFSET + 1 >= SHADOW_STACK_MAX_OFFSET) {
atomic_inc(&current->trace_overrun);
return -EBUSY;
}
calltime = trace_clock_local();
- index = ++current->curr_ret_stack;
+ offset = READ_ONCE(current->curr_ret_stack);
+ ret_stack = RET_STACK(current, offset);
+ offset += FGRAPH_FRAME_OFFSET;
+
+ /* ret offset = FGRAPH_FRAME_OFFSET ; type = reserved */
+ current->ret_stack[offset] = val;
+ ret_stack->ret = ret;
+ /*
+ * The unwinders expect curr_ret_stack to point to either zero
+ * or an offset where to find the next ret_stack. Even though the
+ * ret stack might be bogus, we want to write the ret and the
+ * offset to find the ret_stack before we increment the stack point.
+ * If an interrupt comes in now before we increment the curr_ret_stack
+ * it may blow away what we wrote. But that's fine, because the
+ * offset will still be correct (even though the 'ret' won't be).
+ * What we worry about is the offset being correct after we increment
+ * the curr_ret_stack and before we update that offset, as if an
+ * interrupt comes in and does an unwind stack dump, it will need
+ * at least a correct offset!
+ */
+ barrier();
+ WRITE_ONCE(current->curr_ret_stack, offset + 1);
+ /*
+ * This next barrier is to ensure that an interrupt coming in
+ * will not corrupt what we are about to write.
+ */
barrier();
- current->ret_stack[index].ret = ret;
- current->ret_stack[index].func = func;
- current->ret_stack[index].calltime = calltime;
+
+ /* Still keep it reserved even if an interrupt came in */
+ current->ret_stack[offset] = val;
+
+ ret_stack->ret = ret;
+ ret_stack->func = func;
+ ret_stack->calltime = calltime;
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- current->ret_stack[index].fp = frame_pointer;
-#endif
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
- current->ret_stack[index].retp = retp;
+ ret_stack->fp = frame_pointer;
#endif
- return 0;
+ ret_stack->retp = retp;
+ return offset;
}
/*
@@ -120,44 +611,85 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
# define MCOUNT_INSN_SIZE 0
#endif
+/* If the caller does not use ftrace, call this function. */
int function_graph_enter(unsigned long ret, unsigned long func,
unsigned long frame_pointer, unsigned long *retp)
{
struct ftrace_graph_ent trace;
+ unsigned long bitmap = 0;
+ int offset;
+ int i;
trace.func = func;
trace.depth = ++current->curr_ret_depth;
- if (ftrace_push_return_trace(ret, func, frame_pointer, retp))
+ offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0);
+ if (offset < 0)
goto out;
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace))
+#ifdef CONFIG_HAVE_STATIC_CALL
+ if (static_branch_likely(&fgraph_do_direct)) {
+ int save_curr_ret_stack = current->curr_ret_stack;
+
+ if (static_call(fgraph_func)(&trace, fgraph_direct_gops))
+ bitmap |= BIT(fgraph_direct_gops->idx);
+ else
+ /* Clear out any saved storage */
+ current->curr_ret_stack = save_curr_ret_stack;
+ } else
+#endif
+ {
+ for_each_set_bit(i, &fgraph_array_bitmask,
+ sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
+ struct fgraph_ops *gops = READ_ONCE(fgraph_array[i]);
+ int save_curr_ret_stack;
+
+ if (gops == &fgraph_stub)
+ continue;
+
+ save_curr_ret_stack = current->curr_ret_stack;
+ if (ftrace_ops_test(&gops->ops, func, NULL) &&
+ gops->entryfunc(&trace, gops))
+ bitmap |= BIT(i);
+ else
+ /* Clear out any saved storage */
+ current->curr_ret_stack = save_curr_ret_stack;
+ }
+ }
+
+ if (!bitmap)
goto out_ret;
+ /*
+ * Since this function uses fgraph_idx = 0 as a tail-call checking
+ * flag, set that bit always.
+ */
+ set_bitmap(current, offset, bitmap | BIT(0));
+
return 0;
out_ret:
- current->curr_ret_stack--;
+ current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1;
out:
current->curr_ret_depth--;
return -EBUSY;
}
/* Retrieve a function return address to the trace stack on thread info.*/
-static void
+static struct ftrace_ret_stack *
ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
- unsigned long frame_pointer)
+ unsigned long frame_pointer, int *offset)
{
- int index;
+ struct ftrace_ret_stack *ret_stack;
- index = current->curr_ret_stack;
+ ret_stack = get_ret_stack(current, current->curr_ret_stack, offset);
- if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
+ if (unlikely(!ret_stack)) {
ftrace_graph_stop();
- WARN_ON(1);
+ WARN(1, "Bad function graph ret_stack pointer: %d",
+ current->curr_ret_stack);
/* Might as well panic, otherwise we have no where to go */
*ret = (unsigned long)panic;
- return;
+ return NULL;
}
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
@@ -175,30 +707,33 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
* Note, -mfentry does not use frame pointers, and this test
* is not needed if CC_USING_FENTRY is set.
*/
- if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
+ if (unlikely(ret_stack->fp != frame_pointer)) {
ftrace_graph_stop();
WARN(1, "Bad frame pointer: expected %lx, received %lx\n"
" from func %ps return to %lx\n",
- current->ret_stack[index].fp,
+ ret_stack->fp,
frame_pointer,
- (void *)current->ret_stack[index].func,
- current->ret_stack[index].ret);
+ (void *)ret_stack->func,
+ ret_stack->ret);
*ret = (unsigned long)panic;
- return;
+ return NULL;
}
#endif
- *ret = current->ret_stack[index].ret;
- trace->func = current->ret_stack[index].func;
- trace->calltime = current->ret_stack[index].calltime;
+ *offset += FGRAPH_FRAME_OFFSET;
+ *ret = ret_stack->ret;
+ trace->func = ret_stack->func;
+ trace->calltime = ret_stack->calltime;
trace->overrun = atomic_read(&current->trace_overrun);
- trace->depth = current->curr_ret_depth--;
+ trace->depth = current->curr_ret_depth;
/*
* We still want to trace interrupts coming in if
* max_depth is set to 1. Make sure the decrement is
* seen before ftrace_graph_return.
*/
barrier();
+
+ return ret_stack;
}
/*
@@ -236,30 +771,55 @@ struct fgraph_ret_regs;
static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs,
unsigned long frame_pointer)
{
+ struct ftrace_ret_stack *ret_stack;
struct ftrace_graph_ret trace;
+ unsigned long bitmap;
unsigned long ret;
+ int offset;
+ int i;
+
+ ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer, &offset);
+
+ if (unlikely(!ret_stack)) {
+ ftrace_graph_stop();
+ WARN_ON(1);
+ /* Might as well panic. What else to do? */
+ return (unsigned long)panic;
+ }
- ftrace_pop_return_trace(&trace, &ret, frame_pointer);
+ trace.rettime = trace_clock_local();
#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
trace.retval = fgraph_ret_regs_return_value(ret_regs);
#endif
- trace.rettime = trace_clock_local();
- ftrace_graph_return(&trace);
+
+ bitmap = get_bitmap_bits(current, offset);
+
+#ifdef CONFIG_HAVE_STATIC_CALL
+ if (static_branch_likely(&fgraph_do_direct)) {
+ if (test_bit(fgraph_direct_gops->idx, &bitmap))
+ static_call(fgraph_retfunc)(&trace, fgraph_direct_gops);
+ } else
+#endif
+ {
+ for_each_set_bit(i, &bitmap, sizeof(bitmap) * BITS_PER_BYTE) {
+ struct fgraph_ops *gops = fgraph_array[i];
+
+ if (gops == &fgraph_stub)
+ continue;
+
+ gops->retfunc(&trace, gops);
+ }
+ }
+
/*
* The ftrace_graph_return() may still access the current
* ret_stack structure, we need to make sure the update of
* curr_ret_stack is after that.
*/
barrier();
- current->curr_ret_stack--;
-
- if (unlikely(!ret)) {
- ftrace_graph_stop();
- WARN_ON(1);
- /* Might as well panic. What else to do? */
- ret = (unsigned long)panic;
- }
+ current->curr_ret_stack = offset - FGRAPH_FRAME_OFFSET;
+ current->curr_ret_depth--;
return ret;
}
@@ -282,7 +842,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
/**
* ftrace_graph_get_ret_stack - return the entry of the shadow stack
- * @task: The task to read the shadow stack from
+ * @task: The task to read the shadow stack from.
* @idx: Index down the shadow stack
*
* Return the ret_struct on the shadow stack of the @task at the
@@ -294,104 +854,116 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
struct ftrace_ret_stack *
ftrace_graph_get_ret_stack(struct task_struct *task, int idx)
{
- idx = task->curr_ret_stack - idx;
+ struct ftrace_ret_stack *ret_stack = NULL;
+ int offset = task->curr_ret_stack;
- if (idx >= 0 && idx <= task->curr_ret_stack)
- return &task->ret_stack[idx];
+ if (offset < 0)
+ return NULL;
- return NULL;
+ do {
+ ret_stack = get_ret_stack(task, offset, &offset);
+ } while (ret_stack && --idx >= 0);
+
+ return ret_stack;
}
/**
- * ftrace_graph_ret_addr - convert a potentially modified stack return address
- * to its original value
+ * ftrace_graph_ret_addr - return the original value of the return address
+ * @task: The task the unwinder is being executed on
+ * @idx: An initialized pointer to the next stack index to use
+ * @ret: The current return address (likely pointing to return_handler)
+ * @retp: The address on the stack of the current return location
*
* This function can be called by stack unwinding code to convert a found stack
- * return address ('ret') to its original value, in case the function graph
+ * return address (@ret) to its original value, in case the function graph
* tracer has modified it to be 'return_to_handler'. If the address hasn't
- * been modified, the unchanged value of 'ret' is returned.
+ * been modified, the unchanged value of @ret is returned.
*
- * 'idx' is a state variable which should be initialized by the caller to zero
- * before the first call.
+ * @idx holds the last index used to know where to start from. It should be
+ * initialized to zero for the first iteration as that will mean to start
+ * at the top of the shadow stack. If the location is found, this pointer
+ * will be assigned that location so that if called again, it will continue
+ * where it left off.
*
- * 'retp' is a pointer to the return address on the stack. It's ignored if
- * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
+ * @retp is a pointer to the return address on the stack.
*/
-#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
unsigned long ret, unsigned long *retp)
{
- int index = task->curr_ret_stack;
- int i;
+ struct ftrace_ret_stack *ret_stack;
+ unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler);
+ int i = task->curr_ret_stack;
- if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler))
+ if (ret != return_handler)
return ret;
- if (index < 0)
+ if (!idx)
return ret;
- for (i = 0; i <= index; i++)
- if (task->ret_stack[i].retp == retp)
- return task->ret_stack[i].ret;
+ i = *idx ? : task->curr_ret_stack;
+ while (i > 0) {
+ /*
+ * Walk @task's shadow stack: the offset in i comes from
+ * task->curr_ret_stack, and @task is not necessarily current.
+ */
+ ret_stack = get_ret_stack(task, i, &i);
+ if (!ret_stack)
+ break;
+ /*
+ * For the tail-call, there would be 2 or more ftrace_ret_stacks on
+ * the ret_stack, which records "return_to_handler" as the return
+ * address except for the last one.
+ * But on the real stack, there should be 1 entry because tail-call
+ * reuses the return address on the stack and jump to the next function.
+ * Thus we will continue to find real return address.
+ */
+ if (ret_stack->retp == retp &&
+ ret_stack->ret != return_handler) {
+ *idx = i;
+ return ret_stack->ret;
+ }
+ }
return ret;
}
-#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
-unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
- unsigned long ret, unsigned long *retp)
-{
- int task_idx;
-
- if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler))
- return ret;
-
- task_idx = task->curr_ret_stack;
-
- if (!task->ret_stack || task_idx < *idx)
- return ret;
-
- task_idx -= *idx;
- (*idx)++;
-
- return task->ret_stack[task_idx].ret;
-}
-#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
static struct ftrace_ops graph_ops = {
.func = ftrace_graph_func,
- .flags = FTRACE_OPS_FL_INITIALIZED |
- FTRACE_OPS_FL_PID |
- FTRACE_OPS_GRAPH_STUB,
+ .flags = FTRACE_OPS_GRAPH_STUB,
#ifdef FTRACE_GRAPH_TRAMP_ADDR
.trampoline = FTRACE_GRAPH_TRAMP_ADDR,
/* trampoline_size is only needed for dynamically allocated tramps */
#endif
- ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
};
-void ftrace_graph_sleep_time_control(bool enable)
+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+ struct ftrace_ops *src_ops)
{
- fgraph_sleep_time = enable;
+ dst_ops->flags = FTRACE_OPS_FL_PID | FTRACE_OPS_GRAPH_STUB;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ if (src_ops) {
+ dst_ops->func_hash = &src_ops->local_hash;
+ mutex_init(&dst_ops->local_hash.regex_lock);
+ INIT_LIST_HEAD(&dst_ops->subop_list);
+ dst_ops->flags |= FTRACE_OPS_FL_INITIALIZED;
+ }
+#endif
}
-int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
+void ftrace_graph_sleep_time_control(bool enable)
{
- return 0;
+ fgraph_sleep_time = enable;
}
/*
* Simply points to ftrace_stub, but with the proper protocol.
* Defined by the linker script in linux/vmlinux.lds.h
*/
-extern void ftrace_stub_graph(struct ftrace_graph_ret *);
+void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops);
/* The callbacks that hook a function */
trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph;
trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
-static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
-static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
+static int alloc_retstack_tasklist(unsigned long **ret_stack_list)
{
int i;
int ret = 0;
@@ -399,10 +971,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
struct task_struct *g, *t;
for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) {
- ret_stack_list[i] =
- kmalloc_array(FTRACE_RETFUNC_DEPTH,
- sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
+ ret_stack_list[i] = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
if (!ret_stack_list[i]) {
start = 0;
end = i;
@@ -420,9 +989,10 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
if (t->ret_stack == NULL) {
atomic_set(&t->trace_overrun, 0);
- t->curr_ret_stack = -1;
+ ret_stack_init_task_vars(ret_stack_list[start]);
+ t->curr_ret_stack = 0;
t->curr_ret_depth = -1;
- /* Make sure the tasks see the -1 first: */
+ /* Make sure the tasks see the 0 first: */
smp_wmb();
t->ret_stack = ret_stack_list[start++];
}
@@ -442,8 +1012,9 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
struct task_struct *next,
unsigned int prev_state)
{
+ struct ftrace_ret_stack *ret_stack;
unsigned long long timestamp;
- int index;
+ int offset;
/*
* Does the user want to count the time a function was asleep.
@@ -466,57 +1037,23 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
*/
timestamp -= next->ftrace_timestamp;
- for (index = next->curr_ret_stack; index >= 0; index--)
- next->ret_stack[index].calltime += timestamp;
-}
-
-static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
-{
- if (!ftrace_ops_test(&global_ops, trace->func, NULL))
- return 0;
- return __ftrace_graph_entry(trace);
-}
-
-/*
- * The function graph tracer should only trace the functions defined
- * by set_ftrace_filter and set_ftrace_notrace. If another function
- * tracer ops is registered, the graph tracer requires testing the
- * function against the global ops, and not just trace any function
- * that any ftrace_ops registered.
- */
-void update_function_graph_func(void)
-{
- struct ftrace_ops *op;
- bool do_test = false;
-
- /*
- * The graph and global ops share the same set of functions
- * to test. If any other ops is on the list, then
- * the graph tracing needs to test if its the function
- * it should call.
- */
- do_for_each_ftrace_op(op, ftrace_ops_list) {
- if (op != &global_ops && op != &graph_ops &&
- op != &ftrace_list_end) {
- do_test = true;
- /* in double loop, break out with goto */
- goto out;
- }
- } while_for_each_ftrace_op(op);
- out:
- if (do_test)
- ftrace_graph_entry = ftrace_graph_entry_test;
- else
- ftrace_graph_entry = __ftrace_graph_entry;
+ for (offset = next->curr_ret_stack; offset > 0; ) {
+ ret_stack = get_ret_stack(next, offset, &offset);
+ if (ret_stack)
+ ret_stack->calltime += timestamp;
+ }
}
-static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
+static DEFINE_PER_CPU(unsigned long *, idle_ret_stack);
static void
-graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
+graph_init_task(struct task_struct *t, unsigned long *ret_stack)
{
atomic_set(&t->trace_overrun, 0);
+ ret_stack_init_task_vars(ret_stack);
t->ftrace_timestamp = 0;
+ t->curr_ret_stack = 0;
+ t->curr_ret_depth = -1;
/* make curr_ret_stack visible before we add the ret_stack */
smp_wmb();
t->ret_stack = ret_stack;
@@ -528,7 +1065,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
*/
void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
{
- t->curr_ret_stack = -1;
+ t->curr_ret_stack = 0;
t->curr_ret_depth = -1;
/*
* The idle task has no parent, it either has its own
@@ -538,14 +1075,11 @@ void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
if (ftrace_graph_active) {
- struct ftrace_ret_stack *ret_stack;
+ unsigned long *ret_stack;
ret_stack = per_cpu(idle_ret_stack, cpu);
if (!ret_stack) {
- ret_stack =
- kmalloc_array(FTRACE_RETFUNC_DEPTH,
- sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
+ ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
if (!ret_stack)
return;
per_cpu(idle_ret_stack, cpu) = ret_stack;
@@ -559,15 +1093,13 @@ void ftrace_graph_init_task(struct task_struct *t)
{
/* Make sure we do not use the parent ret_stack */
t->ret_stack = NULL;
- t->curr_ret_stack = -1;
+ t->curr_ret_stack = 0;
t->curr_ret_depth = -1;
if (ftrace_graph_active) {
- struct ftrace_ret_stack *ret_stack;
+ unsigned long *ret_stack;
- ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH,
- sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
+ ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
if (!ret_stack)
return;
graph_init_task(t, ret_stack);
@@ -576,7 +1108,7 @@ void ftrace_graph_init_task(struct task_struct *t)
void ftrace_graph_exit_task(struct task_struct *t)
{
- struct ftrace_ret_stack *ret_stack = t->ret_stack;
+ unsigned long *ret_stack = t->ret_stack;
t->ret_stack = NULL;
/* NULL must become visible to IRQs before we free it: */
@@ -585,15 +1117,52 @@ void ftrace_graph_exit_task(struct task_struct *t)
kfree(ret_stack);
}
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * Entry callback that applies PID filtering: drop the event when the
+ * per-CPU ftrace_ignore_pid value rules out the current task, otherwise
+ * hand it to the callback kept in gops->saved_func.
+ */
+static int fgraph_pid_func(struct ftrace_graph_ent *trace,
+			   struct fgraph_ops *gops)
+{
+	struct trace_array *tr = gops->ops.private;
+	int filter_pid;
+
+	/* No trace_array attached: nothing to filter on */
+	if (!tr)
+		return gops->saved_func(trace, gops);
+
+	filter_pid = this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid);
+	if (filter_pid == FTRACE_PID_IGNORE)
+		return 0;
+	if (filter_pid != FTRACE_PID_TRACE && filter_pid != current->pid)
+		return 0;
+
+	return gops->saved_func(trace, gops);
+}
+
+/*
+ * Re-select the entry callback of every sub-ops of graph_ops that has
+ * FTRACE_OPS_FL_PID set: fgraph_pid_func when ftrace_pids_enabled()
+ * reports true for that ops, its saved_func otherwise. While exactly one
+ * user is active the fgraph_func static call is retargeted as well.
+ */
+void fgraph_update_pid_func(void)
+{
+	struct ftrace_ops *subop;
+
+	/* Nothing to walk until graph_ops has been initialized */
+	if (!(graph_ops.flags & FTRACE_OPS_FL_INITIALIZED))
+		return;
+
+	list_for_each_entry(subop, &graph_ops.subop_list, list) {
+		struct fgraph_ops *gops;
+
+		if (!(subop->flags & FTRACE_OPS_FL_PID))
+			continue;
+
+		gops = container_of(subop, struct fgraph_ops, ops);
+		if (ftrace_pids_enabled(subop))
+			gops->entryfunc = fgraph_pid_func;
+		else
+			gops->entryfunc = gops->saved_func;
+
+		if (ftrace_graph_active == 1)
+			static_call_update(fgraph_func, gops->entryfunc);
+	}
+}
+#endif
+
/* Allocate a return stack for each task */
static int start_graph_tracing(void)
{
- struct ftrace_ret_stack **ret_stack_list;
+ unsigned long **ret_stack_list;
int ret, cpu;
- ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE,
- sizeof(struct ftrace_ret_stack *),
- GFP_KERNEL);
+ ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL);
if (!ret_stack_list)
return -ENOMEM;
@@ -619,40 +1188,111 @@ static int start_graph_tracing(void)
return ret;
}
+/*
+ * Set task variable @idx to 0, via ret_stack_set_task_var(), on every
+ * task that currently has a ret_stack.
+ */
+static void init_task_vars(int idx)
+{
+	struct task_struct *leader, *thread;
+	int cpu;
+
+	/* The idle task of each online CPU is handled on its own */
+	for_each_online_cpu(cpu) {
+		struct task_struct *idle = idle_task(cpu);
+
+		if (idle->ret_stack)
+			ret_stack_set_task_var(idle, idx, 0);
+	}
+
+	/* All other threads are visited under the tasklist read lock */
+	read_lock(&tasklist_lock);
+	for_each_process_thread(leader, thread) {
+		if (!thread->ret_stack)
+			continue;
+		ret_stack_set_task_var(thread, idx, 0);
+	}
+	read_unlock(&tasklist_lock);
+}
+
+/*
+ * ftrace_graph_enable_direct - route fgraph callbacks through the static calls
+ * @enable_branch: when true, also switch the fgraph_do_direct static key on
+ *
+ * Takes the entry/return callbacks of the fgraph_ops whose bit is set in
+ * fgraph_array_bitmask (the last set bit wins if several are set), records
+ * that ops in fgraph_direct_gops and retargets the fgraph_func and
+ * fgraph_retfunc static calls at its callbacks. If no callback is found a
+ * one-time warning is issued and the static calls and key are left alone.
+ */
+static void ftrace_graph_enable_direct(bool enable_branch)
+{
+	trace_func_graph_ent_t func = NULL;
+	trace_func_graph_ret_t retfunc = NULL;
+	int i;
+
+	for_each_set_bit(i, &fgraph_array_bitmask,
+			 sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
+		func = fgraph_array[i]->entryfunc;
+		retfunc = fgraph_array[i]->retfunc;
+		fgraph_direct_gops = fgraph_array[i];
+	}
+	if (WARN_ON_ONCE(!func))
+		return;
+
+	static_call_update(fgraph_func, func);
+	static_call_update(fgraph_retfunc, retfunc);
+	/*
+	 * Only flip the key once the static calls point at the real
+	 * callbacks. This is the counterpart of the static_branch_disable()
+	 * in ftrace_graph_disable_direct(): it must turn the direct path
+	 * back ON, otherwise the key stays off after the first time a
+	 * second user was registered.
+	 */
+	if (enable_branch)
+		static_branch_enable(&fgraph_do_direct);
+}
+
+/*
+ * Point the fgraph_func/fgraph_retfunc static calls back at the stub
+ * callbacks and make fgraph_stub the direct gops again.
+ * @disable_branch: when true, first switch the fgraph_do_direct static key off
+ */
+static void ftrace_graph_disable_direct(bool disable_branch)
+{
+ /* The key is turned off before the static calls are reset to the stubs */
+ if (disable_branch)
+ static_branch_disable(&fgraph_do_direct);
+ static_call_update(fgraph_func, ftrace_graph_entry_stub);
+ static_call_update(fgraph_retfunc, ftrace_graph_ret_stub);
+ fgraph_direct_gops = &fgraph_stub;
+}
+
int register_ftrace_graph(struct fgraph_ops *gops)
{
+ int command = 0;
int ret = 0;
+ int i = -1;
mutex_lock(&ftrace_lock);
- /* we currently allow only one tracer registered at a time */
- if (ftrace_graph_active) {
- ret = -EBUSY;
+ if (!fgraph_array[0]) {
+ /* The array must always have real data on it */
+ for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
+ fgraph_array[i] = &fgraph_stub;
+ fgraph_lru_init();
+ }
+
+ i = fgraph_lru_alloc_index();
+ if (i < 0 || WARN_ON_ONCE(fgraph_array[i] != &fgraph_stub)) {
+ ret = -ENOSPC;
goto out;
}
- register_pm_notifier(&ftrace_suspend_notifier);
+ fgraph_array[i] = gops;
+ gops->idx = i;
ftrace_graph_active++;
- ret = start_graph_tracing();
- if (ret) {
- ftrace_graph_active--;
- goto out;
- }
- ftrace_graph_return = gops->retfunc;
+ if (ftrace_graph_active == 2)
+ ftrace_graph_disable_direct(true);
- /*
- * Update the indirect function to the entryfunc, and the
- * function that gets called to the entry_test first. Then
- * call the update fgraph entry function to determine if
- * the entryfunc should be called directly or not.
- */
- __ftrace_graph_entry = gops->entryfunc;
- ftrace_graph_entry = ftrace_graph_entry_test;
- update_function_graph_func();
+ if (ftrace_graph_active == 1) {
+ ftrace_graph_enable_direct(false);
+ register_pm_notifier(&ftrace_suspend_notifier);
+ ret = start_graph_tracing();
+ if (ret)
+ goto error;
+ /*
+ * Some archs just test to see if these are not
+ * the default function
+ */
+ ftrace_graph_return = return_run;
+ ftrace_graph_entry = entry_run;
+ command = FTRACE_START_FUNC_RET;
+ } else {
+ init_task_vars(gops->idx);
+ }
+
+ /* Always save the function, and reset at unregistering */
+ gops->saved_func = gops->entryfunc;
- ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
+ ret = ftrace_startup_subops(&graph_ops, &gops->ops, command);
+error:
+ if (ret) {
+ fgraph_array[i] = &fgraph_stub;
+ ftrace_graph_active--;
+ gops->saved_func = NULL;
+ fgraph_lru_release_index(i);
+ }
out:
mutex_unlock(&ftrace_lock);
return ret;
@@ -660,19 +1300,41 @@ out:
void unregister_ftrace_graph(struct fgraph_ops *gops)
{
+ int command = 0;
+
mutex_lock(&ftrace_lock);
if (unlikely(!ftrace_graph_active))
goto out;
+ if (unlikely(gops->idx < 0 || gops->idx >= FGRAPH_ARRAY_SIZE ||
+ fgraph_array[gops->idx] != gops))
+ goto out;
+
+ if (fgraph_lru_release_index(gops->idx) < 0)
+ goto out;
+
+ fgraph_array[gops->idx] = &fgraph_stub;
+
ftrace_graph_active--;
- ftrace_graph_return = ftrace_stub_graph;
- ftrace_graph_entry = ftrace_graph_entry_stub;
- __ftrace_graph_entry = ftrace_graph_entry_stub;
- ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
- unregister_pm_notifier(&ftrace_suspend_notifier);
- unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
+ if (!ftrace_graph_active)
+ command = FTRACE_STOP_FUNC_RET;
+
+ ftrace_shutdown_subops(&graph_ops, &gops->ops, command);
+
+ if (ftrace_graph_active == 1)
+ ftrace_graph_enable_direct(true);
+ else if (!ftrace_graph_active)
+ ftrace_graph_disable_direct(false);
+
+ if (!ftrace_graph_active) {
+ ftrace_graph_return = ftrace_stub_graph;
+ ftrace_graph_entry = ftrace_graph_entry_stub;
+ unregister_pm_notifier(&ftrace_suspend_notifier);
+ unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
+ }
out:
+ gops->saved_func = NULL;
mutex_unlock(&ftrace_lock);
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index eacab4020508..e5d6a4ab433b 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -74,7 +74,8 @@
#ifdef CONFIG_DYNAMIC_FTRACE
#define INIT_OPS_HASH(opsname) \
.func_hash = &opsname.local_hash, \
- .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
+ .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), \
+ .subop_list = LIST_HEAD_INIT(opsname.subop_list),
#else
#define INIT_OPS_HASH(opsname)
#endif
@@ -99,7 +100,7 @@ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;
/* What to set function_trace_op to */
static struct ftrace_ops *set_function_trace_op;
-static bool ftrace_pids_enabled(struct ftrace_ops *ops)
+bool ftrace_pids_enabled(struct ftrace_ops *ops)
{
struct trace_array *tr;
@@ -121,7 +122,7 @@ static int ftrace_disabled __read_mostly;
DEFINE_MUTEX(ftrace_lock);
-struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end;
+struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = (struct ftrace_ops __rcu *)&ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
struct ftrace_ops global_ops;
@@ -161,12 +162,14 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops)
#ifdef CONFIG_DYNAMIC_FTRACE
if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) {
mutex_init(&ops->local_hash.regex_lock);
+ INIT_LIST_HEAD(&ops->subop_list);
ops->func_hash = &ops->local_hash;
ops->flags |= FTRACE_OPS_FL_INITIALIZED;
}
#endif
}
+/* Call this function for when a callback filters on set_ftrace_pid */
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs)
{
@@ -235,8 +238,6 @@ static void update_ftrace_function(void)
func = ftrace_ops_list_func;
}
- update_function_graph_func();
-
/* If there's no change, then do nothing more here */
if (ftrace_trace_function == func)
return;
@@ -310,7 +311,7 @@ static int remove_ftrace_ops(struct ftrace_ops __rcu **list,
lockdep_is_held(&ftrace_lock)) == ops &&
rcu_dereference_protected(ops->next,
lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
- *list = &ftrace_list_end;
+ rcu_assign_pointer(*list, &ftrace_list_end);
return 0;
}
@@ -406,6 +407,8 @@ static void ftrace_update_pid_func(void)
}
} while_for_each_ftrace_op(op);
+ fgraph_update_pid_func();
+
update_ftrace_function();
}
@@ -817,7 +820,8 @@ void ftrace_graph_graph_time_control(bool enable)
fgraph_graph_time = enable;
}
-static int profile_graph_entry(struct ftrace_graph_ent *trace)
+static int profile_graph_entry(struct ftrace_graph_ent *trace,
+ struct fgraph_ops *gops)
{
struct ftrace_ret_stack *ret_stack;
@@ -834,7 +838,8 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace)
return 1;
}
-static void profile_graph_return(struct ftrace_graph_ret *trace)
+static void profile_graph_return(struct ftrace_graph_ret *trace,
+ struct fgraph_ops *gops)
{
struct ftrace_ret_stack *ret_stack;
struct ftrace_profile_stat *stat;
@@ -1314,7 +1319,7 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
return hash;
}
-
+/* Used to save filters on functions for modules not loaded yet */
static int ftrace_add_mod(struct trace_array *tr,
const char *func, const char *module,
int enable)
@@ -1380,15 +1385,17 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
return NULL;
}
-static void
-ftrace_hash_rec_disable_modify(struct ftrace_ops *ops, int filter_hash);
-static void
-ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, int filter_hash);
+static void ftrace_hash_rec_disable_modify(struct ftrace_ops *ops);
+static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops);
static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
struct ftrace_hash *new_hash);
-static struct ftrace_hash *dup_hash(struct ftrace_hash *src, int size)
+/*
+ * Allocate a new hash and remove entries from @src and move them to the new hash.
+ * On success, the @src hash will be empty and should be freed.
+ */
+static struct ftrace_hash *__move_hash(struct ftrace_hash *src, int size)
{
struct ftrace_func_entry *entry;
struct ftrace_hash *new_hash;
@@ -1424,6 +1431,7 @@ static struct ftrace_hash *dup_hash(struct ftrace_hash *src, int size)
return new_hash;
}
+/* Move the @src entries to a newly allocated hash */
static struct ftrace_hash *
__ftrace_hash_move(struct ftrace_hash *src)
{
@@ -1435,9 +1443,29 @@ __ftrace_hash_move(struct ftrace_hash *src)
if (ftrace_hash_empty(src))
return EMPTY_HASH;
- return dup_hash(src, size);
+ return __move_hash(src, size);
}
+/**
+ * ftrace_hash_move - move a new hash to a filter and do updates
+ * @ops: The ops with the hash that @dst points to
+ * @enable: True if for the filter hash, false for the notrace hash
+ * @dst: Points to the @ops hash that should be updated
+ * @src: The hash to update @dst with
+ *
+ * This is called when an ftrace_ops hash is being updated and
+ * the kernel needs to reflect this. Note, this only updates the kernel
+ * function callbacks if the @ops is enabled (not to be confused with
+ * @enable above). If the @ops is enabled, its hash determines what
+ * callbacks get called. This function gets called when the @ops hash
+ * is updated and it requires new callbacks.
+ *
+ * On success the elements of @src are moved to @dst, and @dst is updated
+ * properly, as well as the functions determined by the @ops hashes
+ * are now calling the @ops callback function.
+ *
+ * Regardless of return type, @src should be freed with free_ftrace_hash().
+ */
static int
ftrace_hash_move(struct ftrace_ops *ops, int enable,
struct ftrace_hash **dst, struct ftrace_hash *src)
@@ -1467,11 +1495,11 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
* Remove the current set, update the hash and add
* them back.
*/
- ftrace_hash_rec_disable_modify(ops, enable);
+ ftrace_hash_rec_disable_modify(ops);
rcu_assign_pointer(*dst, new_hash);
- ftrace_hash_rec_enable_modify(ops, enable);
+ ftrace_hash_rec_enable_modify(ops);
return 0;
}
@@ -1694,12 +1722,21 @@ static bool skip_record(struct dyn_ftrace *rec)
!(rec->flags & FTRACE_FL_ENABLED);
}
+/*
+ * This is the main engine to the ftrace updates to the dyn_ftrace records.
+ *
+ * It will iterate through all the available ftrace functions
+ * (the ones that ftrace can have callbacks to) and set the flags
+ * in the associated dyn_ftrace records.
+ *
+ * @inc: If true, the functions associated to @ops are added to
+ * the dyn_ftrace records, otherwise they are removed.
+ */
static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
- int filter_hash,
bool inc)
{
struct ftrace_hash *hash;
- struct ftrace_hash *other_hash;
+ struct ftrace_hash *notrace_hash;
struct ftrace_page *pg;
struct dyn_ftrace *rec;
bool update = false;
@@ -1711,35 +1748,16 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
return false;
/*
- * In the filter_hash case:
* If the count is zero, we update all records.
* Otherwise we just update the items in the hash.
- *
- * In the notrace_hash case:
- * We enable the update in the hash.
- * As disabling notrace means enabling the tracing,
- * and enabling notrace means disabling, the inc variable
- * gets inversed.
*/
- if (filter_hash) {
- hash = ops->func_hash->filter_hash;
- other_hash = ops->func_hash->notrace_hash;
- if (ftrace_hash_empty(hash))
- all = true;
- } else {
- inc = !inc;
- hash = ops->func_hash->notrace_hash;
- other_hash = ops->func_hash->filter_hash;
- /*
- * If the notrace hash has no items,
- * then there's nothing to do.
- */
- if (ftrace_hash_empty(hash))
- return false;
- }
+ hash = ops->func_hash->filter_hash;
+ notrace_hash = ops->func_hash->notrace_hash;
+ if (ftrace_hash_empty(hash))
+ all = true;
do_for_each_ftrace_rec(pg, rec) {
- int in_other_hash = 0;
+ int in_notrace_hash = 0;
int in_hash = 0;
int match = 0;
@@ -1751,26 +1769,17 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
* Only the filter_hash affects all records.
* Update if the record is not in the notrace hash.
*/
- if (!other_hash || !ftrace_lookup_ip(other_hash, rec->ip))
+ if (!notrace_hash || !ftrace_lookup_ip(notrace_hash, rec->ip))
match = 1;
} else {
in_hash = !!ftrace_lookup_ip(hash, rec->ip);
- in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip);
+ in_notrace_hash = !!ftrace_lookup_ip(notrace_hash, rec->ip);
/*
- * If filter_hash is set, we want to match all functions
- * that are in the hash but not in the other hash.
- *
- * If filter_hash is not set, then we are decrementing.
- * That means we match anything that is in the hash
- * and also in the other_hash. That is, we need to turn
- * off functions in the other hash because they are disabled
- * by this hash.
+ * We want to match all functions that are in the hash but
+ * not in the other hash.
*/
- if (filter_hash && in_hash && !in_other_hash)
- match = 1;
- else if (!filter_hash && in_hash &&
- (in_other_hash || ftrace_hash_empty(other_hash)))
+ if (in_hash && !in_notrace_hash)
match = 1;
}
if (!match)
@@ -1876,24 +1885,48 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
return update;
}
-static bool ftrace_hash_rec_disable(struct ftrace_ops *ops,
- int filter_hash)
+/*
+ * This is called when an ops is removed from tracing. It will decrement
+ * the counters of the dyn_ftrace records for all the functions that
+ * the @ops attached to.
+ */
+static bool ftrace_hash_rec_disable(struct ftrace_ops *ops)
{
- return __ftrace_hash_rec_update(ops, filter_hash, 0);
+ return __ftrace_hash_rec_update(ops, false);
}
-static bool ftrace_hash_rec_enable(struct ftrace_ops *ops,
- int filter_hash)
+/*
+ * This is called when an ops is added to tracing. It will increment
+ * the counters of the dyn_ftrace records for all the functions that
+ * the @ops attached to.
+ */
+static bool ftrace_hash_rec_enable(struct ftrace_ops *ops)
{
- return __ftrace_hash_rec_update(ops, filter_hash, 1);
+ return __ftrace_hash_rec_update(ops, true);
}
-static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops,
- int filter_hash, int inc)
+/*
+ * This function will update what functions @ops traces when its filter
+ * changes.
+ *
+ * The @inc states if the @ops callbacks are going to be added or removed.
+ * When one of the @ops hashes are updated to a "new_hash" the dyn_ftrace
+ * records are updated via:
+ *
+ * ftrace_hash_rec_disable_modify(ops);
+ * ops->hash = new_hash
+ * ftrace_hash_rec_enable_modify(ops);
+ *
+ * Where the @ops is removed from all the records it is tracing using
+ * its old hash. The @ops hash is updated to the new hash, and then
+ * the @ops is added back to the records so that it is tracing all
+ * the new functions.
+ */
+static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops, bool inc)
{
struct ftrace_ops *op;
- __ftrace_hash_rec_update(ops, filter_hash, inc);
+ __ftrace_hash_rec_update(ops, inc);
if (ops->func_hash != &global_ops.local_hash)
return;
@@ -1907,20 +1940,18 @@ static void ftrace_hash_rec_update_modify(struct ftrace_ops *ops,
if (op == ops)
continue;
if (op->func_hash == &global_ops.local_hash)
- __ftrace_hash_rec_update(op, filter_hash, inc);
+ __ftrace_hash_rec_update(op, inc);
} while_for_each_ftrace_op(op);
}
-static void ftrace_hash_rec_disable_modify(struct ftrace_ops *ops,
- int filter_hash)
+static void ftrace_hash_rec_disable_modify(struct ftrace_ops *ops)
{
- ftrace_hash_rec_update_modify(ops, filter_hash, 0);
+ ftrace_hash_rec_update_modify(ops, false);
}
-static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
- int filter_hash)
+static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops)
{
- ftrace_hash_rec_update_modify(ops, filter_hash, 1);
+ ftrace_hash_rec_update_modify(ops, true);
}
/*
@@ -3043,7 +3074,7 @@ int ftrace_startup(struct ftrace_ops *ops, int command)
return ret;
}
- if (ftrace_hash_rec_enable(ops, 1))
+ if (ftrace_hash_rec_enable(ops))
command |= FTRACE_UPDATE_CALLS;
ftrace_startup_enable(command);
@@ -3085,7 +3116,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
/* Disabling ipmodify never fails */
ftrace_hash_ipmodify_disable(ops);
- if (ftrace_hash_rec_disable(ops, 1))
+ if (ftrace_hash_rec_disable(ops))
command |= FTRACE_UPDATE_CALLS;
ops->flags &= ~FTRACE_OPS_FL_ENABLED;
@@ -3164,6 +3195,474 @@ out:
return 0;
}
+/*
+ * Duplicate @src. An empty @src is represented by the shared EMPTY_HASH,
+ * so nothing is allocated for it; otherwise the result is whatever
+ * alloc_and_copy_ftrace_hash() returns for a hash of the same size.
+ */
+static struct ftrace_hash *copy_hash(struct ftrace_hash *src)
+{
+	return ftrace_hash_empty(src) ? EMPTY_HASH :
+		alloc_and_copy_ftrace_hash(src->size_bits, src);
+}
+
+/*
+ * Merge the entries of @new_hash into *@hash (set union).
+ *
+ * For a filter hash "empty" means "trace every function", therefore:
+ *
+ *  - if *@hash is empty it already covers everything and is left alone;
+ *  - if @new_hash is empty, *@hash is freed and replaced by EMPTY_HASH
+ *    so that it covers everything as well;
+ *  - otherwise every ip of @new_hash that *@hash lacks is added to it.
+ *
+ * Only the filter_hash updates use append_hash(); the notrace_hash
+ * updates do not.
+ *
+ * Returns 0 on success or -ENOMEM if an entry could not be added.
+ */
+static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash)
+{
+	struct ftrace_func_entry *ent;
+	int nr_buckets;
+	int bucket;
+
+	/* *hash already traces everything, nothing can widen it */
+	if (ftrace_hash_empty(*hash))
+		return 0;
+
+	/* new_hash traces everything, so the result has to as well */
+	if (ftrace_hash_empty(new_hash)) {
+		free_ftrace_hash(*hash);
+		*hash = EMPTY_HASH;
+		return 0;
+	}
+
+	nr_buckets = 1 << new_hash->size_bits;
+	for (bucket = 0; bucket < nr_buckets; bucket++) {
+		hlist_for_each_entry(ent, &new_hash->buckets[bucket], hlist) {
+			/* Skip ips that *hash already holds */
+			if (__ftrace_lookup_ip(*hash, ent->ip))
+				continue;
+			if (!add_hash_entry(*hash, ent->ip))
+				return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Fill *@hash with the ips that are present in both @new_hash1 and
+ * @new_hash2 (set intersection).
+ *
+ * For a notrace hash "empty" means "nothing is excluded", so if either
+ * input is empty, or if the two share no ip, *@hash is freed and replaced
+ * by EMPTY_HASH.
+ *
+ * Only the notrace_hash updates use intersect_hash(); the filter_hash
+ * updates do not.
+ *
+ * Returns 0 on success or -ENOMEM if an entry could not be added.
+ */
+static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash1,
+			  struct ftrace_hash *new_hash2)
+{
+	struct ftrace_func_entry *ent;
+	int nr_buckets;
+	int bucket;
+
+	/* An empty side means nothing is notraced by both */
+	if (ftrace_hash_empty(new_hash1) || ftrace_hash_empty(new_hash2))
+		goto make_empty;
+
+	nr_buckets = 1 << new_hash1->size_bits;
+	for (bucket = 0; bucket < nr_buckets; bucket++) {
+		hlist_for_each_entry(ent, &new_hash1->buckets[bucket], hlist) {
+			/* Keep only what @new_hash2 has too */
+			if (!__ftrace_lookup_ip(new_hash2, ent->ip))
+				continue;
+			if (!add_hash_entry(*hash, ent->ip))
+				return -ENOMEM;
+		}
+	}
+
+	if (!ftrace_hash_empty(*hash))
+		return 0;
+
+	/* Nothing intersected: fall through and use the canonical empty set */
+make_empty:
+	free_ftrace_hash(*hash);
+	*hash = EMPTY_HASH;
+	return 0;
+}
+
+/*
+ * Return a new hash that is the union of the filter_hash of every subops
+ * on @ops->subop_list.
+ *
+ * An empty filter_hash means "trace every function", so as soon as one
+ * subops has an empty filter the union is EMPTY_HASH.
+ *
+ * The accumulator is seeded with a copy of the first subops' filter
+ * rather than with a freshly allocated (and therefore empty) hash:
+ * append_hash() treats an empty destination as "already traces
+ * everything" and returns without adding anything, which would make the
+ * union always come out empty.
+ *
+ * Returns the new hash (possibly EMPTY_HASH), or NULL if an allocation
+ * failed.
+ */
+static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
+{
+	struct ftrace_hash *new_hash = NULL;
+	struct ftrace_ops *subops;
+	int size_bits;
+	int ret;
+
+	/* Size the result like the manager's current filter hash */
+	size_bits = ops->func_hash->filter_hash->size_bits;
+
+	list_for_each_entry(subops, &ops->subop_list, list) {
+		struct ftrace_hash *sub_hash = subops->func_hash->filter_hash;
+
+		/* One subops tracing everything makes the union everything */
+		if (ftrace_hash_empty(sub_hash)) {
+			free_ftrace_hash(new_hash);
+			return EMPTY_HASH;
+		}
+
+		/* First subops: start from a copy of its (non-empty) filter */
+		if (!new_hash) {
+			new_hash = alloc_and_copy_ftrace_hash(size_bits, sub_hash);
+			if (!new_hash)
+				return NULL;
+			continue;
+		}
+
+		ret = append_hash(&new_hash, sub_hash);
+		if (ret < 0) {
+			free_ftrace_hash(new_hash);
+			return NULL;
+		}
+	}
+
+	/* No subops at all: keep reporting an empty (not failed) result */
+	if (!new_hash)
+		return EMPTY_HASH;
+
+	return new_hash;
+}
+
+/*
+ * Make @ops trace everything except what all its subops do not trace.
+ *
+ * Builds the intersection of the notrace_hash of every subops on
+ * @ops->subop_list: a function may only be excluded from the manager if
+ * every subops excludes it.
+ *
+ * The intersection is seeded with a copy of the first subops' notrace
+ * hash. Seeding from the manager's current notrace hash would include
+ * the state that is being replaced, so an ip newly excluded by all
+ * subops could never make it into the result.
+ *
+ * Returns the new hash (possibly empty or EMPTY_HASH), or NULL if an
+ * allocation failed or the subop list is empty.
+ */
+static struct ftrace_hash *intersect_hashes(struct ftrace_ops *ops)
+{
+	struct ftrace_hash *new_hash = NULL;
+	struct ftrace_ops *subops;
+	int size_bits;
+	int ret;
+
+	list_for_each_entry(subops, &ops->subop_list, list) {
+		struct ftrace_hash *sub_hash = subops->func_hash->notrace_hash;
+		struct ftrace_hash *next_hash;
+
+		/* First subops: start from a copy of its notrace hash */
+		if (!new_hash) {
+			size_bits = sub_hash->size_bits;
+			new_hash = alloc_and_copy_ftrace_hash(size_bits, sub_hash);
+			if (!new_hash)
+				return NULL;
+			continue;
+		}
+
+		/* Intersect the running result with this subops into a new hash */
+		size_bits = new_hash->size_bits;
+		next_hash = new_hash;
+		new_hash = alloc_ftrace_hash(size_bits);
+		if (!new_hash) {
+			free_ftrace_hash(next_hash);
+			return NULL;
+		}
+
+		ret = intersect_hash(&new_hash, next_hash, sub_hash);
+		free_ftrace_hash(next_hash);
+		if (ret < 0) {
+			free_ftrace_hash(new_hash);
+			return NULL;
+		}
+		/* Nothing more to do if new_hash is empty */
+		if (ftrace_hash_empty(new_hash))
+			break;
+	}
+	return new_hash;
+}
+
+/*
+ * Return true if hashes @A and @B hold the same set of ips. Two empty
+ * hashes are equal; an empty hash never equals a non-empty one.
+ */
+static bool ops_equal(struct ftrace_hash *A, struct ftrace_hash *B)
+{
+	bool a_empty = ftrace_hash_empty(A);
+	bool b_empty = ftrace_hash_empty(B);
+	struct ftrace_func_entry *ent;
+	int nr_buckets;
+	int bucket;
+
+	if (a_empty || b_empty)
+		return a_empty && b_empty;
+
+	/* Same count plus "every ip of A is in B" implies equal sets */
+	if (A->count != B->count)
+		return false;
+
+	nr_buckets = 1 << A->size_bits;
+	for (bucket = 0; bucket < nr_buckets; bucket++) {
+		hlist_for_each_entry(ent, &A->buckets[bucket], hlist) {
+			if (!__ftrace_lookup_ip(B, ent->ip))
+				return false;
+		}
+	}
+
+	return true;
+}
+
+static void ftrace_ops_update_code(struct ftrace_ops *ops,
+ struct ftrace_ops_hash *old_hash);
+
+/*
+ * Install @hash into *@orig_hash (one of the hashes of @ops) via
+ * ftrace_hash_move(). On success, ftrace_ops_update_code() is called with
+ * a snapshot of the hashes @ops had before the move, and the hash that
+ * *@orig_hash previously pointed to is freed with free_ftrace_hash_rcu().
+ *
+ * Returns the result of ftrace_hash_move(); on failure nothing is freed.
+ */
+static int __ftrace_hash_move_and_update_ops(struct ftrace_ops *ops,
+					     struct ftrace_hash **orig_hash,
+					     struct ftrace_hash *hash,
+					     int enable)
+{
+	struct ftrace_hash *prev_hash = *orig_hash;
+	struct ftrace_ops_hash prev_ops_hash;
+	int err;
+
+	/* Snapshot both hashes before the move replaces one of them */
+	prev_ops_hash.filter_hash = ops->func_hash->filter_hash;
+	prev_ops_hash.notrace_hash = ops->func_hash->notrace_hash;
+
+	err = ftrace_hash_move(ops, enable, orig_hash, hash);
+	if (err)
+		return err;
+
+	ftrace_ops_update_code(ops, &prev_ops_hash);
+	free_ftrace_hash_rcu(prev_hash);
+	return 0;
+}
+
+/*
+ * Bring the hashes of @ops in line with @filter_hash and @notrace_hash.
+ * A hash is only moved over when ops_equal() reports that it differs from
+ * the one @ops currently has. The filter hash is handled first; if that
+ * fails the notrace hash is not touched.
+ *
+ * Returns 0 on success or the negative error of the failing move.
+ */
+static int ftrace_update_ops(struct ftrace_ops *ops, struct ftrace_hash *filter_hash,
+			     struct ftrace_hash *notrace_hash)
+{
+	struct ftrace_ops_hash *cur = ops->func_hash;
+	int err = 0;
+
+	if (!ops_equal(filter_hash, cur->filter_hash))
+		err = __ftrace_hash_move_and_update_ops(ops, &cur->filter_hash,
+							filter_hash, 1);
+	if (err < 0)
+		return err;
+
+	if (!ops_equal(notrace_hash, cur->notrace_hash))
+		err = __ftrace_hash_move_and_update_ops(ops, &cur->notrace_hash,
+							notrace_hash, 0);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+/**
+ * ftrace_startup_subops - enable tracing for subops of an ops
+ * @ops: Manager ops (used to pick all the functions of its subops)
+ * @subops: A new ops to add to @ops
+ * @command: Extra commands to use to enable tracing
+ *
+ * The @ops is a manager @ops that has the filter that includes all the functions
+ * that its list of subops are tracing. Adding a new @subops will add the
+ * functions of @subops to @ops.
+ *
+ * For the first subops, @ops takes copies of the @subops hashes and is
+ * started with ftrace_startup(). For later ones the @ops filter hash
+ * becomes the union of both filter hashes and the @ops notrace hash the
+ * intersection of both notrace hashes.
+ *
+ * Returns 0 on success, -ENODEV if ftrace is disabled, -EBUSY if @subops
+ * is already enabled, -ENOMEM on allocation failure, or the error from
+ * ftrace_startup() / ftrace_update_ops(). On failure @subops is not left
+ * on the @ops subop list.
+ */
+int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command)
+{
+	struct ftrace_hash *filter_hash;
+	struct ftrace_hash *notrace_hash;
+	struct ftrace_hash *save_filter_hash;
+	struct ftrace_hash *save_notrace_hash;
+	int size_bits;
+	int ret;
+
+	if (unlikely(ftrace_disabled))
+		return -ENODEV;
+
+	ftrace_ops_init(ops);
+	ftrace_ops_init(subops);
+
+	if (WARN_ON_ONCE(subops->flags & FTRACE_OPS_FL_ENABLED))
+		return -EBUSY;
+
+	/* Make everything canonical (Just in case!) */
+	if (!ops->func_hash->filter_hash)
+		ops->func_hash->filter_hash = EMPTY_HASH;
+	if (!ops->func_hash->notrace_hash)
+		ops->func_hash->notrace_hash = EMPTY_HASH;
+	if (!subops->func_hash->filter_hash)
+		subops->func_hash->filter_hash = EMPTY_HASH;
+	if (!subops->func_hash->notrace_hash)
+		subops->func_hash->notrace_hash = EMPTY_HASH;
+
+	/* For the first subops to ops just enable it normally */
+	if (list_empty(&ops->subop_list)) {
+		/* Just use the subops hashes */
+		filter_hash = copy_hash(subops->func_hash->filter_hash);
+		notrace_hash = copy_hash(subops->func_hash->notrace_hash);
+		if (!filter_hash || !notrace_hash) {
+			free_ftrace_hash(filter_hash);
+			free_ftrace_hash(notrace_hash);
+			return -ENOMEM;
+		}
+
+		/* Keep the old hashes so they can be restored on failure */
+		save_filter_hash = ops->func_hash->filter_hash;
+		save_notrace_hash = ops->func_hash->notrace_hash;
+
+		ops->func_hash->filter_hash = filter_hash;
+		ops->func_hash->notrace_hash = notrace_hash;
+		list_add(&subops->list, &ops->subop_list);
+		ret = ftrace_startup(ops, command);
+		if (ret < 0) {
+			list_del(&subops->list);
+			ops->func_hash->filter_hash = save_filter_hash;
+			ops->func_hash->notrace_hash = save_notrace_hash;
+			free_ftrace_hash(filter_hash);
+			free_ftrace_hash(notrace_hash);
+		} else {
+			free_ftrace_hash(save_filter_hash);
+			free_ftrace_hash(save_notrace_hash);
+			subops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP;
+			subops->managed = ops;
+		}
+		return ret;
+	}
+
+	/*
+	 * Here there's already something attached. Here are the rules:
+	 *   o If either filter_hash is empty then the final stays empty
+	 *      o Otherwise, the final is a superset of both hashes
+	 *   o If either notrace_hash is empty then the final stays empty
+	 *      o Otherwise, the final is an intersection between the hashes
+	 */
+	if (ftrace_hash_empty(ops->func_hash->filter_hash) ||
+	    ftrace_hash_empty(subops->func_hash->filter_hash)) {
+		filter_hash = EMPTY_HASH;
+	} else {
+		size_bits = max(ops->func_hash->filter_hash->size_bits,
+				subops->func_hash->filter_hash->size_bits);
+		filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash);
+		if (!filter_hash)
+			return -ENOMEM;
+		ret = append_hash(&filter_hash, subops->func_hash->filter_hash);
+		if (ret < 0) {
+			free_ftrace_hash(filter_hash);
+			return ret;
+		}
+	}
+
+	if (ftrace_hash_empty(ops->func_hash->notrace_hash) ||
+	    ftrace_hash_empty(subops->func_hash->notrace_hash)) {
+		notrace_hash = EMPTY_HASH;
+	} else {
+		/*
+		 * Size and intersect the notrace hashes here; the filter
+		 * hashes were already merged above and have nothing to do
+		 * with what is excluded from tracing.
+		 */
+		size_bits = max(ops->func_hash->notrace_hash->size_bits,
+				subops->func_hash->notrace_hash->size_bits);
+		notrace_hash = alloc_ftrace_hash(size_bits);
+		if (!notrace_hash) {
+			free_ftrace_hash(filter_hash);
+			return -ENOMEM;
+		}
+
+		ret = intersect_hash(&notrace_hash, ops->func_hash->notrace_hash,
+				     subops->func_hash->notrace_hash);
+		if (ret < 0) {
+			free_ftrace_hash(filter_hash);
+			free_ftrace_hash(notrace_hash);
+			return ret;
+		}
+	}
+
+	list_add(&subops->list, &ops->subop_list);
+
+	ret = ftrace_update_ops(ops, filter_hash, notrace_hash);
+	/* The local hashes are freed here whether the update worked or not */
+	free_ftrace_hash(filter_hash);
+	free_ftrace_hash(notrace_hash);
+	if (ret < 0) {
+		list_del(&subops->list);
+	} else {
+		subops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP;
+		subops->managed = ops;
+	}
+	return ret;
+}
+
+/**
+ * ftrace_shutdown_subops - Remove a subops from a manager ops
+ * @ops: A manager ops to remove @subops from
+ * @subops: The subops to remove from @ops
+ * @command: Any extra command flags to add to modifying the text
+ *
+ * Removes the functions being traced by the @subops from @ops. Note, it
+ * will not affect functions that are being traced by other subops that
+ * still exist in @ops.
+ *
+ * If the last subops is removed from @ops, then @ops is shutdown normally.
+ *
+ * Returns 0 on success, -ENODEV if ftrace is disabled, -EINVAL if @subops
+ * is not enabled, -ENOMEM if the new hashes could not be built, or the
+ * error from ftrace_shutdown() / ftrace_update_ops(). On failure @subops
+ * is put back on the @ops subop list.
+ */
+int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command)
+{
+	struct ftrace_hash *new_filter;
+	struct ftrace_hash *new_notrace;
+	int err;
+
+	if (unlikely(ftrace_disabled))
+		return -ENODEV;
+
+	if (WARN_ON_ONCE(!(subops->flags & FTRACE_OPS_FL_ENABLED)))
+		return -EINVAL;
+
+	/* Unlink first so the rebuilt hashes no longer account for @subops */
+	list_del(&subops->list);
+
+	if (list_empty(&ops->subop_list)) {
+		/* That was the last one, the manager itself can go down */
+		err = ftrace_shutdown(ops, command);
+		if (err < 0) {
+			list_add(&subops->list, &ops->subop_list);
+			return err;
+		}
+
+		subops->flags &= ~(FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP);
+		subops->managed = NULL;
+
+		/* The manager keeps no filters once it has no subops */
+		free_ftrace_hash(ops->func_hash->filter_hash);
+		free_ftrace_hash(ops->func_hash->notrace_hash);
+		ops->func_hash->filter_hash = EMPTY_HASH;
+		ops->func_hash->notrace_hash = EMPTY_HASH;
+
+		return 0;
+	}
+
+	/* Recompute the manager hashes from the subops that are left */
+	new_filter = append_hashes(ops);
+	new_notrace = intersect_hashes(ops);
+
+	err = -ENOMEM;
+	if (new_filter && new_notrace)
+		err = ftrace_update_ops(ops, new_filter, new_notrace);
+
+	if (err < 0) {
+		/* Undo the unlink, @subops stays attached */
+		list_add(&subops->list, &ops->subop_list);
+	} else {
+		subops->flags &= ~(FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP);
+		subops->managed = NULL;
+	}
+
+	free_ftrace_hash(new_filter);
+	free_ftrace_hash(new_notrace);
+	return err;
+}
+
+/*
+ * Update one hash of a subops and propagate the change to its manager.
+ *
+ * @subops:       The subops whose hash is changing (subops->managed is
+ *                its manager ops)
+ * @orig_subhash: The subops hash being replaced
+ * @hash:         The new content; its entries are moved out of it
+ * @enable:       Non-zero for the filter hash, zero for the notrace hash
+ *
+ * The new content is first moved into *@orig_subhash, then the manager
+ * hash is recomputed from all of its subops (union for filters,
+ * intersection for notrace) and installed. If anything fails the subops
+ * hash is put back to what it was.
+ *
+ * Returns 0 on success, -EINVAL if @subops has no usable manager,
+ * -ENOMEM on allocation failure, or the error from updating the manager.
+ */
+static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops,
+					      struct ftrace_hash **orig_subhash,
+					      struct ftrace_hash *hash,
+					      int enable)
+{
+	struct ftrace_ops *ops = subops->managed;
+	struct ftrace_hash **orig_hash;
+	struct ftrace_hash *save_hash;
+	struct ftrace_hash *new_hash;
+	int ret;
+
+	/* Manager ops can not be subops (yet) */
+	if (WARN_ON_ONCE(!ops || ops->flags & FTRACE_OPS_FL_SUBOP))
+		return -EINVAL;
+
+	/* Move the new hash over to the subops hash */
+	save_hash = *orig_subhash;
+	*orig_subhash = __ftrace_hash_move(hash);
+	if (!*orig_subhash) {
+		*orig_subhash = save_hash;
+		return -ENOMEM;
+	}
+
+	/* Create a new_hash to hold the ops new functions */
+	if (enable) {
+		orig_hash = &ops->func_hash->filter_hash;
+		new_hash = append_hashes(ops);
+	} else {
+		orig_hash = &ops->func_hash->notrace_hash;
+		new_hash = intersect_hashes(ops);
+	}
+
+	if (!new_hash) {
+		/*
+		 * Building the manager hash failed. Do not hand NULL to
+		 * the move below; report the failure and roll back.
+		 */
+		ret = -ENOMEM;
+	} else {
+		/* Move the hash over to the new hash */
+		ret = __ftrace_hash_move_and_update_ops(ops, orig_hash, new_hash, enable);
+		free_ftrace_hash(new_hash);
+	}
+
+	if (ret) {
+		/* Put back the original hash */
+		free_ftrace_hash_rcu(*orig_subhash);
+		*orig_subhash = save_hash;
+	} else {
+		free_ftrace_hash_rcu(save_hash);
+	}
+	return ret;
+}
+
+
static u64 ftrace_update_time;
unsigned long ftrace_update_tot_cnt;
unsigned long ftrace_number_of_pages;
@@ -4380,19 +4879,33 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops,
struct ftrace_hash *hash,
int enable)
{
- struct ftrace_ops_hash old_hash_ops;
- struct ftrace_hash *old_hash;
- int ret;
+ if (ops->flags & FTRACE_OPS_FL_SUBOP)
+ return ftrace_hash_move_and_update_subops(ops, orig_hash, hash, enable);
- old_hash = *orig_hash;
- old_hash_ops.filter_hash = ops->func_hash->filter_hash;
- old_hash_ops.notrace_hash = ops->func_hash->notrace_hash;
- ret = ftrace_hash_move(ops, enable, orig_hash, hash);
- if (!ret) {
- ftrace_ops_update_code(ops, &old_hash_ops);
- free_ftrace_hash_rcu(old_hash);
+ /*
+ * If this ops is not enabled, it could be sharing its filters
+ * with a subop. If that's the case, update the subop instead of
+ * this ops. Shared filters are only allowed to have one ops set
+ * at a time, and if we update the ops that is not enabled,
+ * it will not affect subops that share it.
+ */
+ if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) {
+ struct ftrace_ops *op;
+
+ /* Check if any other manager subops maps to this hash */
+ do_for_each_ftrace_op(op, ftrace_ops_list) {
+ struct ftrace_ops *subops;
+
+ list_for_each_entry(subops, &op->subop_list, list) {
+ if ((subops->flags & FTRACE_OPS_FL_ENABLED) &&
+ subops->func_hash == ops->func_hash) {
+ return ftrace_hash_move_and_update_subops(subops, orig_hash, hash, enable);
+ }
+ }
+ } while_for_each_ftrace_op(op);
}
- return ret;
+
+ return __ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable);
}
static bool module_exists(const char *module)
@@ -5475,6 +5988,8 @@ EXPORT_SYMBOL_GPL(register_ftrace_direct);
* unregister_ftrace_direct - Remove calls to custom trampoline
* previously registered by register_ftrace_direct for @ops object.
* @ops: The address of the struct ftrace_ops object
+ * @addr: The address of the direct function that is called by the @ops functions
+ * @free_filters: Set to true to remove all filters for the ftrace_ops, false otherwise
*
* This is used to remove a direct calls to @addr from the nop locations
* of the functions registered in @ops (with by ftrace_set_filter_ip
@@ -7324,6 +7839,7 @@ __init void ftrace_init_global_array_ops(struct trace_array *tr)
tr->ops = &global_ops;
tr->ops->private = tr;
ftrace_init_trace_array(tr);
+ init_array_fgraph_ops(tr, tr->ops);
}
void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h
index 5012c04f92c0..3235470e61b3 100644
--- a/kernel/trace/ftrace_internal.h
+++ b/kernel/trace/ftrace_internal.h
@@ -15,6 +15,8 @@ extern struct ftrace_ops global_ops;
int ftrace_startup(struct ftrace_ops *ops, int command);
int ftrace_shutdown(struct ftrace_ops *ops, int command);
int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs);
+int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command);
+int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command);
#else /* !CONFIG_DYNAMIC_FTRACE */
@@ -38,14 +40,26 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
{
return 1;
}
+/* Stub used when CONFIG_DYNAMIC_FTRACE is not set: always fails with -EINVAL. */
+static inline int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command)
+{
+ return -EINVAL;
+}
+/* Stub used when CONFIG_DYNAMIC_FTRACE is not set: always fails with -EINVAL. */
+static inline int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
extern int ftrace_graph_active;
-void update_function_graph_func(void);
+# ifdef CONFIG_DYNAMIC_FTRACE
+extern void fgraph_update_pid_func(void);
+# else
+static inline void fgraph_update_pid_func(void) {}
+# endif
#else /* !CONFIG_FUNCTION_GRAPH_TRACER */
# define ftrace_graph_active 0
-static inline void update_function_graph_func(void) { }
+static inline void fgraph_update_pid_func(void) {}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
#else /* !CONFIG_FUNCTION_TRACER */
diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c
index 95106d02b32d..4966e6bbdf6f 100644
--- a/kernel/trace/pid_list.c
+++ b/kernel/trace/pid_list.c
@@ -354,7 +354,7 @@ static void pid_list_refill_irq(struct irq_work *iwork)
while (upper_count-- > 0) {
union upper_chunk *chunk;
- chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
+ chunk = kzalloc(sizeof(*chunk), GFP_NOWAIT);
if (!chunk)
break;
*upper_next = chunk;
@@ -365,7 +365,7 @@ static void pid_list_refill_irq(struct irq_work *iwork)
while (lower_count-- > 0) {
union lower_chunk *chunk;
- chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
+ chunk = kzalloc(sizeof(*chunk), GFP_NOWAIT);
if (!chunk)
break;
*lower_next = chunk;
@@ -451,6 +451,7 @@ struct trace_pid_list *trace_pid_list_alloc(void)
/**
* trace_pid_list_free - Frees an allocated pid_list.
+ * @pid_list: The pid list to free.
*
* Frees the memory for a pid_list that was allocated.
*/
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 749a182dab48..8783bebd0562 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -397,6 +397,9 @@ struct trace_array {
struct ftrace_ops *ops;
struct trace_pid_list __rcu *function_pids;
struct trace_pid_list __rcu *function_no_pids;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ struct fgraph_ops *gops;
+#endif
#ifdef CONFIG_DYNAMIC_FTRACE
/* All of these are protected by the ftrace_lock */
struct list_head func_probes;
@@ -679,9 +682,8 @@ void trace_latency_header(struct seq_file *m);
void trace_default_header(struct seq_file *m);
void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
-void trace_graph_return(struct ftrace_graph_ret *trace);
-int trace_graph_entry(struct ftrace_graph_ent *trace);
-void set_graph_array(struct trace_array *tr);
+void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops);
+int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops);
void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
@@ -892,12 +894,59 @@ extern int __trace_graph_entry(struct trace_array *tr,
extern void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
unsigned int trace_ctx);
+extern void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
+extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
+extern void free_fgraph_ops(struct trace_array *tr);
+
+/*
+ * Layout of the value returned by fgraph_get_task_var() as used by the
+ * graph tracer. Note that TRACE_GRAPH_FL is a flag value that is ORed in
+ * directly, while the *_BIT enumerators below are bit numbers that have
+ * to be shifted before use.
+ */
+enum {
+ /* Flag value (not a bit number) */
+ TRACE_GRAPH_FL = 1,
+
+ /*
+ * In the very unlikely case that an interrupt came in
+ * at a start of graph tracing, and we want to trace
+ * the function in that interrupt, the depth can be greater
+ * than zero, because of the preempted start of a previous
+ * trace. In an even more unlikely case, depth could be 2
+ * if a softirq interrupted the start of graph tracing,
+ * followed by an interrupt preempting a start of graph
+ * tracing in the softirq, and depth can even be 3
+ * if an NMI came in at the start of an interrupt function
+ * that preempted a softirq start of a function that
+ * preempted normal context!!!! Luckily, it can't be
+ * greater than 3, so the next two bits are a mask
+ * of what the depth is when we set TRACE_GRAPH_FL
+ */
+
+ /* Bit numbers of the two-bit depth field */
+ TRACE_GRAPH_DEPTH_START_BIT,
+ TRACE_GRAPH_DEPTH_END_BIT,
+
+ /*
+ * To implement set_graph_notrace, if this bit is set, we ignore
+ * function graph tracing of called functions, until the return
+ * function is called to clear it.
+ */
+ TRACE_GRAPH_NOTRACE_BIT,
+};
+
+/*
+ * Mask form of TRACE_GRAPH_NOTRACE_BIT. Use this mask when testing or
+ * setting the notrace state; TRACE_GRAPH_NOTRACE_BIT itself is a bit
+ * number, not a mask.
+ *
+ * NOTE(review): trace_graph_entry() tests the state with
+ * TRACE_GRAPH_NOTRACE but sets it with "|= TRACE_GRAPH_NOTRACE_BIT",
+ * which ORs in the bit number instead of the mask - looks like the set
+ * should use TRACE_GRAPH_NOTRACE as well; confirm and fix there.
+ */
+#define TRACE_GRAPH_NOTRACE (1 << TRACE_GRAPH_NOTRACE_BIT)
+
+/* Extract the two-bit depth field stored in *@task_var. */
+static inline unsigned long ftrace_graph_depth(unsigned long *task_var)
+{
+	unsigned long val = *task_var;
+
+	return (val >> TRACE_GRAPH_DEPTH_START_BIT) & 3;
+}
+
+/*
+ * Store the low two bits of @depth in the depth field of *@task_var,
+ * leaving all other bits as they are.
+ */
+static inline void ftrace_graph_set_depth(unsigned long *task_var, int depth)
+{
+	unsigned long val = *task_var;
+
+	val &= ~(3 << TRACE_GRAPH_DEPTH_START_BIT);
+	val |= (depth & 3) << TRACE_GRAPH_DEPTH_START_BIT;
+	*task_var = val;
+}
#ifdef CONFIG_DYNAMIC_FTRACE
extern struct ftrace_hash __rcu *ftrace_graph_hash;
extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash;
-static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
+static inline int
+ftrace_graph_addr(unsigned long *task_var, struct ftrace_graph_ent *trace)
{
unsigned long addr = trace->func;
int ret = 0;
@@ -919,13 +968,12 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
}
if (ftrace_lookup_ip(hash, addr)) {
-
/*
* This needs to be cleared on the return functions
* when the depth is zero.
*/
- trace_recursion_set(TRACE_GRAPH_BIT);
- trace_recursion_set_depth(trace->depth);
+ *task_var |= TRACE_GRAPH_FL;
+ ftrace_graph_set_depth(task_var, trace->depth);
/*
* If no irqs are to be traced, but a set_graph_function
@@ -944,11 +992,14 @@ out:
return ret;
}
-static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace)
+static inline void
+ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftrace_graph_ret *trace)
{
- if (trace_recursion_test(TRACE_GRAPH_BIT) &&
- trace->depth == trace_recursion_depth())
- trace_recursion_clear(TRACE_GRAPH_BIT);
+ unsigned long *task_var = fgraph_get_task_var(gops);
+
+ if ((*task_var & TRACE_GRAPH_FL) &&
+ trace->depth == ftrace_graph_depth(task_var))
+ *task_var &= ~TRACE_GRAPH_FL;
}
static inline int ftrace_graph_notrace_addr(unsigned long addr)
@@ -974,7 +1025,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
return ret;
}
#else
-static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
+static inline int ftrace_graph_addr(unsigned long *task_var, struct ftrace_graph_ent *trace)
{
return 1;
}
@@ -983,27 +1034,37 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
{
return 0;
}
-static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace)
+static inline void ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftrace_graph_ret *trace)
{ }
#endif /* CONFIG_DYNAMIC_FTRACE */
extern unsigned int fgraph_max_depth;
-static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace)
+static inline bool
+ftrace_graph_ignore_func(struct fgraph_ops *gops, struct ftrace_graph_ent *trace)
{
+ unsigned long *task_var = fgraph_get_task_var(gops);
+
/* trace it when it is-nested-in or is a function enabled. */
- return !(trace_recursion_test(TRACE_GRAPH_BIT) ||
- ftrace_graph_addr(trace)) ||
+ return !((*task_var & TRACE_GRAPH_FL) ||
+ ftrace_graph_addr(task_var, trace)) ||
(trace->depth < 0) ||
(fgraph_max_depth && trace->depth >= fgraph_max_depth);
}
+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+ struct ftrace_ops *src_ops);
+
#else /* CONFIG_FUNCTION_GRAPH_TRACER */
static inline enum print_line_t
print_graph_function_flags(struct trace_iterator *iter, u32 flags)
{
return TRACE_TYPE_UNHANDLED;
}
+static inline void free_fgraph_ops(struct trace_array *tr) { }
+/* ftrace_ops may not be defined */
+#define init_array_fgraph_ops(tr, ops) do { } while (0)
+#define allocate_fgraph_ops(tr, ops) ({ 0; })
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
extern struct list_head ftrace_pids;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 9f1bfbe105e8..3b0cea37e029 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -80,6 +80,7 @@ void ftrace_free_ftrace_ops(struct trace_array *tr)
int ftrace_create_function_files(struct trace_array *tr,
struct dentry *parent)
{
+ int ret;
/*
* The top level array uses the "global_ops", and the files are
* created on boot up.
@@ -90,6 +91,12 @@ int ftrace_create_function_files(struct trace_array *tr,
if (!tr->ops)
return -EINVAL;
+ ret = allocate_fgraph_ops(tr, tr->ops);
+ if (ret) {
+ kfree(tr->ops);
+ return ret;
+ }
+
ftrace_create_filter_files(tr->ops, parent);
return 0;
@@ -99,6 +106,7 @@ void ftrace_destroy_function_files(struct trace_array *tr)
{
ftrace_destroy_filter_files(tr->ops);
ftrace_free_ftrace_ops(tr);
+ free_fgraph_ops(tr);
}
static ftrace_func_t select_trace_function(u32 flags_val)
@@ -223,6 +231,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
long disabled;
int cpu;
unsigned int trace_ctx;
+ int skip = STACK_SKIP;
if (unlikely(!tr->function_enabled))
return;
@@ -239,7 +248,11 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
if (likely(disabled == 1)) {
trace_ctx = tracing_gen_ctx_flags(flags);
trace_function(tr, ip, parent_ip, trace_ctx);
- __trace_stack(tr, trace_ctx, STACK_SKIP);
+#ifdef CONFIG_UNWINDER_FRAME_POINTER
+ if (ftrace_pids_enabled(op))
+ skip++;
+#endif
+ __trace_stack(tr, trace_ctx, skip);
}
atomic_dec(&data->disabled);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index c35fbaab2a47..13d0387ac6a6 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -83,8 +83,6 @@ static struct tracer_flags tracer_flags = {
.opts = trace_opts
};
-static struct trace_array *graph_array;
-
/*
* DURATION column is being also used to display IRQ signs,
* following values are used by print_graph_irq and others
@@ -129,9 +127,11 @@ static inline int ftrace_graph_ignore_irqs(void)
return in_hardirq();
}
-int trace_graph_entry(struct ftrace_graph_ent *trace)
+int trace_graph_entry(struct ftrace_graph_ent *trace,
+ struct fgraph_ops *gops)
{
- struct trace_array *tr = graph_array;
+ unsigned long *task_var = fgraph_get_task_var(gops);
+ struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
unsigned long flags;
unsigned int trace_ctx;
@@ -139,7 +139,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
int ret;
int cpu;
- if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT))
+ if (*task_var & TRACE_GRAPH_NOTRACE)
return 0;
/*
@@ -150,7 +150,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
* returning from the function.
*/
if (ftrace_graph_notrace_addr(trace->func)) {
- trace_recursion_set(TRACE_GRAPH_NOTRACE_BIT);
+ *task_var |= TRACE_GRAPH_NOTRACE;
/*
* Need to return 1 to have the return called
* that will clear the NOTRACE bit.
@@ -161,7 +161,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
if (!ftrace_trace_task(tr))
return 0;
- if (ftrace_graph_ignore_func(trace))
+ if (ftrace_graph_ignore_func(gops, trace))
return 0;
if (ftrace_graph_ignore_irqs())
@@ -238,19 +238,21 @@ void __trace_graph_return(struct trace_array *tr,
trace_buffer_unlock_commit_nostack(buffer, event);
}
-void trace_graph_return(struct ftrace_graph_ret *trace)
+void trace_graph_return(struct ftrace_graph_ret *trace,
+ struct fgraph_ops *gops)
{
- struct trace_array *tr = graph_array;
+ unsigned long *task_var = fgraph_get_task_var(gops);
+ struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
unsigned long flags;
unsigned int trace_ctx;
long disabled;
int cpu;
- ftrace_graph_addr_finish(trace);
+ ftrace_graph_addr_finish(gops, trace);
- if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
- trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
+ if (*task_var & TRACE_GRAPH_NOTRACE) {
+ *task_var &= ~TRACE_GRAPH_NOTRACE;
return;
}
@@ -266,18 +268,10 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
local_irq_restore(flags);
}
-void set_graph_array(struct trace_array *tr)
-{
- graph_array = tr;
-
- /* Make graph_array visible before we start tracing */
-
- smp_mb();
-}
-
-static void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
+static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
+ struct fgraph_ops *gops)
{
- ftrace_graph_addr_finish(trace);
+ ftrace_graph_addr_finish(gops, trace);
if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
@@ -288,28 +282,60 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
(trace->rettime - trace->calltime < tracing_thresh))
return;
else
- trace_graph_return(trace);
+ trace_graph_return(trace, gops);
}
-static struct fgraph_ops funcgraph_thresh_ops = {
- .entryfunc = &trace_graph_entry,
- .retfunc = &trace_graph_thresh_return,
-};
-
static struct fgraph_ops funcgraph_ops = {
.entryfunc = &trace_graph_entry,
.retfunc = &trace_graph_return,
};
+int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops)
+{
+ struct fgraph_ops *gops;
+
+ gops = kzalloc(sizeof(*gops), GFP_KERNEL);
+ if (!gops)
+ return -ENOMEM;
+
+ gops->entryfunc = &trace_graph_entry;
+ gops->retfunc = &trace_graph_return;
+
+ tr->gops = gops;
+ gops->private = tr;
+
+ fgraph_init_ops(&gops->ops, ops);
+
+ return 0;
+}
+
+void free_fgraph_ops(struct trace_array *tr)
+{
+ kfree(tr->gops);
+}
+
+__init void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops)
+{
+ tr->gops = &funcgraph_ops;
+ funcgraph_ops.private = tr;
+ fgraph_init_ops(&tr->gops->ops, ops);
+}
+
static int graph_trace_init(struct trace_array *tr)
{
int ret;
- set_graph_array(tr);
+ tr->gops->entryfunc = trace_graph_entry;
+
if (tracing_thresh)
- ret = register_ftrace_graph(&funcgraph_thresh_ops);
+ tr->gops->retfunc = trace_graph_thresh_return;
else
- ret = register_ftrace_graph(&funcgraph_ops);
+ tr->gops->retfunc = trace_graph_return;
+
+ /* Make gops functions visible before we start tracing */
+ smp_mb();
+
+ ret = register_ftrace_graph(tr->gops);
if (ret)
return ret;
tracing_start_cmdline_record();
@@ -320,10 +346,7 @@ static int graph_trace_init(struct trace_array *tr)
static void graph_trace_reset(struct trace_array *tr)
{
tracing_stop_cmdline_record();
- if (tracing_thresh)
- unregister_ftrace_graph(&funcgraph_thresh_ops);
- else
- unregister_ftrace_graph(&funcgraph_ops);
+ unregister_ftrace_graph(tr->gops);
}
static int graph_trace_update_thresh(struct trace_array *tr)
@@ -1362,6 +1385,7 @@ static struct tracer graph_trace __tracer_data = {
.print_header = print_graph_headers,
.flags = &tracer_flags,
.set_flag = func_graph_set_flag,
+ .allow_instances = true,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_function_graph,
#endif
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index ba37f768e2f2..fce064e20570 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -175,7 +175,8 @@ static int irqsoff_display_graph(struct trace_array *tr, int set)
return start_irqsoff_tracer(irqsoff_trace, set);
}
-static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
+static int irqsoff_graph_entry(struct ftrace_graph_ent *trace,
+ struct fgraph_ops *gops)
{
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
@@ -183,7 +184,7 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
unsigned int trace_ctx;
int ret;
- if (ftrace_graph_ignore_func(trace))
+ if (ftrace_graph_ignore_func(gops, trace))
return 0;
/*
* Do not trace a function if it's filtered by set_graph_notrace.
@@ -205,14 +206,15 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
return ret;
}
-static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
+static void irqsoff_graph_return(struct ftrace_graph_ret *trace,
+ struct fgraph_ops *gops)
{
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
unsigned int trace_ctx;
- ftrace_graph_addr_finish(trace);
+ ftrace_graph_addr_finish(gops, trace);
if (!func_prolog_dec(tr, &data, &flags))
return;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 16383247bdbf..61a6da808203 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -678,6 +678,21 @@ end:
}
#ifdef CONFIG_MODULES
+static int validate_module_probe_symbol(const char *modname, const char *symbol);
+
+static int register_module_trace_kprobe(struct module *mod, struct trace_kprobe *tk)
+{
+ const char *p;
+ int ret = 0;
+
+ p = strchr(trace_kprobe_symbol(tk), ':');
+ if (p)
+ ret = validate_module_probe_symbol(module_name(mod), p + 1);
+ if (!ret)
+ ret = __register_trace_kprobe(tk);
+ return ret;
+}
+
/* Module notifier call back, checking event on the module */
static int trace_kprobe_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
@@ -696,7 +711,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
if (trace_kprobe_within_module(tk, mod)) {
/* Don't need to check busy - this should have gone. */
__unregister_trace_kprobe(tk);
- ret = __register_trace_kprobe(tk);
+ ret = register_module_trace_kprobe(mod, tk);
if (ret)
pr_warn("Failed to re-register probe %s on %s: %d\n",
trace_probe_name(&tk->tp),
@@ -747,17 +762,81 @@ static int count_mod_symbols(void *data, const char *name, unsigned long unused)
return 0;
}
-static unsigned int number_of_same_symbols(char *func_name)
+static unsigned int number_of_same_symbols(const char *mod, const char *func_name)
{
struct sym_count_ctx ctx = { .count = 0, .name = func_name };
- kallsyms_on_each_match_symbol(count_symbols, func_name, &ctx.count);
+ if (!mod)
+ kallsyms_on_each_match_symbol(count_symbols, func_name, &ctx.count);
- module_kallsyms_on_each_symbol(NULL, count_mod_symbols, &ctx);
+ module_kallsyms_on_each_symbol(mod, count_mod_symbols, &ctx);
return ctx.count;
}
+static int validate_module_probe_symbol(const char *modname, const char *symbol)
+{
+ unsigned int count = number_of_same_symbols(modname, symbol);
+
+ if (count > 1) {
+ /*
+ * Users should use ADDR to remove the ambiguity of
+ * using KSYM only.
+ */
+ return -EADDRNOTAVAIL;
+ } else if (count == 0) {
+ /*
+ * We can return ENOENT earlier than when registering the
+ * kprobe.
+ */
+ return -ENOENT;
+ }
+ return 0;
+}
+
+#ifdef CONFIG_MODULES
+/* Return NULL if the module is not loaded or is being unloaded. */
+static struct module *try_module_get_by_name(const char *name)
+{
+ struct module *mod;
+
+ rcu_read_lock_sched();
+ mod = find_module(name);
+ if (mod && !try_module_get(mod))
+ mod = NULL;
+ rcu_read_unlock_sched();
+
+ return mod;
+}
+#else
+#define try_module_get_by_name(name) (NULL)
+#endif
+
+static int validate_probe_symbol(char *symbol)
+{
+ struct module *mod = NULL;
+ char *modname = NULL, *p;
+ int ret = 0;
+
+ p = strchr(symbol, ':');
+ if (p) {
+ modname = symbol;
+ symbol = p + 1;
+ *p = '\0';
+ mod = try_module_get_by_name(modname);
+ if (!mod)
+ goto out;
+ }
+
+ ret = validate_module_probe_symbol(modname, symbol);
+out:
+ if (p)
+ *p = ':';
+ if (mod)
+ module_put(mod);
+ return ret;
+}
+
static int trace_kprobe_entry_handler(struct kretprobe_instance *ri,
struct pt_regs *regs);
@@ -881,6 +960,14 @@ static int __trace_kprobe_create(int argc, const char *argv[])
trace_probe_log_err(0, BAD_PROBE_ADDR);
goto parse_error;
}
+ ret = validate_probe_symbol(symbol);
+ if (ret) {
+ if (ret == -EADDRNOTAVAIL)
+ trace_probe_log_err(0, NON_UNIQ_SYMBOL);
+ else
+ trace_probe_log_err(0, BAD_PROBE_ADDR);
+ goto parse_error;
+ }
if (is_return)
ctx.flags |= TPARG_FL_RETURN;
ret = kprobe_on_func_entry(NULL, symbol, offset);
@@ -893,31 +980,6 @@ static int __trace_kprobe_create(int argc, const char *argv[])
}
}
- if (symbol && !strchr(symbol, ':')) {
- unsigned int count;
-
- count = number_of_same_symbols(symbol);
- if (count > 1) {
- /*
- * Users should use ADDR to remove the ambiguity of
- * using KSYM only.
- */
- trace_probe_log_err(0, NON_UNIQ_SYMBOL);
- ret = -EADDRNOTAVAIL;
-
- goto error;
- } else if (count == 0) {
- /*
- * We can return ENOENT earlier than when register the
- * kprobe.
- */
- trace_probe_log_err(0, BAD_PROBE_ADDR);
- ret = -ENOENT;
-
- goto error;
- }
- }
-
trace_probe_log_set_index(0);
if (event) {
ret = traceprobe_parse_event_name(&event, &group, gbuf,
@@ -1835,21 +1897,9 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
char *event;
if (func) {
- unsigned int count;
-
- count = number_of_same_symbols(func);
- if (count > 1)
- /*
- * Users should use addr to remove the ambiguity of
- * using func only.
- */
- return ERR_PTR(-EADDRNOTAVAIL);
- else if (count == 0)
- /*
- * We can return ENOENT earlier than when register the
- * kprobe.
- */
- return ERR_PTR(-ENOENT);
+ ret = validate_probe_symbol(func);
+ if (ret)
+ return ERR_PTR(ret);
}
/*
@@ -2023,19 +2073,16 @@ static __init int kprobe_trace_self_tests_init(void)
pr_info("Testing kprobe tracing: ");
ret = create_or_delete_trace_kprobe("p:testprobe kprobe_trace_selftest_target $stack $stack0 +0($stack)");
- if (WARN_ON_ONCE(ret)) {
- pr_warn("error on probing function entry.\n");
+ if (WARN_ONCE(ret, "error on probing function entry.")) {
warn++;
} else {
/* Enable trace point */
tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
- if (WARN_ON_ONCE(tk == NULL)) {
- pr_warn("error on getting new probe.\n");
+ if (WARN_ONCE(tk == NULL, "error on getting new probe.")) {
warn++;
} else {
file = find_trace_probe_file(tk, top_trace_array());
- if (WARN_ON_ONCE(file == NULL)) {
- pr_warn("error on getting probe file.\n");
+ if (WARN_ONCE(file == NULL, "error on getting probe file.")) {
warn++;
} else
enable_trace_kprobe(
@@ -2044,19 +2091,16 @@ static __init int kprobe_trace_self_tests_init(void)
}
ret = create_or_delete_trace_kprobe("r:testprobe2 kprobe_trace_selftest_target $retval");
- if (WARN_ON_ONCE(ret)) {
- pr_warn("error on probing function return.\n");
+ if (WARN_ONCE(ret, "error on probing function return.")) {
warn++;
} else {
/* Enable trace point */
tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
- if (WARN_ON_ONCE(tk == NULL)) {
- pr_warn("error on getting 2nd new probe.\n");
+ if (WARN_ONCE(tk == NULL, "error on getting 2nd new probe.")) {
warn++;
} else {
file = find_trace_probe_file(tk, top_trace_array());
- if (WARN_ON_ONCE(file == NULL)) {
- pr_warn("error on getting probe file.\n");
+ if (WARN_ONCE(file == NULL, "error on getting probe file.")) {
warn++;
} else
enable_trace_kprobe(
@@ -2079,18 +2123,15 @@ static __init int kprobe_trace_self_tests_init(void)
/* Disable trace points before removing it */
tk = find_trace_kprobe("testprobe", KPROBE_EVENT_SYSTEM);
- if (WARN_ON_ONCE(tk == NULL)) {
- pr_warn("error on getting test probe.\n");
+ if (WARN_ONCE(tk == NULL, "error on getting test probe.")) {
warn++;
} else {
- if (trace_kprobe_nhit(tk) != 1) {
- pr_warn("incorrect number of testprobe hits\n");
+ if (WARN_ONCE(trace_kprobe_nhit(tk) != 1,
+ "incorrect number of testprobe hits."))
warn++;
- }
file = find_trace_probe_file(tk, top_trace_array());
- if (WARN_ON_ONCE(file == NULL)) {
- pr_warn("error on getting probe file.\n");
+ if (WARN_ONCE(file == NULL, "error on getting probe file.")) {
warn++;
} else
disable_trace_kprobe(
@@ -2098,18 +2139,15 @@ static __init int kprobe_trace_self_tests_init(void)
}
tk = find_trace_kprobe("testprobe2", KPROBE_EVENT_SYSTEM);
- if (WARN_ON_ONCE(tk == NULL)) {
- pr_warn("error on getting 2nd test probe.\n");
+ if (WARN_ONCE(tk == NULL, "error on getting 2nd test probe.")) {
warn++;
} else {
- if (trace_kprobe_nhit(tk) != 1) {
- pr_warn("incorrect number of testprobe2 hits\n");
+ if (WARN_ONCE(trace_kprobe_nhit(tk) != 1,
+ "incorrect number of testprobe2 hits."))
warn++;
- }
file = find_trace_probe_file(tk, top_trace_array());
- if (WARN_ON_ONCE(file == NULL)) {
- pr_warn("error on getting probe file.\n");
+ if (WARN_ONCE(file == NULL, "error on getting probe file.")) {
warn++;
} else
disable_trace_kprobe(
@@ -2117,23 +2155,15 @@ static __init int kprobe_trace_self_tests_init(void)
}
ret = create_or_delete_trace_kprobe("-:testprobe");
- if (WARN_ON_ONCE(ret)) {
- pr_warn("error on deleting a probe.\n");
+ if (WARN_ONCE(ret, "error on deleting a probe."))
warn++;
- }
ret = create_or_delete_trace_kprobe("-:testprobe2");
- if (WARN_ON_ONCE(ret)) {
- pr_warn("error on deleting a probe.\n");
+ if (WARN_ONCE(ret, "error on deleting a probe."))
warn++;
- }
+
end:
- ret = dyn_events_release_all(&trace_kprobe_ops);
- if (WARN_ON_ONCE(ret)) {
- pr_warn("error on cleaning up probes.\n");
- warn++;
- }
/*
* Wait for the optimizer work to finish. Otherwise it might fiddle
* with probes in already freed __init text.
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index a8e28f9b9271..66a871553d4a 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1444,9 +1444,9 @@ static int run_osnoise(void)
save_osn_sample_stats(osn_var, &s);
/*
- * if threshold is 0, use the default value of 5 us.
+ * if threshold is 0, use the default value of 1 us.
*/
- threshold = tracing_thresh ? : 5000;
+ threshold = tracing_thresh ? : 1000;
/*
* Apply PREEMPT and IRQ disabled options.
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 0469a04a355f..130ca7e7787e 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -112,14 +112,15 @@ static int wakeup_display_graph(struct trace_array *tr, int set)
return start_func_tracer(tr, set);
}
-static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
+static int wakeup_graph_entry(struct ftrace_graph_ent *trace,
+ struct fgraph_ops *gops)
{
struct trace_array *tr = wakeup_trace;
struct trace_array_cpu *data;
unsigned int trace_ctx;
int ret = 0;
- if (ftrace_graph_ignore_func(trace))
+ if (ftrace_graph_ignore_func(gops, trace))
return 0;
/*
* Do not trace a function if it's filtered by set_graph_notrace.
@@ -141,13 +142,14 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
return ret;
}
-static void wakeup_graph_return(struct ftrace_graph_ret *trace)
+static void wakeup_graph_return(struct ftrace_graph_ret *trace,
+ struct fgraph_ops *gops)
{
struct trace_array *tr = wakeup_trace;
struct trace_array_cpu *data;
unsigned int trace_ctx;
- ftrace_graph_addr_finish(trace);
+ ftrace_graph_addr_finish(gops, trace);
if (!func_prolog_preempt_disable(tr, &data, &trace_ctx))
return;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index e9c5058a8efd..97f1e4bc47dc 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -756,13 +756,262 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define CHAR_NUMBER 123
+#define SHORT_NUMBER 12345
+#define WORD_NUMBER 1234567890
+#define LONG_NUMBER 1234567890123456789LL
+#define ERRSTR_BUFLEN 128
+
+struct fgraph_fixture {
+ struct fgraph_ops gops;
+ int store_size;
+ const char *store_type_name;
+ char error_str_buf[ERRSTR_BUFLEN];
+ char *error_str;
+};
+
+static __init int store_entry(struct ftrace_graph_ent *trace,
+ struct fgraph_ops *gops)
+{
+ struct fgraph_fixture *fixture = container_of(gops, struct fgraph_fixture, gops);
+ const char *type = fixture->store_type_name;
+ int size = fixture->store_size;
+ void *p;
+
+ p = fgraph_reserve_data(gops->idx, size);
+ if (!p) {
+ snprintf(fixture->error_str_buf, ERRSTR_BUFLEN,
+ "Failed to reserve %s\n", type);
+ return 0;
+ }
+
+ switch (size) {
+ case 1:
+ *(char *)p = CHAR_NUMBER;
+ break;
+ case 2:
+ *(short *)p = SHORT_NUMBER;
+ break;
+ case 4:
+ *(int *)p = WORD_NUMBER;
+ break;
+ case 8:
+ *(long long *)p = LONG_NUMBER;
+ break;
+ }
+
+ return 1;
+}
+
+static __init void store_return(struct ftrace_graph_ret *trace,
+ struct fgraph_ops *gops)
+{
+ struct fgraph_fixture *fixture = container_of(gops, struct fgraph_fixture, gops);
+ const char *type = fixture->store_type_name;
+ long long expect = 0;
+ long long found = -1;
+ int size;
+ char *p;
+
+ p = fgraph_retrieve_data(gops->idx, &size);
+ if (!p) {
+ snprintf(fixture->error_str_buf, ERRSTR_BUFLEN,
+ "Failed to retrieve %s\n", type);
+ return;
+ }
+ if (fixture->store_size > size) {
+ snprintf(fixture->error_str_buf, ERRSTR_BUFLEN,
+ "Retrieved size %d is smaller than expected %d\n",
+ size, (int)fixture->store_size);
+ return;
+ }
+
+ switch (fixture->store_size) {
+ case 1:
+ expect = CHAR_NUMBER;
+ found = *(char *)p;
+ break;
+ case 2:
+ expect = SHORT_NUMBER;
+ found = *(short *)p;
+ break;
+ case 4:
+ expect = WORD_NUMBER;
+ found = *(int *)p;
+ break;
+ case 8:
+ expect = LONG_NUMBER;
+ found = *(long long *)p;
+ break;
+ }
+
+ if (found != expect) {
+ snprintf(fixture->error_str_buf, ERRSTR_BUFLEN,
+ "%s returned not %lld but %lld\n", type, expect, found);
+ return;
+ }
+ fixture->error_str = NULL;
+}
+
+static int __init init_fgraph_fixture(struct fgraph_fixture *fixture)
+{
+ char *func_name;
+ int len;
+
+ snprintf(fixture->error_str_buf, ERRSTR_BUFLEN,
+ "Failed to execute storage %s\n", fixture->store_type_name);
+ fixture->error_str = fixture->error_str_buf;
+
+ func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
+ len = strlen(func_name);
+
+ return ftrace_set_filter(&fixture->gops.ops, func_name, len, 1);
+}
+
+/* Test fgraph storage for each size */
+static int __init test_graph_storage_single(struct fgraph_fixture *fixture)
+{
+ int size = fixture->store_size;
+ int ret;
+
+ pr_cont("PASSED\n");
+ pr_info("Testing fgraph storage of %d byte%s: ", size, str_plural(size));
+
+ ret = init_fgraph_fixture(fixture);
+ if (ret && ret != -ENODEV) {
+ pr_cont("*Could not set filter* ");
+ return -1;
+ }
+
+ ret = register_ftrace_graph(&fixture->gops);
+ if (ret) {
+ pr_warn("Failed to init store_bytes fgraph tracing\n");
+ return -1;
+ }
+
+ DYN_FTRACE_TEST_NAME();
+
+ unregister_ftrace_graph(&fixture->gops);
+
+ if (fixture->error_str) {
+ pr_cont("*** %s ***", fixture->error_str);
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct fgraph_fixture store_bytes[4] __initdata = {
+ [0] = {
+ .gops = {
+ .entryfunc = store_entry,
+ .retfunc = store_return,
+ },
+ .store_size = 1,
+ .store_type_name = "byte",
+ },
+ [1] = {
+ .gops = {
+ .entryfunc = store_entry,
+ .retfunc = store_return,
+ },
+ .store_size = 2,
+ .store_type_name = "short",
+ },
+ [2] = {
+ .gops = {
+ .entryfunc = store_entry,
+ .retfunc = store_return,
+ },
+ .store_size = 4,
+ .store_type_name = "word",
+ },
+ [3] = {
+ .gops = {
+ .entryfunc = store_entry,
+ .retfunc = store_return,
+ },
+ .store_size = 8,
+ .store_type_name = "long long",
+ },
+};
+
+static __init int test_graph_storage_multi(void)
+{
+ struct fgraph_fixture *fixture;
+ bool printed = false;
+ int i, ret;
+
+ pr_cont("PASSED\n");
+ pr_info("Testing multiple fgraph storage on a function: ");
+
+ for (i = 0; i < ARRAY_SIZE(store_bytes); i++) {
+ fixture = &store_bytes[i];
+ ret = init_fgraph_fixture(fixture);
+ if (ret && ret != -ENODEV) {
+ pr_cont("*Could not set filter* ");
+ printed = true;
+ goto out;
+ }
+
+ ret = register_ftrace_graph(&fixture->gops);
+ if (ret) {
+ pr_warn("Failed to init store_bytes fgraph tracing\n");
+ printed = true;
+ goto out;
+ }
+ }
+
+ DYN_FTRACE_TEST_NAME();
+out:
+ while (--i >= 0) {
+ fixture = &store_bytes[i];
+ unregister_ftrace_graph(&fixture->gops);
+
+ if (fixture->error_str && !printed) {
+ pr_cont("*** %s ***", fixture->error_str);
+ printed = true;
+ }
+ }
+ return printed ? -1 : 0;
+}
+
+/* Test the storage passed across function_graph entry and return */
+static __init int test_graph_storage(void)
+{
+ int ret;
+
+ ret = test_graph_storage_single(&store_bytes[0]);
+ if (ret)
+ return ret;
+ ret = test_graph_storage_single(&store_bytes[1]);
+ if (ret)
+ return ret;
+ ret = test_graph_storage_single(&store_bytes[2]);
+ if (ret)
+ return ret;
+ ret = test_graph_storage_single(&store_bytes[3]);
+ if (ret)
+ return ret;
+ ret = test_graph_storage_multi();
+ if (ret)
+ return ret;
+ return 0;
+}
+#else
+static inline int test_graph_storage(void) { return 0; }
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
/* Maximum number of functions to trace before diagnosing a hang */
#define GRAPH_MAX_FUNC_TEST 100000000
static unsigned int graph_hang_thresh;
/* Wrap the real function entry probe to avoid possible hanging */
-static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
+static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace,
+ struct fgraph_ops *gops)
{
/* This is harmlessly racy, we want to approximately detect a hang */
if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) {
@@ -776,7 +1025,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
return 0;
}
- return trace_graph_entry(trace);
+ return trace_graph_entry(trace, gops);
}
static struct fgraph_ops fgraph_ops __initdata = {
@@ -812,7 +1061,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
* to detect and recover from possible hangs
*/
tracing_reset_online_cpus(&tr->array_buffer);
- set_graph_array(tr);
+ fgraph_ops.private = tr;
ret = register_ftrace_graph(&fgraph_ops);
if (ret) {
warn_failed_init_tracer(trace, ret);
@@ -855,7 +1104,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
cond_resched();
tracing_reset_online_cpus(&tr->array_buffer);
- set_graph_array(tr);
+ fgraph_ops.private = tr;
/*
* Some archs *cough*PowerPC*cough* add characters to the
@@ -912,6 +1161,8 @@ trace_selftest_startup_function_graph(struct tracer *trace,
ftrace_set_global_filter(NULL, 0, 1);
#endif
+ ret = test_graph_storage();
+
/* Don't test dynamic tracing, the function tracer already did */
out:
/* Stop it if we failed */