From 633f6f58af445022e38417599a4789b5fc510b71 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 19 Feb 2016 13:59:54 -0500 Subject: tracing: Remove duplicate checks for online CPUs Some trace events have conditions that check if the current CPU is online or not before recording the tracepoint. That's because certain trace events are in locations that can be called as the CPU is going offline and when RCU no longer monitors it (like kfree and friends). The check was added because trace events require RCU to be active. This is a trace event infrastructure issue and not something that individual trace events should worry about. The tracepoint.h code now has added a check to see if the current CPU is considered online, and it only does the tracepoint if it is. There's no more need for individual trace events to also include this check. It is now redundant. Cc: Shreyas B. Prabhu Signed-off-by: Steven Rostedt --- include/trace/events/kmem.h | 42 ++++-------------------------------------- include/trace/events/tlb.h | 4 +--- 2 files changed, 5 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index f7554fd7fc62..325b2a9b1959 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -140,42 +140,19 @@ DEFINE_EVENT(kmem_free, kfree, TP_ARGS(call_site, ptr) ); -DEFINE_EVENT_CONDITION(kmem_free, kmem_cache_free, +DEFINE_EVENT(kmem_free, kmem_cache_free, TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr), - - /* - * This trace can be potentially called from an offlined cpu. - * Since trace points use RCU and RCU should not be used from - * offline cpus, filter such calls out. - * While this trace can be called from a preemptable section, - * it has no impact on the condition since tasks can migrate - * only from online cpus to other online cpus. Thus its safe - * to use raw_smp_processor_id. - */ - TP_CONDITION(cpu_online(raw_smp_processor_id())) + TP_ARGS(call_site, ptr) ); -TRACE_EVENT_CONDITION(mm_page_free, +TRACE_EVENT(mm_page_free, TP_PROTO(struct page *page, unsigned int order), TP_ARGS(page, order), - - /* - * This trace can be potentially called from an offlined cpu. - * Since trace points use RCU and RCU should not be used from - * offline cpus, filter such calls out. - * While this trace can be called from a preemptable section, - * it has no impact on the condition since tasks can migrate - * only from online cpus to other online cpus. Thus its safe - * to use raw_smp_processor_id. - */ - TP_CONDITION(cpu_online(raw_smp_processor_id())), - TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) @@ -276,23 +253,12 @@ DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, TP_ARGS(page, order, migratetype) ); -TRACE_EVENT_CONDITION(mm_page_pcpu_drain, +TRACE_EVENT(mm_page_pcpu_drain, TP_PROTO(struct page *page, unsigned int order, int migratetype), TP_ARGS(page, order, migratetype), - /* - * This trace can be potentially called from an offlined cpu. - * Since trace points use RCU and RCU should not be used from - * offline cpus, filter such calls out. - * While this trace can be called from a preemptable section, - * it has no impact on the condition since tasks can migrate - * only from online cpus to other online cpus. Thus its safe - * to use raw_smp_processor_id. - */ - TP_CONDITION(cpu_online(raw_smp_processor_id())), - TP_STRUCT__entry( __field( unsigned long, pfn ) __field( unsigned int, order ) diff --git a/include/trace/events/tlb.h b/include/trace/events/tlb.h index bc8815f45f3b..9d14b1992108 100644 --- a/include/trace/events/tlb.h +++ b/include/trace/events/tlb.h @@ -34,13 +34,11 @@ TLB_FLUSH_REASON #define EM(a,b) { a, b }, #define EMe(a,b) { a, b } -TRACE_EVENT_CONDITION(tlb_flush, +TRACE_EVENT(tlb_flush, TP_PROTO(int reason, unsigned long pages), TP_ARGS(reason, pages), - TP_CONDITION(cpu_online(smp_processor_id())), - TP_STRUCT__entry( __field( int, reason) __field(unsigned long, pages) -- cgit v1.2.3-58-ga151 From c4a5923055c9e0c87dfc0387f7cda5ee2bbac3c1 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Thu, 10 Dec 2015 12:50:45 -0600 Subject: tracing: Add event record param to trigger_ops.func() Some triggers may need access to the trace event, so pass it in. Also fix up the existing trigger funcs and their callers. Link: http://lkml.kernel.org/r/543e31e9fc445ef61077421ab219033401c39846.1449767187.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi Tested-by: Masami Hiramatsu Reviewed-by: Namhyung Kim Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 7 ++++--- kernel/trace/trace.h | 6 ++++-- kernel/trace/trace_events_trigger.c | 36 +++++++++++++++++++----------------- 3 files changed, 27 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 925730bc9fc1..0d6930e941e8 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -430,7 +430,8 @@ extern int call_filter_check_discard(struct trace_event_call *call, void *rec, extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, void *rec); extern void event_triggers_post_call(struct trace_event_file *file, - enum event_trigger_type tt); + enum event_trigger_type tt, + void *rec); bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); @@ -517,7 +518,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); if (tt) - event_triggers_post_call(file, tt); + event_triggers_post_call(file, tt, entry); } /** @@ -550,7 +551,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file, irq_flags, pc, regs); if (tt) - event_triggers_post_call(file, tt); + event_triggers_post_call(file, tt, entry); } #ifdef CONFIG_BPF_EVENTS diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b2bc956e2b0d..c10456e72106 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1201,7 +1201,8 @@ extern int register_event_command(struct event_command *cmd); * @func: The trigger 'probe' function called when the triggering * event occurs. The data passed into this callback is the data * that was supplied to the event_command @reg() function that - * registered the trigger (see struct event_command). + * registered the trigger (see struct event_command) along with + * the trace record, rec. * * @init: An optional initialization function called for the trigger * when the trigger is registered (via the event_command reg() @@ -1226,7 +1227,8 @@ extern int register_event_command(struct event_command *cmd); * (see trace_event_triggers.c). */ struct event_trigger_ops { - void (*func)(struct event_trigger_data *data); + void (*func)(struct event_trigger_data *data, + void *rec); int (*init)(struct event_trigger_ops *ops, struct event_trigger_data *data); void (*free)(struct event_trigger_ops *ops, diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index f40424f35dcb..0a62887c63c0 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -73,7 +73,7 @@ event_triggers_call(struct trace_event_file *file, void *rec) list_for_each_entry_rcu(data, &file->triggers, list) { if (!rec) { - data->ops->func(data); + data->ops->func(data, rec); continue; } filter = rcu_dereference_sched(data->filter); @@ -83,7 +83,7 @@ event_triggers_call(struct trace_event_file *file, void *rec) tt |= data->cmd_ops->trigger_type; continue; } - data->ops->func(data); + data->ops->func(data, rec); } return tt; } @@ -93,6 +93,7 @@ EXPORT_SYMBOL_GPL(event_triggers_call); * event_triggers_post_call - Call 'post_triggers' for a trace event * @file: The trace_event_file associated with the event * @tt: enum event_trigger_type containing a set bit for each trigger to invoke + * @rec: The trace entry for the event * * For each trigger associated with an event, invoke the trigger * function registered with the associated trigger command, if the @@ -103,13 +104,14 @@ EXPORT_SYMBOL_GPL(event_triggers_call); */ void event_triggers_post_call(struct trace_event_file *file, - enum event_trigger_type tt) + enum event_trigger_type tt, + void *rec) { struct event_trigger_data *data; list_for_each_entry_rcu(data, &file->triggers, list) { if (data->cmd_ops->trigger_type & tt) - data->ops->func(data); + data->ops->func(data, rec); } } EXPORT_SYMBOL_GPL(event_triggers_post_call); @@ -745,7 +747,7 @@ int set_trigger_filter(char *filter_str, } static void -traceon_trigger(struct event_trigger_data *data) +traceon_trigger(struct event_trigger_data *data, void *rec) { if (tracing_is_on()) return; @@ -754,7 +756,7 @@ traceon_trigger(struct event_trigger_data *data) } static void -traceon_count_trigger(struct event_trigger_data *data) +traceon_count_trigger(struct event_trigger_data *data, void *rec) { if (tracing_is_on()) return; @@ -769,7 +771,7 @@ traceon_count_trigger(struct event_trigger_data *data) } static void -traceoff_trigger(struct event_trigger_data *data) +traceoff_trigger(struct event_trigger_data *data, void *rec) { if (!tracing_is_on()) return; @@ -778,7 +780,7 @@ traceoff_trigger(struct event_trigger_data *data) } static void -traceoff_count_trigger(struct event_trigger_data *data) +traceoff_count_trigger(struct event_trigger_data *data, void *rec) { if (!tracing_is_on()) return; @@ -874,13 +876,13 @@ static struct event_command trigger_traceoff_cmd = { #ifdef CONFIG_TRACER_SNAPSHOT static void -snapshot_trigger(struct event_trigger_data *data) +snapshot_trigger(struct event_trigger_data *data, void *rec) { tracing_snapshot(); } static void -snapshot_count_trigger(struct event_trigger_data *data) +snapshot_count_trigger(struct event_trigger_data *data, void *rec) { if (!data->count) return; @@ -888,7 +890,7 @@ snapshot_count_trigger(struct event_trigger_data *data) if (data->count != -1) (data->count)--; - snapshot_trigger(data); + snapshot_trigger(data, rec); } static int @@ -967,13 +969,13 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; } #define STACK_SKIP 3 static void -stacktrace_trigger(struct event_trigger_data *data) +stacktrace_trigger(struct event_trigger_data *data, void *rec) { trace_dump_stack(STACK_SKIP); } static void -stacktrace_count_trigger(struct event_trigger_data *data) +stacktrace_count_trigger(struct event_trigger_data *data, void *rec) { if (!data->count) return; @@ -981,7 +983,7 @@ stacktrace_count_trigger(struct event_trigger_data *data) if (data->count != -1) (data->count)--; - stacktrace_trigger(data); + stacktrace_trigger(data, rec); } static int @@ -1052,7 +1054,7 @@ struct enable_trigger_data { }; static void -event_enable_trigger(struct event_trigger_data *data) +event_enable_trigger(struct event_trigger_data *data, void *rec) { struct enable_trigger_data *enable_data = data->private_data; @@ -1063,7 +1065,7 @@ event_enable_trigger(struct event_trigger_data *data) } static void -event_enable_count_trigger(struct event_trigger_data *data) +event_enable_count_trigger(struct event_trigger_data *data, void *rec) { struct enable_trigger_data *enable_data = data->private_data; @@ -1077,7 +1079,7 @@ event_enable_count_trigger(struct event_trigger_data *data) if (data->count != -1) (data->count)--; - event_enable_trigger(data); + event_enable_trigger(data, rec); } static int -- cgit v1.2.3-58-ga151 From a664edb374c704a734a0df41fc742c285a5beb52 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 3 Mar 2016 01:08:57 -0800 Subject: tracing, writeback: Replace cgroup path to cgroup ino commit 5634cc2aa9aebc77bc862992e7805469dcf83dac ("writeback: update writeback tracepoints to report cgroup") made writeback tracepoints print out cgroup path when CGROUP_WRITEBACK is enabled, but it may trigger the below bug on -rt kernel since kernfs_path and kernfs_path_len are called by tracepoints, which acquire spin lock that is sleepable on -rt kernel. BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:930 in_atomic(): 1, irqs_disabled(): 0, pid: 625, name: kworker/u16:3 INFO: lockdep is turned off. Preemption disabled at:[] wb_writeback+0xec/0x830 CPU: 7 PID: 625 Comm: kworker/u16:3 Not tainted 4.4.1-rt5 #20 Hardware name: Freescale Layerscape 2085a RDB Board (DT) Workqueue: writeback wb_workfn (flush-7:0) Call trace: [] dump_backtrace+0x0/0x200 [] show_stack+0x24/0x30 [] dump_stack+0x88/0xa8 [] ___might_sleep+0x2ec/0x300 [] rt_spin_lock+0x38/0xb8 [] kernfs_path_len+0x30/0x90 [] trace_event_raw_event_writeback_work_class+0xe8/0x2e8 [] wb_writeback+0x620/0x830 [] wb_workfn+0x61c/0x950 [] process_one_work+0x3ac/0xb30 [] worker_thread+0x9c/0x7a8 [] kthread+0x190/0x1b0 [] ret_from_fork+0x10/0x30 With unlocked kernfs_* functions, synchronize_sched() has to be called in kernfs_rename which could be called in syscall path, but it is problematic. So, print out cgroup ino instead of path name, which could be converted to path name by userland. Withouth CGROUP_WRITEBACK enabled, it just prints out root dir. But, root dir ino vary from different filesystems, so printing out -1U to indicate an invalid cgroup ino. Link: http://lkml.kernel.org/r/1456996137-8354-1-git-send-email-yang.shi@linaro.org Acked-by: Tejun Heo Signed-off-by: Yang Shi Signed-off-by: Steven Rostedt --- include/trace/events/writeback.h | 121 +++++++++++++++------------------------ 1 file changed, 45 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index fff846b512e6..73614ce1d204 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -134,58 +134,28 @@ DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode, #ifdef CREATE_TRACE_POINTS #ifdef CONFIG_CGROUP_WRITEBACK -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb) +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb) { - return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1; + return wb->memcg_css->cgroup->kn->ino; } -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb) -{ - struct cgroup *cgrp = wb->memcg_css->cgroup; - char *path; - - path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1); - WARN_ON_ONCE(path != buf); -} - -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc) -{ - if (wbc->wb) - return __trace_wb_cgroup_size(wbc->wb); - else - return 2; -} - -static inline void __trace_wbc_assign_cgroup(char *buf, - struct writeback_control *wbc) +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc) { if (wbc->wb) - __trace_wb_assign_cgroup(buf, wbc->wb); + return __trace_wb_assign_cgroup(wbc->wb); else - strcpy(buf, "/"); + return -1U; } - #else /* CONFIG_CGROUP_WRITEBACK */ -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb) -{ - return 2; -} - -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb) -{ - strcpy(buf, "/"); -} - -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc) +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb) { - return 2; + return -1U; } -static inline void __trace_wbc_assign_cgroup(char *buf, - struct writeback_control *wbc) +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc) { - strcpy(buf, "/"); + return -1U; } #endif /* CONFIG_CGROUP_WRITEBACK */ @@ -201,7 +171,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, __array(char, name, 32) __field(unsigned long, ino) __field(int, sync_mode) - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc)) + __field(unsigned int, cgroup_ino) ), TP_fast_assign( @@ -209,14 +179,14 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, dev_name(inode_to_bdi(inode)->dev), 32); __entry->ino = inode->i_ino; __entry->sync_mode = wbc->sync_mode; - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc); + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), - TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s", + TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%u", __entry->name, __entry->ino, __entry->sync_mode, - __get_str(cgroup) + __entry->cgroup_ino ) ); @@ -246,7 +216,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, __field(int, range_cyclic) __field(int, for_background) __field(int, reason) - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) + __field(unsigned int, cgroup_ino) ), TP_fast_assign( strncpy(__entry->name, @@ -258,10 +228,10 @@ DECLARE_EVENT_CLASS(writeback_work_class, __entry->range_cyclic = work->range_cyclic; __entry->for_background = work->for_background; __entry->reason = work->reason; - __trace_wb_assign_cgroup(__get_str(cgroup), wb); + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d " - "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s", + "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%u", __entry->name, MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev), __entry->nr_pages, @@ -270,7 +240,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, __entry->range_cyclic, __entry->for_background, __print_symbolic(__entry->reason, WB_WORK_REASON), - __get_str(cgroup) + __entry->cgroup_ino ) ); #define DEFINE_WRITEBACK_WORK_EVENT(name) \ @@ -300,15 +270,15 @@ DECLARE_EVENT_CLASS(writeback_class, TP_ARGS(wb), TP_STRUCT__entry( __array(char, name, 32) - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) + __field(unsigned int, cgroup_ino) ), TP_fast_assign( strncpy(__entry->name, dev_name(wb->bdi->dev), 32); - __trace_wb_assign_cgroup(__get_str(cgroup), wb); + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), - TP_printk("bdi %s: cgroup=%s", + TP_printk("bdi %s: cgroup_ino=%u", __entry->name, - __get_str(cgroup) + __entry->cgroup_ino ) ); #define DEFINE_WRITEBACK_EVENT(name) \ @@ -347,7 +317,7 @@ DECLARE_EVENT_CLASS(wbc_class, __field(int, range_cyclic) __field(long, range_start) __field(long, range_end) - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc)) + __field(unsigned int, cgroup_ino) ), TP_fast_assign( @@ -361,12 +331,12 @@ DECLARE_EVENT_CLASS(wbc_class, __entry->range_cyclic = wbc->range_cyclic; __entry->range_start = (long)wbc->range_start; __entry->range_end = (long)wbc->range_end; - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc); + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d " "bgrd=%d reclm=%d cyclic=%d " - "start=0x%lx end=0x%lx cgroup=%s", + "start=0x%lx end=0x%lx cgroup_ino=%u", __entry->name, __entry->nr_to_write, __entry->pages_skipped, @@ -377,7 +347,7 @@ DECLARE_EVENT_CLASS(wbc_class, __entry->range_cyclic, __entry->range_start, __entry->range_end, - __get_str(cgroup) + __entry->cgroup_ino ) ) @@ -398,7 +368,7 @@ TRACE_EVENT(writeback_queue_io, __field(long, age) __field(int, moved) __field(int, reason) - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) + __field(unsigned int, cgroup_ino) ), TP_fast_assign( unsigned long *older_than_this = work->older_than_this; @@ -408,15 +378,15 @@ TRACE_EVENT(writeback_queue_io, (jiffies - *older_than_this) * 1000 / HZ : -1; __entry->moved = moved; __entry->reason = work->reason; - __trace_wb_assign_cgroup(__get_str(cgroup), wb); + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), - TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s", + TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%u", __entry->name, __entry->older, /* older_than_this in jiffies */ __entry->age, /* older_than_this in relative milliseconds */ __entry->moved, __print_symbolic(__entry->reason, WB_WORK_REASON), - __get_str(cgroup) + __entry->cgroup_ino ) ); @@ -484,7 +454,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, __field(unsigned long, dirty_ratelimit) __field(unsigned long, task_ratelimit) __field(unsigned long, balanced_dirty_ratelimit) - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) + __field(unsigned int, cgroup_ino) ), TP_fast_assign( @@ -496,13 +466,13 @@ TRACE_EVENT(bdi_dirty_ratelimit, __entry->task_ratelimit = KBps(task_ratelimit); __entry->balanced_dirty_ratelimit = KBps(wb->balanced_dirty_ratelimit); - __trace_wb_assign_cgroup(__get_str(cgroup), wb); + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: " "write_bw=%lu awrite_bw=%lu dirty_rate=%lu " "dirty_ratelimit=%lu task_ratelimit=%lu " - "balanced_dirty_ratelimit=%lu cgroup=%s", + "balanced_dirty_ratelimit=%lu cgroup_ino=%u", __entry->bdi, __entry->write_bw, /* write bandwidth */ __entry->avg_write_bw, /* avg write bandwidth */ @@ -510,7 +480,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, __entry->dirty_ratelimit, /* base ratelimit */ __entry->task_ratelimit, /* ratelimit with position control */ __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */ - __get_str(cgroup) + __entry->cgroup_ino ) ); @@ -548,7 +518,7 @@ TRACE_EVENT(balance_dirty_pages, __field( long, pause) __field(unsigned long, period) __field( long, think) - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) + __field(unsigned int, cgroup_ino) ), TP_fast_assign( @@ -571,7 +541,7 @@ TRACE_EVENT(balance_dirty_pages, __entry->period = period * 1000 / HZ; __entry->pause = pause * 1000 / HZ; __entry->paused = (jiffies - start_time) * 1000 / HZ; - __trace_wb_assign_cgroup(__get_str(cgroup), wb); + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), @@ -580,7 +550,7 @@ TRACE_EVENT(balance_dirty_pages, "bdi_setpoint=%lu bdi_dirty=%lu " "dirty_ratelimit=%lu task_ratelimit=%lu " "dirtied=%u dirtied_pause=%u " - "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s", + "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%u", __entry->bdi, __entry->limit, __entry->setpoint, @@ -595,7 +565,7 @@ TRACE_EVENT(balance_dirty_pages, __entry->pause, /* ms */ __entry->period, /* ms */ __entry->think, /* ms */ - __get_str(cgroup) + __entry->cgroup_ino ) ); @@ -609,8 +579,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue, __field(unsigned long, ino) __field(unsigned long, state) __field(unsigned long, dirtied_when) - __dynamic_array(char, cgroup, - __trace_wb_cgroup_size(inode_to_wb(inode))) + __field(unsigned int, cgroup_ino) ), TP_fast_assign( @@ -619,16 +588,16 @@ TRACE_EVENT(writeback_sb_inodes_requeue, __entry->ino = inode->i_ino; __entry->state = inode->i_state; __entry->dirtied_when = inode->dirtied_when; - __trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode)); + __entry->cgroup_ino = __trace_wb_assign_cgroup(inode_to_wb(inode)); ), - TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s", + TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%u", __entry->name, __entry->ino, show_inode_state(__entry->state), __entry->dirtied_when, (jiffies - __entry->dirtied_when) / HZ, - __get_str(cgroup) + __entry->cgroup_ino ) ); @@ -684,7 +653,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, __field(unsigned long, writeback_index) __field(long, nr_to_write) __field(unsigned long, wrote) - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc)) + __field(unsigned int, cgroup_ino) ), TP_fast_assign( @@ -696,11 +665,11 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, __entry->writeback_index = inode->i_mapping->writeback_index; __entry->nr_to_write = nr_to_write; __entry->wrote = nr_to_write - wbc->nr_to_write; - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc); + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); ), TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu " - "index=%lu to_write=%ld wrote=%lu cgroup=%s", + "index=%lu to_write=%ld wrote=%lu cgroup_ino=%u", __entry->name, __entry->ino, show_inode_state(__entry->state), @@ -709,7 +678,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, __entry->writeback_index, __entry->nr_to_write, __entry->wrote, - __get_str(cgroup) + __entry->cgroup_ino ) ); -- cgit v1.2.3-58-ga151 From 3debb0a9ddb16526de8b456491b7db60114f7b5e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 22 Mar 2016 17:30:58 -0400 Subject: tracing: Fix trace_printk() to print when not using bprintk() The trace_printk() code will allocate extra buffers if the compile detects that a trace_printk() is used. To do this, the format of the trace_printk() is saved to the __trace_printk_fmt section, and if that section is bigger than zero, the buffers are allocated (along with a message that this has happened). If trace_printk() uses a format that is not a constant, and thus something not guaranteed to be around when the print happens, the compiler optimizes the fmt out, as it is not used, and the __trace_printk_fmt section is not filled. This means the kernel will not allocate the special buffers needed for the trace_printk() and the trace_printk() will not write anything to the tracing buffer. Adding a "__used" to the variable in the __trace_printk_fmt section will keep it around, even though it is set to NULL. This will keep the string from being printed in the debugfs/tracing/printk_formats section as it is not needed. Reported-by: Vlastimil Babka Fixes: 07d777fe8c398 "tracing: Add percpu buffers for trace_printk()" Cc: stable@vger.kernel.org # v3.5+ Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 6 +++--- kernel/trace/trace_printk.c | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f31638c6e873..95452f72349a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -635,7 +635,7 @@ do { \ #define do_trace_printk(fmt, args...) \ do { \ - static const char *trace_printk_fmt \ + static const char *trace_printk_fmt __used \ __attribute__((section("__trace_printk_fmt"))) = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ @@ -679,7 +679,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...); */ #define trace_puts(str) ({ \ - static const char *trace_printk_fmt \ + static const char *trace_printk_fmt __used \ __attribute__((section("__trace_printk_fmt"))) = \ __builtin_constant_p(str) ? str : NULL; \ \ @@ -701,7 +701,7 @@ extern void trace_dump_stack(int skip); #define ftrace_vprintk(fmt, vargs) \ do { \ if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt \ + static const char *trace_printk_fmt __used \ __attribute__((section("__trace_printk_fmt"))) = \ __builtin_constant_p(fmt) ? fmt : NULL; \ \ diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 060df67dbdd1..f96f0383f6c6 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -296,6 +296,9 @@ static int t_show(struct seq_file *m, void *v) const char *str = *fmt; int i; + if (!*fmt) + return 0; + seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt); /* -- cgit v1.2.3-58-ga151