summaryrefslogtreecommitdiff
path: root/kernel/time/timer_stats.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2007-02-16 01:28:13 -0800
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-02-16 08:13:59 -0800
commit82f67cd9fca8c8762c15ba7ed0d5747588c1e221 (patch)
tree1ff7e5cc496580b85bb42fb1d7b19dcbef7b7776 /kernel/time/timer_stats.c
parent8bfd9a7a229b5f3d3eda5d7d45c2eebec5b4ba16 (diff)
[PATCH] Add debugging feature /proc/timer_stat
Add /proc/timer_stats support: debugging feature to profile timer expiration. Both the starting site, process/PID and the expiration function is captured. This allows the quick identification of timer event sources in a system. Sample output: # echo 1 > /proc/timer_stats # cat /proc/timer_stats Timer Stats Version: v0.1 Sample period: 4.010 s 24, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) 11, 0 swapper sk_reset_timer (tcp_delack_timer) 6, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) 2, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) 17, 0 swapper hrtimer_restart_sched_tick (hrtimer_sched_tick) 2, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) 4, 2050 pcscd do_nanosleep (hrtimer_wakeup) 5, 4179 sshd sk_reset_timer (tcp_write_timer) 4, 2248 yum-updatesd schedule_timeout (process_timeout) 18, 0 swapper hrtimer_restart_sched_tick (hrtimer_sched_tick) 3, 0 swapper sk_reset_timer (tcp_delack_timer) 1, 1 swapper neigh_table_init_no_netlink (neigh_periodic_timer) 2, 1 swapper e1000_up (e1000_watchdog) 1, 1 init schedule_timeout (process_timeout) 100 total events, 25.24 events/sec [ cleanups and hrtimers support from Thomas Gleixner <tglx@linutronix.de> ] [bunk@stusta.de: nr_entries can become static] Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: john stultz <johnstul@us.ibm.com> Cc: Roman Zippel <zippel@linux-m68k.org> Cc: Andi Kleen <ak@suse.de> Signed-off-by: Adrian Bunk <bunk@stusta.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/time/timer_stats.c')
-rw-r--r--kernel/time/timer_stats.c411
1 files changed, 411 insertions, 0 deletions
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
new file mode 100644
index 000000000000..1bc4882e28e0
--- /dev/null
+++ b/kernel/time/timer_stats.c
@@ -0,0 +1,411 @@
+/*
+ * kernel/time/timer_stats.c
+ *
+ * Collect timer usage statistics.
+ *
+ * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
+ * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
+ *
+ * timer_stats is based on timer_top, a similar functionality which was part of
+ * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
+ * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
+ * on dynamic allocation of the statistics entries and linear search based
+ * lookup combined with a global lock, rather than the static array, hash
+ * and per-CPU locking which is used by timer_stats. It was written for the
+ * pre hrtimer kernel code and therefore did not take hrtimers into account.
+ * Nevertheless it provided the base for the timer_stats implementation and
+ * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
+ * for this effort.
+ *
+ * timer_top.c is
+ * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
+ * Written by Daniel Petrini <d.pensator@gmail.com>
+ * timer_top.c was released under the GNU General Public License version 2
+ *
+ * We export the addresses and counting of timer functions being called,
+ * the pid and cmdline from the owner process if applicable.
+ *
+ * Start/stop data collection:
+ * # echo 1[0] >/proc/timer_stats
+ *
+ * Display the information collected so far:
+ * # cat /proc/timer_stats
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/kallsyms.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * This is our basic unit of interest: a timer expiry event identified
+ * by the timer, its start/expire functions and the PID of the task that
+ * started the timer. We count the number of times an event happens:
+ */
+struct entry {
+ /*
+ * Hash list:
+ */
+ struct entry *next;
+
+ /*
+ * Hash keys:
+ */
+ void *timer;
+ void *start_func;
+ void *expire_func;
+ pid_t pid;
+
+ /*
+ * Number of timeout events:
+ */
+ unsigned long count;
+
+ /*
+ * We save the command-line string to preserve
+ * this information past task exit:
+ */
+ char comm[TASK_COMM_LEN + 1];
+
+} ____cacheline_aligned_in_smp;
+
+/*
+ * Spinlock protecting the tables - not taken during lookup:
+ */
+static DEFINE_SPINLOCK(table_lock);
+
+/*
+ * Per-CPU lookup locks for fast hash lookup:
+ */
+static DEFINE_PER_CPU(spinlock_t, lookup_lock);
+
+/*
+ * Mutex to serialize state changes with show-stats activities:
+ */
+static DEFINE_MUTEX(show_mutex);
+
+/*
+ * Collection status, active/inactive:
+ */
+static int __read_mostly active;
+
+/*
+ * Beginning/end timestamps of measurement:
+ */
+static ktime_t time_start, time_stop;
+
+/*
+ * tstat entry structs only get allocated while collection is
+ * active and never freed during that time - this simplifies
+ * things quite a bit.
+ *
+ * They get freed when a new collection period is started.
+ */
+#define MAX_ENTRIES_BITS 10
+#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS)
+
+static unsigned long nr_entries;
+static struct entry entries[MAX_ENTRIES];
+
+static atomic_t overflow_count;
+
+static void reset_entries(void)
+{
+ nr_entries = 0;
+ memset(entries, 0, sizeof(entries));
+ atomic_set(&overflow_count, 0);
+}
+
+static struct entry *alloc_entry(void)
+{
+ if (nr_entries >= MAX_ENTRIES)
+ return NULL;
+
+ return entries + nr_entries++;
+}
+
+/*
+ * The entries are in a hash-table, for fast lookup:
+ */
+#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1)
+#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS)
+#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1)
+
+#define __tstat_hashfn(entry) \
+ (((unsigned long)(entry)->timer ^ \
+ (unsigned long)(entry)->start_func ^ \
+ (unsigned long)(entry)->expire_func ^ \
+ (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK)
+
+#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry))
+
+static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
+
+static int match_entries(struct entry *entry1, struct entry *entry2)
+{
+ return entry1->timer == entry2->timer &&
+ entry1->start_func == entry2->start_func &&
+ entry1->expire_func == entry2->expire_func &&
+ entry1->pid == entry2->pid;
+}
+
+/*
+ * Look up whether an entry matching this item is present
+ * in the hash already. Must be called with irqs off and the
+ * lookup lock held:
+ */
+static struct entry *tstat_lookup(struct entry *entry, char *comm)
+{
+ struct entry **head, *curr, *prev;
+
+ head = tstat_hashentry(entry);
+ curr = *head;
+
+ /*
+ * The fastpath is when the entry is already hashed,
+ * we do this with the lookup lock held, but with the
+ * table lock not held:
+ */
+ while (curr) {
+ if (match_entries(curr, entry))
+ return curr;
+
+ curr = curr->next;
+ }
+ /*
+ * Slowpath: allocate, set up and link a new hash entry:
+ */
+ prev = NULL;
+ curr = *head;
+
+ spin_lock(&table_lock);
+ /*
+ * Make sure we have not raced with another CPU:
+ */
+ while (curr) {
+ if (match_entries(curr, entry))
+ goto out_unlock;
+
+ prev = curr;
+ curr = curr->next;
+ }
+
+ curr = alloc_entry();
+ if (curr) {
+ *curr = *entry;
+ curr->count = 0;
+ memcpy(curr->comm, comm, TASK_COMM_LEN);
+ if (prev)
+ prev->next = curr;
+ else
+ *head = curr;
+ curr->next = NULL;
+ }
+ out_unlock:
+ spin_unlock(&table_lock);
+
+ return curr;
+}
+
+/**
+ * timer_stats_update_stats - Update the statistics for a timer.
+ * @timer: pointer to either a timer_list or a hrtimer
+ * @pid: the pid of the task which set up the timer
+ * @startf: pointer to the function which did the timer setup
+ * @timerf: pointer to the timer callback function of the timer
+ * @comm: name of the process which set up the timer
+ *
+ * When the timer is already registered, then the event counter is
+ * incremented. Otherwise the timer is registered in a free slot.
+ */
+void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
+ void *timerf, char * comm)
+{
+ /*
+ * It doesnt matter which lock we take:
+ */
+ spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id());
+ struct entry *entry, input;
+ unsigned long flags;
+
+ input.timer = timer;
+ input.start_func = startf;
+ input.expire_func = timerf;
+ input.pid = pid;
+
+ spin_lock_irqsave(lock, flags);
+ if (!active)
+ goto out_unlock;
+
+ entry = tstat_lookup(&input, comm);
+ if (likely(entry))
+ entry->count++;
+ else
+ atomic_inc(&overflow_count);
+
+ out_unlock:
+ spin_unlock_irqrestore(lock, flags);
+}
+
+static void print_name_offset(struct seq_file *m, unsigned long addr)
+{
+ char namebuf[KSYM_NAME_LEN+1];
+ unsigned long size, offset;
+ const char *sym_name;
+ char *modname;
+
+ sym_name = kallsyms_lookup(addr, &size, &offset, &modname, namebuf);
+ if (sym_name)
+ seq_printf(m, "%s", sym_name);
+ else
+ seq_printf(m, "<%p>", (void *)addr);
+}
+
+static int tstats_show(struct seq_file *m, void *v)
+{
+ struct timespec period;
+ struct entry *entry;
+ unsigned long ms;
+ long events = 0;
+ ktime_t time;
+ int i;
+
+ mutex_lock(&show_mutex);
+ /*
+ * If still active then calculate up to now:
+ */
+ if (active)
+ time_stop = ktime_get();
+
+ time = ktime_sub(time_stop, time_start);
+
+ period = ktime_to_timespec(time);
+ ms = period.tv_nsec / 1000000;
+
+ seq_puts(m, "Timer Stats Version: v0.1\n");
+ seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
+ if (atomic_read(&overflow_count))
+ seq_printf(m, "Overflow: %d entries\n",
+ atomic_read(&overflow_count));
+
+ for (i = 0; i < nr_entries; i++) {
+ entry = entries + i;
+ seq_printf(m, "%4lu, %5d %-16s ",
+ entry->count, entry->pid, entry->comm);
+
+ print_name_offset(m, (unsigned long)entry->start_func);
+ seq_puts(m, " (");
+ print_name_offset(m, (unsigned long)entry->expire_func);
+ seq_puts(m, ")\n");
+
+ events += entry->count;
+ }
+
+ ms += period.tv_sec * 1000;
+ if (!ms)
+ ms = 1;
+
+ if (events && period.tv_sec)
+ seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events,
+ events / period.tv_sec, events * 1000 / ms);
+ else
+ seq_printf(m, "%ld total events\n", events);
+
+ mutex_unlock(&show_mutex);
+
+ return 0;
+}
+
+/*
+ * After a state change, make sure all concurrent lookup/update
+ * activities have stopped:
+ */
+static void sync_access(void)
+{
+ unsigned long flags;
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags);
+ /* nothing */
+ spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags);
+ }
+}
+
+static ssize_t tstats_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offs)
+{
+ char ctl[2];
+
+ if (count != 2 || *offs)
+ return -EINVAL;
+
+ if (copy_from_user(ctl, buf, count))
+ return -EFAULT;
+
+ mutex_lock(&show_mutex);
+ switch (ctl[0]) {
+ case '0':
+ if (active) {
+ active = 0;
+ time_stop = ktime_get();
+ sync_access();
+ }
+ break;
+ case '1':
+ if (!active) {
+ reset_entries();
+ time_start = ktime_get();
+ active = 1;
+ }
+ break;
+ default:
+ count = -EINVAL;
+ }
+ mutex_unlock(&show_mutex);
+
+ return count;
+}
+
+static int tstats_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, tstats_show, NULL);
+}
+
+static struct file_operations tstats_fops = {
+ .open = tstats_open,
+ .read = seq_read,
+ .write = tstats_write,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+void __init init_timer_stats(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ spin_lock_init(&per_cpu(lookup_lock, cpu));
+}
+
+static int __init init_tstats_procfs(void)
+{
+ struct proc_dir_entry *pe;
+
+ pe = create_proc_entry("timer_stats", 0644, NULL);
+ if (!pe)
+ return -ENOMEM;
+
+ pe->proc_fops = &tstats_fops;
+
+ return 0;
+}
+__initcall(init_tstats_procfs);