linux/kernel/time/timer_stats.c
<<
>>
Prefs
   1/*
   2 * kernel/time/timer_stats.c
   3 *
   4 * Collect timer usage statistics.
   5 *
   6 * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
   7 * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
   8 *
   9 * timer_stats is based on timer_top, a similar functionality which was part of
  10 * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
  11 * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
  12 * on dynamic allocation of the statistics entries and linear search based
  13 * lookup combined with a global lock, rather than the static array, hash
  14 * and per-CPU locking which is used by timer_stats. It was written for the
  15 * pre hrtimer kernel code and therefore did not take hrtimers into account.
  16 * Nevertheless it provided the base for the timer_stats implementation and
  17 * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
  18 * for this effort.
  19 *
  20 * timer_top.c is
  21 *      Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
  22 *      Written by Daniel Petrini <d.pensator@gmail.com>
  23 *      timer_top.c was released under the GNU General Public License version 2
  24 *
  25 * We export the addresses and counting of timer functions being called,
  26 * the pid and cmdline from the owner process if applicable.
  27 *
  28 * Start/stop data collection:
  29 * # echo [1|0] >/proc/timer_stats
  30 *
  31 * Display the information collected so far:
  32 * # cat /proc/timer_stats
  33 *
  34 * This program is free software; you can redistribute it and/or modify
  35 * it under the terms of the GNU General Public License version 2 as
  36 * published by the Free Software Foundation.
  37 */
  38
  39#include <linux/proc_fs.h>
  40#include <linux/module.h>
  41#include <linux/spinlock.h>
  42#include <linux/sched.h>
  43#include <linux/seq_file.h>
  44#include <linux/kallsyms.h>
  45
  46#include <asm/uaccess.h>
  47
  48/*
  49 * This is our basic unit of interest: a timer expiry event identified
  50 * by the timer, its start/expire functions and the PID of the task that
  51 * started the timer. We count the number of times an event happens:
  52 */
  53struct entry {
  54        /*
  55         * Hash list:
  56         */
  57        struct entry            *next;
  58
  59        /*
  60         * Hash keys:
  61         */
  62        void                    *timer;
  63        void                    *start_func;
  64        void                    *expire_func;
  65        pid_t                   pid;
  66
  67        /*
  68         * Number of timeout events:
  69         */
  70        unsigned long           count;
  71        unsigned int            timer_flag;
  72
  73        /*
  74         * We save the command-line string to preserve
  75         * this information past task exit:
  76         */
  77        char                    comm[TASK_COMM_LEN + 1];
  78
  79} ____cacheline_aligned_in_smp;
  80
  81/*
  82 * Spinlock protecting the tables - not taken during lookup:
  83 */
  84static DEFINE_SPINLOCK(table_lock);
  85
  86/*
  87 * Per-CPU lookup locks for fast hash lookup:
  88 */
  89static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock);
  90
  91/*
  92 * Mutex to serialize state changes with show-stats activities:
  93 */
  94static DEFINE_MUTEX(show_mutex);
  95
  96/*
  97 * Collection status, active/inactive:
  98 */
  99int __read_mostly timer_stats_active;
 100
 101/*
 102 * Beginning/end timestamps of measurement:
 103 */
 104static ktime_t time_start, time_stop;
 105
 106/*
 107 * tstat entry structs only get allocated while collection is
 108 * active and never freed during that time - this simplifies
 109 * things quite a bit.
 110 *
 111 * They get freed when a new collection period is started.
 112 */
 113#define MAX_ENTRIES_BITS        10
 114#define MAX_ENTRIES             (1UL << MAX_ENTRIES_BITS)
 115
 116static unsigned long nr_entries;
 117static struct entry entries[MAX_ENTRIES];
 118
 119static atomic_t overflow_count;
 120
 121/*
 122 * The entries are in a hash-table, for fast lookup:
 123 */
 124#define TSTAT_HASH_BITS         (MAX_ENTRIES_BITS - 1)
 125#define TSTAT_HASH_SIZE         (1UL << TSTAT_HASH_BITS)
 126#define TSTAT_HASH_MASK         (TSTAT_HASH_SIZE - 1)
 127
 128#define __tstat_hashfn(entry)                                           \
 129        (((unsigned long)(entry)->timer       ^                         \
 130          (unsigned long)(entry)->start_func  ^                         \
 131          (unsigned long)(entry)->expire_func ^                         \
 132          (unsigned long)(entry)->pid           ) & TSTAT_HASH_MASK)
 133
 134#define tstat_hashentry(entry)  (tstat_hash_table + __tstat_hashfn(entry))
 135
 136static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
 137
 138static void reset_entries(void)
 139{
 140        nr_entries = 0;
 141        memset(entries, 0, sizeof(entries));
 142        memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
 143        atomic_set(&overflow_count, 0);
 144}
 145
 146static struct entry *alloc_entry(void)
 147{
 148        if (nr_entries >= MAX_ENTRIES)
 149                return NULL;
 150
 151        return entries + nr_entries++;
 152}
 153
 154static int match_entries(struct entry *entry1, struct entry *entry2)
 155{
 156        return entry1->timer       == entry2->timer       &&
 157               entry1->start_func  == entry2->start_func  &&
 158               entry1->expire_func == entry2->expire_func &&
 159               entry1->pid         == entry2->pid;
 160}
 161
 162/*
 163 * Look up whether an entry matching this item is present
 164 * in the hash already. Must be called with irqs off and the
 165 * lookup lock held:
 166 */
 167static struct entry *tstat_lookup(struct entry *entry, char *comm)
 168{
 169        struct entry **head, *curr, *prev;
 170
 171        head = tstat_hashentry(entry);
 172        curr = *head;
 173
 174        /*
 175         * The fastpath is when the entry is already hashed,
 176         * we do this with the lookup lock held, but with the
 177         * table lock not held:
 178         */
 179        while (curr) {
 180                if (match_entries(curr, entry))
 181                        return curr;
 182
 183                curr = curr->next;
 184        }
 185        /*
 186         * Slowpath: allocate, set up and link a new hash entry:
 187         */
 188        prev = NULL;
 189        curr = *head;
 190
 191        spin_lock(&table_lock);
 192        /*
 193         * Make sure we have not raced with another CPU:
 194         */
 195        while (curr) {
 196                if (match_entries(curr, entry))
 197                        goto out_unlock;
 198
 199                prev = curr;
 200                curr = curr->next;
 201        }
 202
 203        curr = alloc_entry();
 204        if (curr) {
 205                *curr = *entry;
 206                curr->count = 0;
 207                curr->next = NULL;
 208                memcpy(curr->comm, comm, TASK_COMM_LEN);
 209
 210                smp_mb(); /* Ensure that curr is initialized before insert */
 211
 212                if (prev)
 213                        prev->next = curr;
 214                else
 215                        *head = curr;
 216        }
 217 out_unlock:
 218        spin_unlock(&table_lock);
 219
 220        return curr;
 221}
 222
 223/**
 224 * timer_stats_update_stats - Update the statistics for a timer.
 225 * @timer:      pointer to either a timer_list or a hrtimer
 226 * @pid:        the pid of the task which set up the timer
 227 * @startf:     pointer to the function which did the timer setup
 228 * @timerf:     pointer to the timer callback function of the timer
 229 * @comm:       name of the process which set up the timer
 230 *
 231 * When the timer is already registered, then the event counter is
 232 * incremented. Otherwise the timer is registered in a free slot.
 233 */
 234void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
 235                              void *timerf, char *comm,
 236                              unsigned int timer_flag)
 237{
 238        /*
 239         * It doesn't matter which lock we take:
 240         */
 241        raw_spinlock_t *lock;
 242        struct entry *entry, input;
 243        unsigned long flags;
 244
 245        if (likely(!timer_stats_active))
 246                return;
 247
 248        lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id());
 249
 250        input.timer = timer;
 251        input.start_func = startf;
 252        input.expire_func = timerf;
 253        input.pid = pid;
 254        input.timer_flag = timer_flag;
 255
 256        raw_spin_lock_irqsave(lock, flags);
 257        if (!timer_stats_active)
 258                goto out_unlock;
 259
 260        entry = tstat_lookup(&input, comm);
 261        if (likely(entry))
 262                entry->count++;
 263        else
 264                atomic_inc(&overflow_count);
 265
 266 out_unlock:
 267        raw_spin_unlock_irqrestore(lock, flags);
 268}
 269
 270static void print_name_offset(struct seq_file *m, unsigned long addr)
 271{
 272        char symname[KSYM_NAME_LEN];
 273
 274        if (lookup_symbol_name(addr, symname) < 0)
 275                seq_printf(m, "<%p>", (void *)addr);
 276        else
 277                seq_printf(m, "%s", symname);
 278}
 279
 280static int tstats_show(struct seq_file *m, void *v)
 281{
 282        struct timespec period;
 283        struct entry *entry;
 284        unsigned long ms;
 285        long events = 0;
 286        ktime_t time;
 287        int i;
 288
 289        mutex_lock(&show_mutex);
 290        /*
 291         * If still active then calculate up to now:
 292         */
 293        if (timer_stats_active)
 294                time_stop = ktime_get();
 295
 296        time = ktime_sub(time_stop, time_start);
 297
 298        period = ktime_to_timespec(time);
 299        ms = period.tv_nsec / 1000000;
 300
 301        seq_puts(m, "Timer Stats Version: v0.2\n");
 302        seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
 303        if (atomic_read(&overflow_count))
 304                seq_printf(m, "Overflow: %d entries\n",
 305                        atomic_read(&overflow_count));
 306
 307        for (i = 0; i < nr_entries; i++) {
 308                entry = entries + i;
 309                if (entry->timer_flag & TIMER_STATS_FLAG_DEFERRABLE) {
 310                        seq_printf(m, "%4luD, %5d %-16s ",
 311                                entry->count, entry->pid, entry->comm);
 312                } else {
 313                        seq_printf(m, " %4lu, %5d %-16s ",
 314                                entry->count, entry->pid, entry->comm);
 315                }
 316
 317                print_name_offset(m, (unsigned long)entry->start_func);
 318                seq_puts(m, " (");
 319                print_name_offset(m, (unsigned long)entry->expire_func);
 320                seq_puts(m, ")\n");
 321
 322                events += entry->count;
 323        }
 324
 325        ms += period.tv_sec * 1000;
 326        if (!ms)
 327                ms = 1;
 328
 329        if (events && period.tv_sec)
 330                seq_printf(m, "%ld total events, %ld.%03ld events/sec\n",
 331                           events, events * 1000 / ms,
 332                           (events * 1000000 / ms) % 1000);
 333        else
 334                seq_printf(m, "%ld total events\n", events);
 335
 336        mutex_unlock(&show_mutex);
 337
 338        return 0;
 339}
 340
 341/*
 342 * After a state change, make sure all concurrent lookup/update
 343 * activities have stopped:
 344 */
 345static void sync_access(void)
 346{
 347        unsigned long flags;
 348        int cpu;
 349
 350        for_each_online_cpu(cpu) {
 351                raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu);
 352
 353                raw_spin_lock_irqsave(lock, flags);
 354                /* nothing */
 355                raw_spin_unlock_irqrestore(lock, flags);
 356        }
 357}
 358
 359static ssize_t tstats_write(struct file *file, const char __user *buf,
 360                            size_t count, loff_t *offs)
 361{
 362        char ctl[2];
 363
 364        if (count != 2 || *offs)
 365                return -EINVAL;
 366
 367        if (copy_from_user(ctl, buf, count))
 368                return -EFAULT;
 369
 370        mutex_lock(&show_mutex);
 371        switch (ctl[0]) {
 372        case '0':
 373                if (timer_stats_active) {
 374                        timer_stats_active = 0;
 375                        time_stop = ktime_get();
 376                        sync_access();
 377                }
 378                break;
 379        case '1':
 380                if (!timer_stats_active) {
 381                        reset_entries();
 382                        time_start = ktime_get();
 383                        smp_mb();
 384                        timer_stats_active = 1;
 385                }
 386                break;
 387        default:
 388                count = -EINVAL;
 389        }
 390        mutex_unlock(&show_mutex);
 391
 392        return count;
 393}
 394
 395static int tstats_open(struct inode *inode, struct file *filp)
 396{
 397        return single_open(filp, tstats_show, NULL);
 398}
 399
 400static const struct file_operations tstats_fops = {
 401        .open           = tstats_open,
 402        .read           = seq_read,
 403        .write          = tstats_write,
 404        .llseek         = seq_lseek,
 405        .release        = single_release,
 406};
 407
 408void __init init_timer_stats(void)
 409{
 410        int cpu;
 411
 412        for_each_possible_cpu(cpu)
 413                raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu));
 414}
 415
 416static int __init init_tstats_procfs(void)
 417{
 418        struct proc_dir_entry *pe;
 419
 420        pe = proc_create("timer_stats", 0644, NULL, &tstats_fops);
 421        if (!pe)
 422                return -ENOMEM;
 423        return 0;
 424}
 425__initcall(init_tstats_procfs);
 426