linux/kernel/tracepoint.c
/*
 * Copyright (C) 2008 Mathieu Desnoyers
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/tracepoint.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/sched.h>

extern struct tracepoint __start___tracepoints[];
extern struct tracepoint __stop___tracepoints[];

/* Set to 1 to enable tracepoint debug output */
static const int tracepoint_debug;

/*
 * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
 * builtin and module tracepoints and the hash table.
 */
static DEFINE_MUTEX(tracepoints_mutex);

/*
 * Tracepoint hash table, containing the active tracepoints.
 * Protected by tracepoints_mutex.
 */
#define TRACEPOINT_HASH_BITS 6
#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];

/*
 * Note about RCU:
 * It is used to delay the freeing of old probe arrays until a quiescent
 * state is reached.
 * Tracepoint entry modifications are protected by the tracepoints_mutex.
 */
struct tracepoint_entry {
        struct hlist_node hlist;
        void **funcs;
        int refcount;   /* Number of times armed. 0 if disarmed. */
        char name[0];
};

struct tp_probes {
        union {
                struct rcu_head rcu;
                struct list_head list;
        } u;
        void *probes[0];
};

static inline void *allocate_probes(int count)
{
        struct tp_probes *p = kmalloc(count * sizeof(void *)
                        + sizeof(struct tp_probes), GFP_KERNEL);
        return p == NULL ? NULL : p->probes;
}

static void rcu_free_old_probes(struct rcu_head *head)
{
        kfree(container_of(head, struct tp_probes, u.rcu));
}

static inline void release_probes(void *old)
{
        if (old) {
                struct tp_probes *tp_probes = container_of(old,
                        struct tp_probes, probes[0]);
                call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
        }
}

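/*
 * Illustrative sketch (not part of this file) of the life cycle of a probe
 * array: a new NULL-terminated array is built with allocate_probes(), the
 * entry is updated under tracepoints_mutex, and the array being replaced is
 * handed to release_probes(). The kfree() is deferred through
 * call_rcu_sched(), so a tracepoint call site that is still walking the old
 * array with preemption disabled finishes before the memory goes away.
 * my_probe below is a hypothetical handler.
 *
 *      void **new, **old;
 *
 *      new = allocate_probes(2);
 *      if (new) {
 *              new[0] = my_probe;
 *              new[1] = NULL;
 *              old = entry->funcs;
 *              entry->funcs = new;
 *              release_probes(old);
 *      }
 */
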
static void debug_print_probes(struct tracepoint_entry *entry)
{
        int i;

        if (!tracepoint_debug || !entry->funcs)
                return;

        for (i = 0; entry->funcs[i]; i++)
                printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]);
}

static void *
tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe)
{
        int nr_probes = 0;
        void **old, **new;

        WARN_ON(!probe);

        debug_print_probes(entry);
        old = entry->funcs;
        if (old) {
                /* (N -> N+1), (N != 0, 1) probes */
                for (nr_probes = 0; old[nr_probes]; nr_probes++)
                        if (old[nr_probes] == probe)
                                return ERR_PTR(-EEXIST);
        }
        /* + 2 : one for new probe, one for NULL func */
        new = allocate_probes(nr_probes + 2);
        if (new == NULL)
                return ERR_PTR(-ENOMEM);
        if (old)
                memcpy(new, old, nr_probes * sizeof(void *));
        new[nr_probes] = probe;
        new[nr_probes + 1] = NULL;
        entry->refcount = nr_probes + 1;
        entry->funcs = new;
        debug_print_probes(entry);
        return old;
}

static void *
tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
{
        int nr_probes = 0, nr_del = 0, i;
        void **old, **new;

        old = entry->funcs;

        if (!old)
                return ERR_PTR(-ENOENT);

        debug_print_probes(entry);
        /* (N -> M), (N > 1, M >= 0) probes */
        for (nr_probes = 0; old[nr_probes]; nr_probes++) {
                if ((!probe || old[nr_probes] == probe))
                        nr_del++;
        }

        if (nr_probes - nr_del == 0) {
                /* N -> 0, (N > 1) */
                entry->funcs = NULL;
                entry->refcount = 0;
                debug_print_probes(entry);
                return old;
        } else {
                int j = 0;
                /* N -> M, (N > 1, M > 0) */
                /* + 1 for NULL */
                new = allocate_probes(nr_probes - nr_del + 1);
                if (new == NULL)
                        return ERR_PTR(-ENOMEM);
                for (i = 0; old[i]; i++)
                        if ((probe && old[i] != probe))
                                new[j++] = old[i];
                new[nr_probes - nr_del] = NULL;
                entry->refcount = nr_probes - nr_del;
                entry->funcs = new;
        }
        debug_print_probes(entry);
        return old;
}

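/*
 * Copy-on-write contract assumed by the two helpers above (sketch, for
 * illustration only): neither tracepoint_entry_add_probe() nor
 * tracepoint_entry_remove_probe() modifies the old funcs array in place.
 * Each builds a new NULL-terminated array, installs it in the entry and
 * returns the old array (or an ERR_PTR), so the caller is responsible for
 * releasing it once the tracepoint call sites have been repointed:
 *
 *      void *old = tracepoint_entry_add_probe(entry, probe);
 *      if (IS_ERR(old))
 *              return PTR_ERR(old);
 *      tracepoint_update_probes();
 *      release_probes(old);
 */
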
/*
 * Get tracepoint if the tracepoint is present in the tracepoint hash table.
 * Must be called with tracepoints_mutex held.
 * Returns NULL if not present.
 */
static struct tracepoint_entry *get_tracepoint(const char *name)
{
        struct hlist_head *head;
        struct hlist_node *node;
        struct tracepoint_entry *e;
        u32 hash = jhash(name, strlen(name), 0);

        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
        hlist_for_each_entry(e, node, head, hlist) {
                if (!strcmp(name, e->name))
                        return e;
        }
        return NULL;
}

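/*
 * Bucket selection example (illustration only; "sched_switch" is just a
 * sample name): lookup above and insertion below must hash a name
 * identically so that a given tracepoint always maps to the same bucket:
 *
 *      u32 hash = jhash("sched_switch", strlen("sched_switch"), 0);
 *      struct hlist_head *head =
 *              &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 */
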
/*
 * Add the tracepoint to the tracepoint hash table. Must be called with
 * tracepoints_mutex held.
 */
static struct tracepoint_entry *add_tracepoint(const char *name)
{
        struct hlist_head *head;
        struct hlist_node *node;
        struct tracepoint_entry *e;
        size_t name_len = strlen(name) + 1;
        u32 hash = jhash(name, name_len-1, 0);

        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
        hlist_for_each_entry(e, node, head, hlist) {
                if (!strcmp(name, e->name)) {
                        printk(KERN_NOTICE
                                "tracepoint %s busy\n", name);
                        return ERR_PTR(-EEXIST);        /* Already there */
                }
        }
        /*
         * Using kmalloc here to allocate a variable length element. Could
         * cause some memory fragmentation if overused.
         */
        e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
        if (!e)
                return ERR_PTR(-ENOMEM);
        memcpy(&e->name[0], name, name_len);
        e->funcs = NULL;
        e->refcount = 0;
        hlist_add_head(&e->hlist, head);
        return e;
}

/*
 * Remove the tracepoint from the tracepoint hash table. Must be called with
 * tracepoints_mutex held.
 */
static inline void remove_tracepoint(struct tracepoint_entry *e)
{
        hlist_del(&e->hlist);
        kfree(e);
}

/*
 * Sets the probe callback corresponding to one tracepoint.
 */
static void set_tracepoint(struct tracepoint_entry **entry,
        struct tracepoint *elem, int active)
{
        WARN_ON(strcmp((*entry)->name, elem->name) != 0);

        if (elem->regfunc && !elem->state && active)
                elem->regfunc();
        else if (elem->unregfunc && elem->state && !active)
                elem->unregfunc();

        /*
         * rcu_assign_pointer has a smp_wmb() which makes sure that the new
         * probe callbacks array is consistent before setting a pointer to it.
         * This array is referenced by __DO_TRACE from
         * include/linux/tracepoint.h. A matching smp_read_barrier_depends()
         * is used.
         */
        rcu_assign_pointer(elem->funcs, (*entry)->funcs);
        elem->state = active;
}

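/*
 * Reader side, simplified sketch (the real iteration is done by __DO_TRACE
 * in include/linux/tracepoint.h and passes the tracepoint arguments): the
 * funcs array published above is walked under sched-RCU protection, which is
 * why release_probes() can rely on call_rcu_sched() for reclamation.
 *
 *      void **it_func;
 *
 *      rcu_read_lock_sched_notrace();
 *      it_func = rcu_dereference(tp->funcs);
 *      if (it_func) {
 *              do {
 *                      ((void (*)(void))(*it_func))();
 *              } while (*(++it_func));
 *      }
 *      rcu_read_unlock_sched_notrace();
 */
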
/*
 * Disable a tracepoint and its probe callback.
 * Note: only waiting an RCU-sched grace period after clearing elem->funcs
 * ensures that the original callbacks are not used anymore. This is
 * guaranteed by the preempt_disable around the call site.
 */
static void disable_tracepoint(struct tracepoint *elem)
{
        if (elem->unregfunc && elem->state)
                elem->unregfunc();

        elem->state = 0;
        rcu_assign_pointer(elem->funcs, NULL);
}

/**
 * tracepoint_update_probe_range - Update a probe range
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Updates the probe callback corresponding to a range of tracepoints.
 */
void
tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end)
{
        struct tracepoint *iter;
        struct tracepoint_entry *mark_entry;

        if (!begin)
                return;

        mutex_lock(&tracepoints_mutex);
        for (iter = begin; iter < end; iter++) {
                mark_entry = get_tracepoint(iter->name);
                if (mark_entry) {
                        set_tracepoint(&mark_entry, iter,
                                        !!mark_entry->refcount);
                } else {
                        disable_tracepoint(iter);
                }
        }
        mutex_unlock(&tracepoints_mutex);
}

/*
 * Update probes, removing the faulty probes.
 */
static void tracepoint_update_probes(void)
{
        /* Core kernel tracepoints */
        tracepoint_update_probe_range(__start___tracepoints,
                __stop___tracepoints);
        /* tracepoints in modules. */
        module_update_tracepoints();
}

static void *tracepoint_add_probe(const char *name, void *probe)
{
        struct tracepoint_entry *entry;
        void *old;

        entry = get_tracepoint(name);
        if (!entry) {
                entry = add_tracepoint(name);
                if (IS_ERR(entry))
                        return entry;
        }
        old = tracepoint_entry_add_probe(entry, probe);
        if (IS_ERR(old) && !entry->refcount)
                remove_tracepoint(entry);
        return old;
}

/**
 * tracepoint_probe_register - Connect a probe to a tracepoint
 * @name: tracepoint name
 * @probe: probe handler
 *
 * Returns 0 if ok, error value on error.
 * The probe address must at least be aligned on the architecture pointer size.
 */
int tracepoint_probe_register(const char *name, void *probe)
{
        void *old;

        mutex_lock(&tracepoints_mutex);
        old = tracepoint_add_probe(name, probe);
        mutex_unlock(&tracepoints_mutex);
        if (IS_ERR(old))
                return PTR_ERR(old);

        tracepoint_update_probes();             /* may update entry */
        release_probes(old);
        return 0;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register);

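/*
 * Usage sketch (the tracepoint name and probe signature below are
 * hypothetical, shown only for illustration): the probe's prototype must
 * match the tracepoint's, and unregistration mirrors registration.
 *
 *      static void probe_sched_switch(struct rq *rq, struct task_struct *prev,
 *                                     struct task_struct *next)
 *      {
 *              trace_printk("switch %d -> %d\n", prev->pid, next->pid);
 *      }
 *
 *      ret = tracepoint_probe_register("sched_switch", probe_sched_switch);
 *      ...
 *      ret = tracepoint_probe_unregister("sched_switch", probe_sched_switch);
 */
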
static void *tracepoint_remove_probe(const char *name, void *probe)
{
        struct tracepoint_entry *entry;
        void *old;

        entry = get_tracepoint(name);
        if (!entry)
                return ERR_PTR(-ENOENT);
        old = tracepoint_entry_remove_probe(entry, probe);
        if (IS_ERR(old))
                return old;
        if (!entry->refcount)
                remove_tracepoint(entry);
        return old;
}

/**
 * tracepoint_probe_unregister - Disconnect a probe from a tracepoint
 * @name: tracepoint name
 * @probe: probe function pointer
 *
 * We do not need to call a synchronize_sched to make sure the probes have
 * finished running before doing a module unload, because the module unload
 * itself uses stop_machine(), which ensures that every preempt-disabled
 * section has finished.
 */
int tracepoint_probe_unregister(const char *name, void *probe)
{
        void *old;

        mutex_lock(&tracepoints_mutex);
        old = tracepoint_remove_probe(name, probe);
        mutex_unlock(&tracepoints_mutex);
        if (IS_ERR(old))
                return PTR_ERR(old);

        tracepoint_update_probes();             /* may update entry */
        release_probes(old);
        return 0;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);

static LIST_HEAD(old_probes);
static int need_update;

static void tracepoint_add_old_probes(void *old)
{
        need_update = 1;
        if (old) {
                struct tp_probes *tp_probes = container_of(old,
                        struct tp_probes, probes[0]);
                list_add(&tp_probes->u.list, &old_probes);
        }
}

/**
 * tracepoint_probe_register_noupdate - register a probe but do not connect it
 * @name: tracepoint name
 * @probe: probe handler
 *
 * The caller must call tracepoint_probe_update_all() afterwards.
 */
int tracepoint_probe_register_noupdate(const char *name, void *probe)
{
        void *old;

        mutex_lock(&tracepoints_mutex);
        old = tracepoint_add_probe(name, probe);
        if (IS_ERR(old)) {
                mutex_unlock(&tracepoints_mutex);
                return PTR_ERR(old);
        }
        tracepoint_add_old_probes(old);
        mutex_unlock(&tracepoints_mutex);
        return 0;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);

/**
 * tracepoint_probe_unregister_noupdate - remove a probe but do not disconnect it
 * @name: tracepoint name
 * @probe: probe function pointer
 *
 * The caller must call tracepoint_probe_update_all() afterwards.
 */
int tracepoint_probe_unregister_noupdate(const char *name, void *probe)
{
        void *old;

        mutex_lock(&tracepoints_mutex);
        old = tracepoint_remove_probe(name, probe);
        if (IS_ERR(old)) {
                mutex_unlock(&tracepoints_mutex);
                return PTR_ERR(old);
        }
        tracepoint_add_old_probes(old);
        mutex_unlock(&tracepoints_mutex);
        return 0;
}
EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);

/**
 * tracepoint_probe_update_all - update tracepoints
 */
void tracepoint_probe_update_all(void)
{
        LIST_HEAD(release_probes);
        struct tp_probes *pos, *next;

        mutex_lock(&tracepoints_mutex);
        if (!need_update) {
                mutex_unlock(&tracepoints_mutex);
                return;
        }
        if (!list_empty(&old_probes))
                list_replace_init(&old_probes, &release_probes);
        need_update = 0;
        mutex_unlock(&tracepoints_mutex);

        tracepoint_update_probes();
        list_for_each_entry_safe(pos, next, &release_probes, u.list) {
                list_del(&pos->u.list);
                call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
        }
}
EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);

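/*
 * Batch update sketch: the _noupdate variants let a caller queue several
 * probe changes and pay for the tracepoint site update only once. The names
 * and probe functions below are hypothetical.
 *
 *      tracepoint_probe_register_noupdate("sched_wakeup", probe_wakeup);
 *      tracepoint_probe_register_noupdate("sched_switch", probe_switch);
 *      tracepoint_probe_update_all();
 */
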
/**
 * tracepoint_get_iter_range - Get the next tracepoint in a given range.
 * @tracepoint: current tracepoint (in), next tracepoint (out)
 * @begin: beginning of the range
 * @end: end of the range
 *
 * Returns whether a next tracepoint has been found (1) or not (0).
 * Will return the first tracepoint in the range if the input tracepoint is
 * NULL.
 */
int tracepoint_get_iter_range(struct tracepoint **tracepoint,
        struct tracepoint *begin, struct tracepoint *end)
{
        if (!*tracepoint && begin != end) {
                *tracepoint = begin;
                return 1;
        }
        if (*tracepoint >= begin && *tracepoint < end)
                return 1;
        return 0;
}
EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);

static void tracepoint_get_iter(struct tracepoint_iter *iter)
{
        int found = 0;

        /* Core kernel tracepoints */
        if (!iter->module) {
                found = tracepoint_get_iter_range(&iter->tracepoint,
                                __start___tracepoints, __stop___tracepoints);
                if (found)
                        goto end;
        }
        /* tracepoints in modules. */
        found = module_get_iter_tracepoints(iter);
end:
        if (!found)
                tracepoint_iter_reset(iter);
}

void tracepoint_iter_start(struct tracepoint_iter *iter)
{
        tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_start);

void tracepoint_iter_next(struct tracepoint_iter *iter)
{
        iter->tracepoint++;
        /*
         * iter->tracepoint may be invalid because we blindly incremented it.
         * Make it valid again by checking it against the tracepoint ranges,
         * moving on to the tracepoints of the following modules if necessary.
         */
        tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_next);

void tracepoint_iter_stop(struct tracepoint_iter *iter)
{
}
EXPORT_SYMBOL_GPL(tracepoint_iter_stop);

void tracepoint_iter_reset(struct tracepoint_iter *iter)
{
        iter->module = NULL;
        iter->tracepoint = NULL;
}
EXPORT_SYMBOL_GPL(tracepoint_iter_reset);

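/*
 * Iterator usage sketch (illustration only, e.g. for a seq_file listing):
 * tracepoint_iter_start() positions the iterator on the first tracepoint and
 * tracepoint_iter_next() advances it across the core kernel section and then
 * the module sections, leaving iter->tracepoint NULL when exhausted.
 *
 *      struct tracepoint_iter iter;
 *
 *      tracepoint_iter_reset(&iter);
 *      tracepoint_iter_start(&iter);
 *      while (iter.tracepoint) {
 *              printk(KERN_INFO "%s\n", iter.tracepoint->name);
 *              tracepoint_iter_next(&iter);
 *      }
 *      tracepoint_iter_stop(&iter);
 */
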
#ifdef CONFIG_MODULES

int tracepoint_module_notify(struct notifier_block *self,
                             unsigned long val, void *data)
{
        struct module *mod = data;

        switch (val) {
        case MODULE_STATE_COMING:
        case MODULE_STATE_GOING:
                tracepoint_update_probe_range(mod->tracepoints,
                        mod->tracepoints + mod->num_tracepoints);
                break;
        }
        return 0;
}

struct notifier_block tracepoint_module_nb = {
        .notifier_call = tracepoint_module_notify,
        .priority = 0,
};

static int init_tracepoints(void)
{
        return register_module_notifier(&tracepoint_module_nb);
}
__initcall(init_tracepoints);

#endif /* CONFIG_MODULES */

#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS

/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
static int sys_tracepoint_refcount;

void syscall_regfunc(void)
{
        unsigned long flags;
        struct task_struct *g, *t;

        if (!sys_tracepoint_refcount) {
                read_lock_irqsave(&tasklist_lock, flags);
                do_each_thread(g, t) {
                        /* Skip kernel threads. */
                        if (t->mm)
                                set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
                } while_each_thread(g, t);
                read_unlock_irqrestore(&tasklist_lock, flags);
        }
        sys_tracepoint_refcount++;
}

void syscall_unregfunc(void)
{
        unsigned long flags;
        struct task_struct *g, *t;

        sys_tracepoint_refcount--;
        if (!sys_tracepoint_refcount) {
                read_lock_irqsave(&tasklist_lock, flags);
                do_each_thread(g, t) {
                        clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
                } while_each_thread(g, t);
                read_unlock_irqrestore(&tasklist_lock, flags);
        }
}
#endif