linux/kernel/tracepoint.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2008 Mathieu Desnoyers
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License as published by
   6 * the Free Software Foundation; either version 2 of the License, or
   7 * (at your option) any later version.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write to the Free Software
  16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17 */
  18#include <linux/module.h>
  19#include <linux/mutex.h>
  20#include <linux/types.h>
  21#include <linux/jhash.h>
  22#include <linux/list.h>
  23#include <linux/rcupdate.h>
  24#include <linux/tracepoint.h>
  25#include <linux/err.h>
  26#include <linux/slab.h>
  27#include <linux/sched.h>
  28#include <linux/jump_label.h>
  29
  30extern struct tracepoint * const __start___tracepoints_ptrs[];
  31extern struct tracepoint * const __stop___tracepoints_ptrs[];
  32
  33/* Set to 1 to enable tracepoint debug output */
  34static const int tracepoint_debug;
  35
  36/*
  37 * tracepoints_mutex nests inside module_mutex. Tracepoints mutex protects the
  38 * builtin and module tracepoints and the hash table.
  39 */
  40static DEFINE_MUTEX(tracepoints_mutex);
  41
  42/*
  43 * Tracepoint hash table, containing the active tracepoints.
  44 * Protected by tracepoints_mutex.
  45 */
  46#define TRACEPOINT_HASH_BITS 6
  47#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
  48static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
  49
  50/*
  51 * Note about RCU :
  52 * It is used to delay the free of multiple probes array until a quiescent
  53 * state is reached.
  54 * Tracepoint entries modifications are protected by the tracepoints_mutex.
  55 */
  56struct tracepoint_entry {
  57        struct hlist_node hlist;
  58        struct tracepoint_func *funcs;
  59        int refcount;   /* Number of times armed. 0 if disarmed. */
  60        char name[0];
  61};
  62
  63struct tp_probes {
  64        union {
  65                struct rcu_head rcu;
  66                struct list_head list;
  67        } u;
  68        struct tracepoint_func probes[0];
  69};
  70
  71static inline void *allocate_probes(int count)
  72{
  73        struct tp_probes *p  = kmalloc(count * sizeof(struct tracepoint_func)
  74                        + sizeof(struct tp_probes), GFP_KERNEL);
  75        return p == NULL ? NULL : p->probes;
  76}
  77
  78static void rcu_free_old_probes(struct rcu_head *head)
  79{
  80        kfree(container_of(head, struct tp_probes, u.rcu));
  81}
  82
  83static inline void release_probes(struct tracepoint_func *old)
  84{
  85        if (old) {
  86                struct tp_probes *tp_probes = container_of(old,
  87                        struct tp_probes, probes[0]);
  88                call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
  89        }
  90}
  91
  92static void debug_print_probes(struct tracepoint_entry *entry)
  93{
  94        int i;
  95
  96        if (!tracepoint_debug || !entry->funcs)
  97                return;
  98
  99        for (i = 0; entry->funcs[i].func; i++)
 100                printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i].func);
 101}
 102
 103static struct tracepoint_func *
 104tracepoint_entry_add_probe(struct tracepoint_entry *entry,
 105                           void *probe, void *data)
 106{
 107        int nr_probes = 0;
 108        struct tracepoint_func *old, *new;
 109
 110        WARN_ON(!probe);
 111
 112        debug_print_probes(entry);
 113        old = entry->funcs;
 114        if (old) {
 115                /* (N -> N+1), (N != 0, 1) probes */
 116                for (nr_probes = 0; old[nr_probes].func; nr_probes++)
 117                        if (old[nr_probes].func == probe &&
 118                            old[nr_probes].data == data)
 119                                return ERR_PTR(-EEXIST);
 120        }
 121        /* + 2 : one for new probe, one for NULL func */
 122        new = allocate_probes(nr_probes + 2);
 123        if (new == NULL)
 124                return ERR_PTR(-ENOMEM);
 125        if (old)
 126                memcpy(new, old, nr_probes * sizeof(struct tracepoint_func));
 127        new[nr_probes].func = probe;
 128        new[nr_probes].data = data;
 129        new[nr_probes + 1].func = NULL;
 130        entry->refcount = nr_probes + 1;
 131        entry->funcs = new;
 132        debug_print_probes(entry);
 133        return old;
 134}
 135
 136static void *
 137tracepoint_entry_remove_probe(struct tracepoint_entry *entry,
 138                              void *probe, void *data)
 139{
 140        int nr_probes = 0, nr_del = 0, i;
 141        struct tracepoint_func *old, *new;
 142
 143        old = entry->funcs;
 144
 145        if (!old)
 146                return ERR_PTR(-ENOENT);
 147
 148        debug_print_probes(entry);
 149        /* (N -> M), (N > 1, M >= 0) probes */
 150        for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
 151                if (!probe ||
 152                    (old[nr_probes].func == probe &&
 153                     old[nr_probes].data == data))
 154                        nr_del++;
 155        }
 156
 157        if (nr_probes - nr_del == 0) {
 158                /* N -> 0, (N > 1) */
 159                entry->funcs = NULL;
 160                entry->refcount = 0;
 161                debug_print_probes(entry);
 162                return old;
 163        } else {
 164                int j = 0;
 165                /* N -> M, (N > 1, M > 0) */
 166                /* + 1 for NULL */
 167                new = allocate_probes(nr_probes - nr_del + 1);
 168                if (new == NULL)
 169                        return ERR_PTR(-ENOMEM);
 170                for (i = 0; old[i].func; i++)
 171                        if (probe &&
 172                            (old[i].func != probe || old[i].data != data))
 173                                new[j++] = old[i];
 174                new[nr_probes - nr_del].func = NULL;
 175                entry->refcount = nr_probes - nr_del;
 176                entry->funcs = new;
 177        }
 178        debug_print_probes(entry);
 179        return old;
 180}
 181
 182/*
 183 * Get tracepoint if the tracepoint is present in the tracepoint hash table.
 184 * Must be called with tracepoints_mutex held.
 185 * Returns NULL if not present.
 186 */
 187static struct tracepoint_entry *get_tracepoint(const char *name)
 188{
 189        struct hlist_head *head;
 190        struct hlist_node *node;
 191        struct tracepoint_entry *e;
 192        u32 hash = jhash(name, strlen(name), 0);
 193
 194        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 195        hlist_for_each_entry(e, node, head, hlist) {
 196                if (!strcmp(name, e->name))
 197                        return e;
 198        }
 199        return NULL;
 200}
 201
 202/*
 203 * Add the tracepoint to the tracepoint hash table. Must be called with
 204 * tracepoints_mutex held.
 205 */
 206static struct tracepoint_entry *add_tracepoint(const char *name)
 207{
 208        struct hlist_head *head;
 209        struct hlist_node *node;
 210        struct tracepoint_entry *e;
 211        size_t name_len = strlen(name) + 1;
 212        u32 hash = jhash(name, name_len-1, 0);
 213
 214        head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)];
 215        hlist_for_each_entry(e, node, head, hlist) {
 216                if (!strcmp(name, e->name)) {
 217                        printk(KERN_NOTICE
 218                                "tracepoint %s busy\n", name);
 219                        return ERR_PTR(-EEXIST);        /* Already there */
 220                }
 221        }
 222        /*
 223         * Using kmalloc here to allocate a variable length element. Could
 224         * cause some memory fragmentation if overused.
 225         */
 226        e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL);
 227        if (!e)
 228                return ERR_PTR(-ENOMEM);
 229        memcpy(&e->name[0], name, name_len);
 230        e->funcs = NULL;
 231        e->refcount = 0;
 232        hlist_add_head(&e->hlist, head);
 233        return e;
 234}
 235
 236/*
 237 * Remove the tracepoint from the tracepoint hash table. Must be called with
 238 * mutex_lock held.
 239 */
 240static inline void remove_tracepoint(struct tracepoint_entry *e)
 241{
 242        hlist_del(&e->hlist);
 243        kfree(e);
 244}
 245
 246/*
 247 * Sets the probe callback corresponding to one tracepoint.
 248 */
 249static void set_tracepoint(struct tracepoint_entry **entry,
 250        struct tracepoint *elem, int active)
 251{
 252        WARN_ON(strcmp((*entry)->name, elem->name) != 0);
 253
 254        if (elem->regfunc && !elem->state && active)
 255                elem->regfunc();
 256        else if (elem->unregfunc && elem->state && !active)
 257                elem->unregfunc();
 258
 259        /*
 260         * rcu_assign_pointer has a smp_wmb() which makes sure that the new
 261         * probe callbacks array is consistent before setting a pointer to it.
 262         * This array is referenced by __DO_TRACE from
 263         * include/linux/tracepoints.h. A matching smp_read_barrier_depends()
 264         * is used.
 265         */
 266        rcu_assign_pointer(elem->funcs, (*entry)->funcs);
 267        if (!elem->state && active) {
 268                jump_label_enable(&elem->state);
 269                elem->state = active;
 270        } else if (elem->state && !active) {
 271                jump_label_disable(&elem->state);
 272                elem->state = active;
 273        }
 274}
 275
 276/*
 277 * Disable a tracepoint and its probe callback.
 278 * Note: only waiting an RCU period after setting elem->call to the empty
 279 * function insures that the original callback is not used anymore. This insured
 280 * by preempt_disable around the call site.
 281 */
 282static void disable_tracepoint(struct tracepoint *elem)
 283{
 284        if (elem->unregfunc && elem->state)
 285                elem->unregfunc();
 286
 287        if (elem->state) {
 288                jump_label_disable(&elem->state);
 289                elem->state = 0;
 290        }
 291        rcu_assign_pointer(elem->funcs, NULL);
 292}
 293
 294/**
 295 * tracepoint_update_probe_range - Update a probe range
 296 * @begin: beginning of the range
 297 * @end: end of the range
 298 *
 299 * Updates the probe callback corresponding to a range of tracepoints.
 300 */
 301void tracepoint_update_probe_range(struct tracepoint * const *begin,
 302                                   struct tracepoint * const *end)
 303{
 304        struct tracepoint * const *iter;
 305        struct tracepoint_entry *mark_entry;
 306
 307        if (!begin)
 308                return;
 309
 310        mutex_lock(&tracepoints_mutex);
 311        for (iter = begin; iter < end; iter++) {
 312                mark_entry = get_tracepoint((*iter)->name);
 313                if (mark_entry) {
 314                        set_tracepoint(&mark_entry, *iter,
 315                                        !!mark_entry->refcount);
 316                } else {
 317                        disable_tracepoint(*iter);
 318                }
 319        }
 320        mutex_unlock(&tracepoints_mutex);
 321}
 322
 323/*
 324 * Update probes, removing the faulty probes.
 325 */
 326static void tracepoint_update_probes(void)
 327{
 328        /* Core kernel tracepoints */
 329        tracepoint_update_probe_range(__start___tracepoints_ptrs,
 330                __stop___tracepoints_ptrs);
 331        /* tracepoints in modules. */
 332        module_update_tracepoints();
 333}
 334
 335static struct tracepoint_func *
 336tracepoint_add_probe(const char *name, void *probe, void *data)
 337{
 338        struct tracepoint_entry *entry;
 339        struct tracepoint_func *old;
 340
 341        entry = get_tracepoint(name);
 342        if (!entry) {
 343                entry = add_tracepoint(name);
 344                if (IS_ERR(entry))
 345                        return (struct tracepoint_func *)entry;
 346        }
 347        old = tracepoint_entry_add_probe(entry, probe, data);
 348        if (IS_ERR(old) && !entry->refcount)
 349                remove_tracepoint(entry);
 350        return old;
 351}
 352
 353/**
 354 * tracepoint_probe_register -  Connect a probe to a tracepoint
 355 * @name: tracepoint name
 356 * @probe: probe handler
 357 *
 358 * Returns 0 if ok, error value on error.
 359 * The probe address must at least be aligned on the architecture pointer size.
 360 */
 361int tracepoint_probe_register(const char *name, void *probe, void *data)
 362{
 363        struct tracepoint_func *old;
 364
 365        mutex_lock(&tracepoints_mutex);
 366        old = tracepoint_add_probe(name, probe, data);
 367        mutex_unlock(&tracepoints_mutex);
 368        if (IS_ERR(old))
 369                return PTR_ERR(old);
 370
 371        tracepoint_update_probes();             /* may update entry */
 372        release_probes(old);
 373        return 0;
 374}
 375EXPORT_SYMBOL_GPL(tracepoint_probe_register);
 376
 377static struct tracepoint_func *
 378tracepoint_remove_probe(const char *name, void *probe, void *data)
 379{
 380        struct tracepoint_entry *entry;
 381        struct tracepoint_func *old;
 382
 383        entry = get_tracepoint(name);
 384        if (!entry)
 385                return ERR_PTR(-ENOENT);
 386        old = tracepoint_entry_remove_probe(entry, probe, data);
 387        if (IS_ERR(old))
 388                return old;
 389        if (!entry->refcount)
 390                remove_tracepoint(entry);
 391        return old;
 392}
 393
 394/**
 395 * tracepoint_probe_unregister -  Disconnect a probe from a tracepoint
 396 * @name: tracepoint name
 397 * @probe: probe function pointer
 398 *
 399 * We do not need to call a synchronize_sched to make sure the probes have
 400 * finished running before doing a module unload, because the module unload
 401 * itself uses stop_machine(), which insures that every preempt disabled section
 402 * have finished.
 403 */
 404int tracepoint_probe_unregister(const char *name, void *probe, void *data)
 405{
 406        struct tracepoint_func *old;
 407
 408        mutex_lock(&tracepoints_mutex);
 409        old = tracepoint_remove_probe(name, probe, data);
 410        mutex_unlock(&tracepoints_mutex);
 411        if (IS_ERR(old))
 412                return PTR_ERR(old);
 413
 414        tracepoint_update_probes();             /* may update entry */
 415        release_probes(old);
 416        return 0;
 417}
 418EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 419
 420static LIST_HEAD(old_probes);
 421static int need_update;
 422
 423static void tracepoint_add_old_probes(void *old)
 424{
 425        need_update = 1;
 426        if (old) {
 427                struct tp_probes *tp_probes = container_of(old,
 428                        struct tp_probes, probes[0]);
 429                list_add(&tp_probes->u.list, &old_probes);
 430        }
 431}
 432
 433/**
 434 * tracepoint_probe_register_noupdate -  register a probe but not connect
 435 * @name: tracepoint name
 436 * @probe: probe handler
 437 *
 438 * caller must call tracepoint_probe_update_all()
 439 */
 440int tracepoint_probe_register_noupdate(const char *name, void *probe,
 441                                       void *data)
 442{
 443        struct tracepoint_func *old;
 444
 445        mutex_lock(&tracepoints_mutex);
 446        old = tracepoint_add_probe(name, probe, data);
 447        if (IS_ERR(old)) {
 448                mutex_unlock(&tracepoints_mutex);
 449                return PTR_ERR(old);
 450        }
 451        tracepoint_add_old_probes(old);
 452        mutex_unlock(&tracepoints_mutex);
 453        return 0;
 454}
 455EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
 456
 457/**
 458 * tracepoint_probe_unregister_noupdate -  remove a probe but not disconnect
 459 * @name: tracepoint name
 460 * @probe: probe function pointer
 461 *
 462 * caller must call tracepoint_probe_update_all()
 463 */
 464int tracepoint_probe_unregister_noupdate(const char *name, void *probe,
 465                                         void *data)
 466{
 467        struct tracepoint_func *old;
 468
 469        mutex_lock(&tracepoints_mutex);
 470        old = tracepoint_remove_probe(name, probe, data);
 471        if (IS_ERR(old)) {
 472                mutex_unlock(&tracepoints_mutex);
 473                return PTR_ERR(old);
 474        }
 475        tracepoint_add_old_probes(old);
 476        mutex_unlock(&tracepoints_mutex);
 477        return 0;
 478}
 479EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate);
 480
 481/**
 482 * tracepoint_probe_update_all -  update tracepoints
 483 */
 484void tracepoint_probe_update_all(void)
 485{
 486        LIST_HEAD(release_probes);
 487        struct tp_probes *pos, *next;
 488
 489        mutex_lock(&tracepoints_mutex);
 490        if (!need_update) {
 491                mutex_unlock(&tracepoints_mutex);
 492                return;
 493        }
 494        if (!list_empty(&old_probes))
 495                list_replace_init(&old_probes, &release_probes);
 496        need_update = 0;
 497        mutex_unlock(&tracepoints_mutex);
 498
 499        tracepoint_update_probes();
 500        list_for_each_entry_safe(pos, next, &release_probes, u.list) {
 501                list_del(&pos->u.list);
 502                call_rcu_sched(&pos->u.rcu, rcu_free_old_probes);
 503        }
 504}
 505EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
 506
 507/**
 508 * tracepoint_get_iter_range - Get a next tracepoint iterator given a range.
 509 * @tracepoint: current tracepoints (in), next tracepoint (out)
 510 * @begin: beginning of the range
 511 * @end: end of the range
 512 *
 513 * Returns whether a next tracepoint has been found (1) or not (0).
 514 * Will return the first tracepoint in the range if the input tracepoint is
 515 * NULL.
 516 */
 517int tracepoint_get_iter_range(struct tracepoint * const **tracepoint,
 518        struct tracepoint * const *begin, struct tracepoint * const *end)
 519{
 520        if (!*tracepoint && begin != end) {
 521                *tracepoint = begin;
 522                return 1;
 523        }
 524        if (*tracepoint >= begin && *tracepoint < end)
 525                return 1;
 526        return 0;
 527}
 528EXPORT_SYMBOL_GPL(tracepoint_get_iter_range);
 529
 530static void tracepoint_get_iter(struct tracepoint_iter *iter)
 531{
 532        int found = 0;
 533
 534        /* Core kernel tracepoints */
 535        if (!iter->module) {
 536                found = tracepoint_get_iter_range(&iter->tracepoint,
 537                                __start___tracepoints_ptrs,
 538                                __stop___tracepoints_ptrs);
 539                if (found)
 540                        goto end;
 541        }
 542        /* tracepoints in modules. */
 543        found = module_get_iter_tracepoints(iter);
 544end:
 545        if (!found)
 546                tracepoint_iter_reset(iter);
 547}
 548
 549void tracepoint_iter_start(struct tracepoint_iter *iter)
 550{
 551        tracepoint_get_iter(iter);
 552}
 553EXPORT_SYMBOL_GPL(tracepoint_iter_start);
 554
 555void tracepoint_iter_next(struct tracepoint_iter *iter)
 556{
 557        iter->tracepoint++;
 558        /*
 559         * iter->tracepoint may be invalid because we blindly incremented it.
 560         * Make sure it is valid by marshalling on the tracepoints, getting the
 561         * tracepoints from following modules if necessary.
 562         */
 563        tracepoint_get_iter(iter);
 564}
 565EXPORT_SYMBOL_GPL(tracepoint_iter_next);
 566
 567void tracepoint_iter_stop(struct tracepoint_iter *iter)
 568{
 569}
 570EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
 571
 572void tracepoint_iter_reset(struct tracepoint_iter *iter)
 573{
 574        iter->module = NULL;
 575        iter->tracepoint = NULL;
 576}
 577EXPORT_SYMBOL_GPL(tracepoint_iter_reset);
 578
 579#ifdef CONFIG_MODULES
 580
 581int tracepoint_module_notify(struct notifier_block *self,
 582                             unsigned long val, void *data)
 583{
 584        struct module *mod = data;
 585
 586        switch (val) {
 587        case MODULE_STATE_COMING:
 588        case MODULE_STATE_GOING:
 589                tracepoint_update_probe_range(mod->tracepoints_ptrs,
 590                        mod->tracepoints_ptrs + mod->num_tracepoints);
 591                break;
 592        }
 593        return 0;
 594}
 595
 596struct notifier_block tracepoint_module_nb = {
 597        .notifier_call = tracepoint_module_notify,
 598        .priority = 0,
 599};
 600
 601static int init_tracepoints(void)
 602{
 603        return register_module_notifier(&tracepoint_module_nb);
 604}
 605__initcall(init_tracepoints);
 606
 607#endif /* CONFIG_MODULES */
 608
 609#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
 610
 611/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
 612static int sys_tracepoint_refcount;
 613
 614void syscall_regfunc(void)
 615{
 616        unsigned long flags;
 617        struct task_struct *g, *t;
 618
 619        if (!sys_tracepoint_refcount) {
 620                read_lock_irqsave(&tasklist_lock, flags);
 621                do_each_thread(g, t) {
 622                        /* Skip kernel threads. */
 623                        if (t->mm)
 624                                set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
 625                } while_each_thread(g, t);
 626                read_unlock_irqrestore(&tasklist_lock, flags);
 627        }
 628        sys_tracepoint_refcount++;
 629}
 630
 631void syscall_unregfunc(void)
 632{
 633        unsigned long flags;
 634        struct task_struct *g, *t;
 635
 636        sys_tracepoint_refcount--;
 637        if (!sys_tracepoint_refcount) {
 638                read_lock_irqsave(&tasklist_lock, flags);
 639                do_each_thread(g, t) {
 640                        clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
 641                } while_each_thread(g, t);
 642                read_unlock_irqrestore(&tasklist_lock, flags);
 643        }
 644}
 645#endif
 646