linux/kernel/events/hw_breakpoint.c
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) IBM Corporation, 2009
 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Thanks to Ingo Molnar for his many suggestions.
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 * This file contains the arch-independent routines.
 */

#include <linux/irqflags.h>
#include <linux/kallsyms.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <linux/hw_breakpoint.h>


/*
 * Constraints data
 */

/* Number of pinned cpu breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);

/* Number of pinned task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);

/* Number of non-pinned cpu/task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);

static int nr_slots[TYPE_MAX];

/* Keep track of the breakpoints attached to tasks */
static LIST_HEAD(bp_task_head);

static int constraints_initialized;

/* Gather the total number of pinned and un-pinned breakpoints in a cpu set */
struct bp_busy_slots {
        unsigned int pinned;
        unsigned int flexible;
};

/* Serialize accesses to the above constraints */
static DEFINE_MUTEX(nr_bp_mutex);

__weak int hw_breakpoint_weight(struct perf_event *bp)
{
        return 1;
}

static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
{
        if (bp->attr.bp_type & HW_BREAKPOINT_RW)
                return TYPE_DATA;

        return TYPE_INST;
}

/*
 * Report the maximum number of pinned breakpoints a task
 * has on this cpu
 */
static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
{
        int i;
        unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);

        for (i = nr_slots[type] - 1; i >= 0; i--) {
                if (tsk_pinned[i] > 0)
                        return i + 1;
        }

        return 0;
}

/*
 * Count the number of breakpoints of the same type and same task.
 * The given event must not be on the list.
 */
static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
{
        struct task_struct *tsk = bp->hw.bp_target;
        struct perf_event *iter;
        int count = 0;

        list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
                if (iter->hw.bp_target == tsk &&
                    find_slot_idx(iter) == type &&
                    (iter->cpu < 0 || cpu == iter->cpu))
                        count += hw_breakpoint_weight(iter);
        }

        return count;
}

/*
 * Report the number of pinned/un-pinned breakpoints we have in
 * a given cpu (cpu > -1) or in all of them (cpu = -1).
 */
static void
fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
                    enum bp_type_idx type)
{
        int cpu = bp->cpu;
        struct task_struct *tsk = bp->hw.bp_target;

        if (cpu >= 0) {
                slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
                if (!tsk)
                        slots->pinned += max_task_bp_pinned(cpu, type);
                else
                        slots->pinned += task_bp_pinned(cpu, bp, type);
                slots->flexible = per_cpu(nr_bp_flexible[type], cpu);

                return;
        }

        for_each_possible_cpu(cpu) {
                unsigned int nr;

                nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
                if (!tsk)
                        nr += max_task_bp_pinned(cpu, type);
                else
                        nr += task_bp_pinned(cpu, bp, type);

                if (nr > slots->pinned)
                        slots->pinned = nr;

                nr = per_cpu(nr_bp_flexible[type], cpu);

                if (nr > slots->flexible)
                        slots->flexible = nr;
        }
}

/*
 * For now, continue to consider flexible as pinned, until we can
 * ensure no flexible event can ever be scheduled before a pinned event
 * on the same cpu.
 */
static void
fetch_this_slot(struct bp_busy_slots *slots, int weight)
{
        slots->pinned += weight;
}

/*
 * Add or remove a pinned breakpoint for the given task in our constraint table
 */
static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
                                enum bp_type_idx type, int weight)
{
        unsigned int *tsk_pinned;
        int old_count = 0;
        int old_idx = 0;
        int idx = 0;

        old_count = task_bp_pinned(cpu, bp, type);
        old_idx = old_count - 1;
        idx = old_idx + weight;

        /* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
        tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
        if (enable) {
                tsk_pinned[idx]++;
                if (old_count > 0)
                        tsk_pinned[old_idx]--;
        } else {
                tsk_pinned[idx]--;
                if (old_count > 0)
                        tsk_pinned[old_idx]++;
        }
}
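
/*
 * Worked example (illustrative, with weight == 1): a task that already owns
 * two pinned breakpoints of this type has been counted in tsk_pinned[1].
 * Enabling a third gives old_count == 2, old_idx == 1, idx == 2, so
 * tsk_pinned[1] is decremented and tsk_pinned[2] incremented: the task is
 * now accounted as one task owning three breakpoints.  Disabling that
 * breakpoint performs the reverse updates.
 */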

/*
 * Add/remove the given breakpoint in our constraint table
 */
static void
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
               int weight)
{
        int cpu = bp->cpu;
        struct task_struct *tsk = bp->hw.bp_target;

        /* Pinned counter cpu profiling */
        if (!tsk) {

                if (enable)
                        per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
                else
                        per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
                return;
        }

        /* Pinned counter task profiling */

        if (!enable)
                list_del(&bp->hw.bp_list);

        if (cpu >= 0) {
                toggle_bp_task_slot(bp, cpu, enable, type, weight);
        } else {
                for_each_possible_cpu(cpu)
                        toggle_bp_task_slot(bp, cpu, enable, type, weight);
        }

        if (enable)
                list_add_tail(&bp->hw.bp_list, &bp_task_head);
}

/*
 * Function to perform processor-specific cleanup during unregistration
 */
__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
{
        /*
         * A weak stub function here for those archs that don't define
         * it inside arch/.../kernel/hw_breakpoint.c
         */
}

/*
 * Constraints to check before allowing this new breakpoint counter:
 *
 *  == Non-pinned counter == (Considered as pinned for now)
 *
 *   - If attached to a single cpu, check:
 *
 *       (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
 *           + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
 *
 *       -> If there are already non-pinned counters in this cpu, it means
 *          there is already a free slot for them.
 *          Otherwise, we check that the maximum number of per task
 *          breakpoints (for this cpu) plus the number of per cpu breakpoints
 *          (for this cpu) doesn't cover every register.
 *
 *   - If attached to every cpu, check:
 *
 *       (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
 *           + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
 *
 *       -> This is roughly the same, except we check the number of per cpu
 *          bp for every cpu and we keep the max one. Same for the per task
 *          breakpoints.
 *
 *
 * == Pinned counter ==
 *
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
 *            + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
 *
 *       -> Same checks as before. But now the nr_bp_flexible, if any, must keep
 *          at least one register (or they will never be fed).
 *
 *   - If attached to every cpu, check:
 *
 *       ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
 *            + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
 */
static int __reserve_bp_slot(struct perf_event *bp)
{
        struct bp_busy_slots slots = {0};
        enum bp_type_idx type;
        int weight;

        /* We couldn't initialize breakpoint constraints on boot */
        if (!constraints_initialized)
                return -ENOMEM;

        /* Basic checks */
        if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
            bp->attr.bp_type == HW_BREAKPOINT_INVALID)
                return -EINVAL;

        type = find_slot_idx(bp);
        weight = hw_breakpoint_weight(bp);

        fetch_bp_busy_slots(&slots, bp, type);
        /*
         * Simulate the addition of this breakpoint to the constraints
         * and see the result.
         */
        fetch_this_slot(&slots, weight);

        /* Flexible counters need to keep at least one slot */
        if (slots.pinned + (!!slots.flexible) > nr_slots[type])
                return -ENOSPC;

        toggle_bp_slot(bp, true, type, weight);

        return 0;
}
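
/*
 * Worked example (illustrative, assuming nr_slots[TYPE_DATA] == 4 as on x86):
 * a task already owns two pinned data breakpoints on every cpu, and cpu 1
 * additionally has one pinned per-cpu data breakpoint.  For a new breakpoint
 * of weight 1 bound to that task, fetch_bp_busy_slots() reports
 * slots.pinned == 3 (the cpu 1 maximum) and fetch_this_slot() raises it to 4,
 * which still fits.  One more task breakpoint would give 5 > 4 and fail with
 * -ENOSPC, as would 4 pinned slots combined with any flexible counter.
 */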

int reserve_bp_slot(struct perf_event *bp)
{
        int ret;

        mutex_lock(&nr_bp_mutex);

        ret = __reserve_bp_slot(bp);

        mutex_unlock(&nr_bp_mutex);

        return ret;
}

static void __release_bp_slot(struct perf_event *bp)
{
        enum bp_type_idx type;
        int weight;

        type = find_slot_idx(bp);
        weight = hw_breakpoint_weight(bp);
        toggle_bp_slot(bp, false, type, weight);
}

void release_bp_slot(struct perf_event *bp)
{
        mutex_lock(&nr_bp_mutex);

        arch_unregister_hw_breakpoint(bp);
        __release_bp_slot(bp);

        mutex_unlock(&nr_bp_mutex);
}

/*
 * Allow the kernel debugger to reserve breakpoint slots without
 * taking a lock, using the dbg_* variants of the reserve and
 * release breakpoint slot functions.
 */
int dbg_reserve_bp_slot(struct perf_event *bp)
{
        if (mutex_is_locked(&nr_bp_mutex))
                return -1;

        return __reserve_bp_slot(bp);
}

int dbg_release_bp_slot(struct perf_event *bp)
{
        if (mutex_is_locked(&nr_bp_mutex))
                return -1;

        __release_bp_slot(bp);

        return 0;
}

static int validate_hw_breakpoint(struct perf_event *bp)
{
        int ret;

        ret = arch_validate_hwbkpt_settings(bp);
        if (ret)
                return ret;

        if (arch_check_bp_in_kernelspace(bp)) {
                if (bp->attr.exclude_kernel)
                        return -EINVAL;
                /*
                 * Don't let unprivileged users set a breakpoint in the trap
                 * path to avoid trap recursion attacks.
                 */
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
        }

        return 0;
}

int register_perf_hw_breakpoint(struct perf_event *bp)
{
        int ret;

        ret = reserve_bp_slot(bp);
        if (ret)
                return ret;

        ret = validate_hw_breakpoint(bp);

        /* if validate_hw_breakpoint() fails then release bp slot */
        if (ret)
                release_bp_slot(bp);

        return ret;
}

/**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data available to the @triggered callback
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
                            void *context,
                            struct task_struct *tsk)
{
        return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
                                                context);
}
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
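
/*
 * Example (illustrative sketch; the example_* names below are placeholders,
 * not part of this file): install a 4-byte write watchpoint on @addr in
 * @tsk's context, roughly the way the ptrace users of this interface do.
 */
static void
example_wp_handler(struct perf_event *bp, struct perf_sample_data *data,
                   struct pt_regs *regs)
{
        pr_info("watchpoint at 0x%llx hit\n", bp->attr.bp_addr);
}

static __maybe_unused struct perf_event *
example_register_user_wp(struct task_struct *tsk, unsigned long addr)
{
        struct perf_event_attr attr;

        hw_breakpoint_init(&attr);
        attr.bp_addr = addr;
        attr.bp_len = HW_BREAKPOINT_LEN_4;
        attr.bp_type = HW_BREAKPOINT_W;

        /* NULL context: nothing extra for the handler to look up */
        return register_user_hw_breakpoint(&attr, example_wp_handler,
                                           NULL, tsk);
}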

/**
 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 * @bp: the breakpoint structure to modify
 * @attr: new breakpoint attributes
 */
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
        u64 old_addr = bp->attr.bp_addr;
        u64 old_len = bp->attr.bp_len;
        int old_type = bp->attr.bp_type;
        int err = 0;

        /*
         * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
         * will not be possible to raise IPIs that invoke __perf_event_disable.
         * So call the function directly after making sure we are targeting the
         * current task.
         */
        if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
                __perf_event_disable(bp);
        else
                perf_event_disable(bp);

        bp->attr.bp_addr = attr->bp_addr;
        bp->attr.bp_type = attr->bp_type;
        bp->attr.bp_len = attr->bp_len;

        if (attr->disabled)
                goto end;

        err = validate_hw_breakpoint(bp);
        if (!err)
                perf_event_enable(bp);

        if (err) {
                bp->attr.bp_addr = old_addr;
                bp->attr.bp_type = old_type;
                bp->attr.bp_len = old_len;
                if (!bp->attr.disabled)
                        perf_event_enable(bp);

                return err;
        }

end:
        bp->attr.disabled = attr->disabled;

        return 0;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
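
/*
 * Example (illustrative sketch; example_move_user_wp is a placeholder name):
 * move an existing user breakpoint to a new address while keeping its type,
 * length and enabled state.
 */
static __maybe_unused int
example_move_user_wp(struct perf_event *bp, unsigned long new_addr)
{
        struct perf_event_attr attr = bp->attr;

        attr.bp_addr = new_addr;

        return modify_user_hw_breakpoint(bp, &attr);
}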

/**
 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 * @bp: the breakpoint structure to unregister
 */
void unregister_hw_breakpoint(struct perf_event *bp)
{
        if (!bp)
                return;
        perf_event_release_kernel(bp);
}
EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);

/**
 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data available to the @triggered callback
 *
 * @return a set of per_cpu pointers to perf events
 */
struct perf_event * __percpu *
register_wide_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
                            void *context)
{
        struct perf_event * __percpu *cpu_events, **pevent, *bp;
        long err;
        int cpu;

        cpu_events = alloc_percpu(typeof(*cpu_events));
        if (!cpu_events)
                return (void __percpu __force *)ERR_PTR(-ENOMEM);

        get_online_cpus();
        for_each_online_cpu(cpu) {
                pevent = per_cpu_ptr(cpu_events, cpu);
                bp = perf_event_create_kernel_counter(attr, cpu, NULL,
                                                      triggered, context);

                *pevent = bp;

                if (IS_ERR(bp)) {
                        err = PTR_ERR(bp);
                        goto fail;
                }
        }
        put_online_cpus();

        return cpu_events;

fail:
        for_each_online_cpu(cpu) {
                pevent = per_cpu_ptr(cpu_events, cpu);
                if (IS_ERR(*pevent))
                        break;
                unregister_hw_breakpoint(*pevent);
        }
        put_online_cpus();

        free_percpu(cpu_events);
        return (void __percpu __force *)ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
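
/*
 * Example (illustrative sketch, loosely modelled on
 * samples/hw_breakpoint/data_breakpoint.c; the example_* names are
 * placeholders): watch every read/write of a kernel data symbol on all cpus.
 * The returned per-cpu set is checked with IS_ERR() by the caller and later
 * torn down with unregister_wide_hw_breakpoint().
 */
static void
example_wide_handler(struct perf_event *bp, struct perf_sample_data *data,
                     struct pt_regs *regs)
{
        pr_info("watched kernel symbol accessed\n");
        dump_stack();
}

static __maybe_unused struct perf_event * __percpu *
example_register_wide_wp(const char *symbol)
{
        struct perf_event_attr attr;

        hw_breakpoint_init(&attr);
        attr.bp_addr = kallsyms_lookup_name(symbol);
        attr.bp_len = HW_BREAKPOINT_LEN_4;
        attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;

        return register_wide_hw_breakpoint(&attr, example_wide_handler, NULL);
}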

/**
 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 * @cpu_events: the per cpu set of events to unregister
 */
void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
        int cpu;
        struct perf_event **pevent;

        for_each_possible_cpu(cpu) {
                pevent = per_cpu_ptr(cpu_events, cpu);
                unregister_hw_breakpoint(*pevent);
        }
        free_percpu(cpu_events);
}
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);

static struct notifier_block hw_breakpoint_exceptions_nb = {
        .notifier_call = hw_breakpoint_exceptions_notify,
        /* we need to be notified first */
        .priority = 0x7fffffff
};

static void bp_perf_event_destroy(struct perf_event *event)
{
        release_bp_slot(event);
}

static int hw_breakpoint_event_init(struct perf_event *bp)
{
        int err;

        if (bp->attr.type != PERF_TYPE_BREAKPOINT)
                return -ENOENT;

        /*
         * no branch sampling for breakpoint events
         */
        if (has_branch_stack(bp))
                return -EOPNOTSUPP;

        err = register_perf_hw_breakpoint(bp);
        if (err)
                return err;

        bp->destroy = bp_perf_event_destroy;

        return 0;
}

static int hw_breakpoint_add(struct perf_event *bp, int flags)
{
        if (!(flags & PERF_EF_START))
                bp->hw.state = PERF_HES_STOPPED;

        return arch_install_hw_breakpoint(bp);
}

static void hw_breakpoint_del(struct perf_event *bp, int flags)
{
        arch_uninstall_hw_breakpoint(bp);
}

static void hw_breakpoint_start(struct perf_event *bp, int flags)
{
        bp->hw.state = 0;
}

static void hw_breakpoint_stop(struct perf_event *bp, int flags)
{
        bp->hw.state = PERF_HES_STOPPED;
}

static int hw_breakpoint_event_idx(struct perf_event *bp)
{
        return 0;
}

static struct pmu perf_breakpoint = {
        .task_ctx_nr    = perf_sw_context, /* could eventually get its own */

        .event_init     = hw_breakpoint_event_init,
        .add            = hw_breakpoint_add,
        .del            = hw_breakpoint_del,
        .start          = hw_breakpoint_start,
        .stop           = hw_breakpoint_stop,
        .read           = hw_breakpoint_pmu_read,

        .event_idx      = hw_breakpoint_event_idx,
};
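
/*
 * Userspace reaches this PMU through the regular perf_event_open() syscall
 * with attr.type == PERF_TYPE_BREAKPOINT, e.g. (illustrative command line):
 *
 *      perf stat -e mem:0x<addr>:w -p <pid>
 *
 * which requests a write breakpoint on the given data address.
 */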

int __init init_hw_breakpoint(void)
{
        unsigned int **task_bp_pinned;
        int cpu, err_cpu;
        int i;

        for (i = 0; i < TYPE_MAX; i++)
                nr_slots[i] = hw_breakpoint_slots(i);

        for_each_possible_cpu(cpu) {
                for (i = 0; i < TYPE_MAX; i++) {
                        task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
                        *task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
                                                  GFP_KERNEL);
                        if (!*task_bp_pinned)
                                goto err_alloc;
                }
        }

        constraints_initialized = 1;

        perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);

        return register_die_notifier(&hw_breakpoint_exceptions_nb);

 err_alloc:
        for_each_possible_cpu(err_cpu) {
                for (i = 0; i < TYPE_MAX; i++)
                        kfree(per_cpu(nr_task_bp_pinned[i], err_cpu));
                if (err_cpu == cpu)
                        break;
        }

        return -ENOMEM;
}