linux/kernel/events/hw_breakpoint.c
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) IBM Corporation, 2009
 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Thanks to Ingo Molnar for his many suggestions.
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 * This file contains the arch-independent routines.
 */

#include <linux/irqflags.h>
#include <linux/kallsyms.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/bug.h>

#include <linux/hw_breakpoint.h>
/*
 * Constraints data
 */
struct bp_cpuinfo {
        /* Number of pinned cpu breakpoints in a cpu */
        unsigned int    cpu_pinned;
        /* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
        unsigned int    *tsk_pinned;
        /* Number of non-pinned cpu/task breakpoints in a cpu */
        unsigned int    flexible; /* XXX: placeholder, see fetch_this_slot() */
};
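
/*
 * Example of the tsk_pinned histogram: if, for TYPE_DATA on some cpu,
 * tsk_pinned[1] == 3, then three distinct tasks each have exactly two
 * pinned data breakpoints counted against that cpu.
 */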

static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
static int nr_slots[TYPE_MAX];

static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
{
        return per_cpu_ptr(bp_cpuinfo + type, cpu);
}

/* Keep track of the breakpoints attached to tasks */
static LIST_HEAD(bp_task_head);

static int constraints_initialized;

/* Gather the total number of pinned and un-pinned bp in a set of cpus */
struct bp_busy_slots {
        unsigned int pinned;
        unsigned int flexible;
};

/* Serialize accesses to the above constraints */
static DEFINE_MUTEX(nr_bp_mutex);

__weak int hw_breakpoint_weight(struct perf_event *bp)
{
        return 1;
}

static inline enum bp_type_idx find_slot_idx(u64 bp_type)
{
        if (bp_type & HW_BREAKPOINT_RW)
                return TYPE_DATA;

        return TYPE_INST;
}

/*
 * Report the maximum number of pinned breakpoints a task
 * can have on this cpu.
 */
static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
{
        unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
        int i;

        for (i = nr_slots[type] - 1; i >= 0; i--) {
                if (tsk_pinned[i] > 0)
                        return i + 1;
        }

        return 0;
}
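
/*
 * Example: with nr_slots = 4 and tsk_pinned = {2, 0, 1, 0}, one task holds
 * three pinned breakpoints (tsk_pinned[2] == 1), so the downward scan
 * returns 3: the worst case a single task imposes on this cpu.
 */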

/*
 * Count the number of breakpoints of the same type and same task.
 * The given event must not be on the list.
 */
static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
{
        struct task_struct *tsk = bp->hw.target;
        struct perf_event *iter;
        int count = 0;

        list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
                if (iter->hw.target == tsk &&
                    find_slot_idx(iter->attr.bp_type) == type &&
                    (iter->cpu < 0 || cpu == iter->cpu))
                        count += hw_breakpoint_weight(iter);
        }

        return count;
}

static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
{
        if (bp->cpu >= 0)
                return cpumask_of(bp->cpu);
        return cpu_possible_mask;
}

/*
 * Report the number of pinned/un-pinned breakpoints we have in
 * a given cpu (cpu > -1) or in all of them (cpu = -1).
 */
static void
fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
                    enum bp_type_idx type)
{
        const struct cpumask *cpumask = cpumask_of_bp(bp);
        int cpu;

        for_each_cpu(cpu, cpumask) {
                struct bp_cpuinfo *info = get_bp_info(cpu, type);
                int nr;

                nr = info->cpu_pinned;
                if (!bp->hw.target)
                        nr += max_task_bp_pinned(cpu, type);
                else
                        nr += task_bp_pinned(cpu, bp, type);

                if (nr > slots->pinned)
                        slots->pinned = nr;

                nr = info->flexible;
                if (nr > slots->flexible)
                        slots->flexible = nr;
        }
}
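
/*
 * Rough note on the asymmetry above: for a cpu-bound breakpoint
 * (!bp->hw.target) the worst case on each cpu is cpu_pinned plus whichever
 * task holds the most pinned breakpoints there, hence max_task_bp_pinned().
 * For a task-bound breakpoint, only that task's own breakpoints can occupy
 * slots on a given cpu at the same time as it, hence task_bp_pinned().
 */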

/*
 * For now, continue to consider flexible as pinned, until we can
 * ensure no flexible event can ever be scheduled before a pinned event
 * on the same cpu.
 */
static void
fetch_this_slot(struct bp_busy_slots *slots, int weight)
{
        slots->pinned += weight;
}

/*
 * Add a pinned breakpoint for the given task in our constraint table
 */
static void toggle_bp_task_slot(struct perf_event *bp, int cpu,
                                enum bp_type_idx type, int weight)
{
        unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
        int old_idx, new_idx;

        old_idx = task_bp_pinned(cpu, bp, type) - 1;
        new_idx = old_idx + weight;

        if (old_idx >= 0)
                tsk_pinned[old_idx]--;
        if (new_idx >= 0)
                tsk_pinned[new_idx]++;
}
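
/*
 * Worked example: if the task already has one pinned breakpoint of this
 * type on this cpu, old_idx = 0. Adding another with weight 1 gives
 * new_idx = 1, so tsk_pinned[0] is decremented and tsk_pinned[1]
 * incremented: the task now counts as "one task with two breakpoints"
 * in the histogram.
 */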

/*
 * Add/remove the given breakpoint in our constraint table
 */
static void
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
               int weight)
{
        const struct cpumask *cpumask = cpumask_of_bp(bp);
        int cpu;

        if (!enable)
                weight = -weight;

        /* Pinned counter cpu profiling */
        if (!bp->hw.target) {
                get_bp_info(bp->cpu, type)->cpu_pinned += weight;
                return;
        }

        /* Pinned counter task profiling */
        for_each_cpu(cpu, cpumask)
                toggle_bp_task_slot(bp, cpu, type, weight);

        if (enable)
                list_add_tail(&bp->hw.bp_list, &bp_task_head);
        else
                list_del(&bp->hw.bp_list);
}

__weak int arch_reserve_bp_slot(struct perf_event *bp)
{
        return 0;
}

__weak void arch_release_bp_slot(struct perf_event *bp)
{
}

/*
 * Function to perform processor-specific cleanup during unregistration
 */
__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
{
        /*
         * A weak stub function here for those archs that don't define
         * it inside arch/.../kernel/hw_breakpoint.c
         */
}

/*
 * Constraints to check before allowing this new breakpoint counter:
 *
 *  == Non-pinned counter == (Considered as pinned for now)
 *
 *   - If attached to a single cpu, check:
 *
 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
 *
 *       -> If there are already non-pinned counters in this cpu, it means
 *          there is already a free slot for them.
 *          Otherwise, we check that the maximum number of per task
 *          breakpoints (for this cpu) plus the number of per cpu breakpoints
 *          (for this cpu) doesn't use up every register.
 *
 *   - If attached to every cpu, check:
 *
 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
 *
 *       -> This is roughly the same, except we check the number of per cpu
 *          bp for every cpu and we keep the max one. Same for the per task
 *          breakpoints.
 *
 *
 * == Pinned counter ==
 *
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
 *
 *       -> Same checks as before. But now the info->flexible, if any, must
 *          keep at least one register (or they will never be scheduled).
 *
 *   - If attached to every cpu, check:
 *
 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
 */
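/*
 * Worked example (assuming an arch with 4 data slots, e.g. HBP_NUM == 4 on
 * x86): if a cpu already has 2 pinned cpu breakpoints and its busiest task
 * holds 1 pinned breakpoint, slots.pinned is 3. Adding a new weight-1
 * pinned breakpoint makes it 4; with at least one flexible counter also
 * present, 4 + 1 > nr_slots and __reserve_bp_slot() returns -ENOSPC.
 */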
static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
{
        struct bp_busy_slots slots = {0};
        enum bp_type_idx type;
        int weight;
        int ret;

        /* We couldn't initialize breakpoint constraints on boot */
        if (!constraints_initialized)
                return -ENOMEM;

        /* Basic checks */
        if (bp_type == HW_BREAKPOINT_EMPTY ||
            bp_type == HW_BREAKPOINT_INVALID)
                return -EINVAL;

        type = find_slot_idx(bp_type);
        weight = hw_breakpoint_weight(bp);

        fetch_bp_busy_slots(&slots, bp, type);
        /*
         * Simulate the addition of this breakpoint to the constraints
         * and see the result.
         */
        fetch_this_slot(&slots, weight);

        /* Flexible counters need to keep at least one slot */
        if (slots.pinned + (!!slots.flexible) > nr_slots[type])
                return -ENOSPC;

        ret = arch_reserve_bp_slot(bp);
        if (ret)
                return ret;

        toggle_bp_slot(bp, true, type, weight);

        return 0;
}

int reserve_bp_slot(struct perf_event *bp)
{
        int ret;

        mutex_lock(&nr_bp_mutex);

        ret = __reserve_bp_slot(bp, bp->attr.bp_type);

        mutex_unlock(&nr_bp_mutex);

        return ret;
}

static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
{
        enum bp_type_idx type;
        int weight;

        arch_release_bp_slot(bp);

        type = find_slot_idx(bp_type);
        weight = hw_breakpoint_weight(bp);
        toggle_bp_slot(bp, false, type, weight);
}

void release_bp_slot(struct perf_event *bp)
{
        mutex_lock(&nr_bp_mutex);

        arch_unregister_hw_breakpoint(bp);
        __release_bp_slot(bp, bp->attr.bp_type);

        mutex_unlock(&nr_bp_mutex);
}

static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
{
        int err;

        __release_bp_slot(bp, old_type);

        err = __reserve_bp_slot(bp, new_type);
        if (err) {
                /*
                 * Reserve the old_type slot back in case
                 * there's no space for the new type.
                 *
                 * This must succeed, because we just released
                 * the old_type slot in the __release_bp_slot
                 * call above. If not, something is broken.
                 */
                WARN_ON(__reserve_bp_slot(bp, old_type));
        }

        return err;
}

static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
{
        int ret;

        mutex_lock(&nr_bp_mutex);
        ret = __modify_bp_slot(bp, old_type, new_type);
        mutex_unlock(&nr_bp_mutex);
        return ret;
}

/*
 * Allow the kernel debugger to reserve breakpoint slots without
 * taking a lock, using the dbg_* variants of the reserve and
 * release functions.
 */
int dbg_reserve_bp_slot(struct perf_event *bp)
{
        if (mutex_is_locked(&nr_bp_mutex))
                return -1;

        return __reserve_bp_slot(bp, bp->attr.bp_type);
}

int dbg_release_bp_slot(struct perf_event *bp)
{
        if (mutex_is_locked(&nr_bp_mutex))
                return -1;

        __release_bp_slot(bp, bp->attr.bp_type);

        return 0;
}

static int hw_breakpoint_parse(struct perf_event *bp,
                               const struct perf_event_attr *attr,
                               struct arch_hw_breakpoint *hw)
{
        int err;

        err = hw_breakpoint_arch_parse(bp, attr, hw);
        if (err)
                return err;

        if (arch_check_bp_in_kernelspace(hw)) {
                if (attr->exclude_kernel)
                        return -EINVAL;
                /*
                 * Don't let unprivileged users set a breakpoint in the trap
                 * path to avoid trap recursion attacks.
                 */
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
        }

        return 0;
}

int register_perf_hw_breakpoint(struct perf_event *bp)
{
        struct arch_hw_breakpoint hw = { };
        int err;

        err = reserve_bp_slot(bp);
        if (err)
                return err;

        err = hw_breakpoint_parse(bp, &bp->attr, &hw);
        if (err) {
                release_bp_slot(bp);
                return err;
        }

        bp->hw.info = hw;

        return 0;
}

/**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data that can be used by the triggered callback
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
                            void *context,
                            struct task_struct *tsk)
{
        return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
                                                context);
}
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
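
/*
 * Illustrative usage sketch ('addr', 'my_handler' and 'tsk' are
 * placeholders); the attribute setup follows the usual
 * hw_breakpoint_init() pattern:
 *
 *        struct perf_event_attr attr;
 *        struct perf_event *bp;
 *
 *        hw_breakpoint_init(&attr);
 *        attr.bp_addr = addr;
 *        attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *        attr.bp_type = HW_BREAKPOINT_W;
 *        bp = register_user_hw_breakpoint(&attr, my_handler, NULL, tsk);
 *        if (IS_ERR(bp))
 *                return PTR_ERR(bp);
 */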

static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
                                    struct perf_event_attr *from)
{
        to->bp_addr = from->bp_addr;
        to->bp_type = from->bp_type;
        to->bp_len  = from->bp_len;
        to->disabled = from->disabled;
}

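/*
 * Check @attr against the current state of @bp and apply the update.
 * When @check is true, only changes to the bp_addr, bp_type, bp_len and
 * disabled fields are accepted; anything else fails with -EINVAL.
 */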
int
modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
                                bool check)
{
        struct arch_hw_breakpoint hw = { };
        int err;

        err = hw_breakpoint_parse(bp, attr, &hw);
        if (err)
                return err;

        if (check) {
                struct perf_event_attr old_attr;

                old_attr = bp->attr;
                hw_breakpoint_copy_attr(&old_attr, attr);
                if (memcmp(&old_attr, attr, sizeof(*attr)))
                        return -EINVAL;
        }

        if (bp->attr.bp_type != attr->bp_type) {
                err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
                if (err)
                        return err;
        }

        hw_breakpoint_copy_attr(&bp->attr, attr);
        bp->hw.info = hw;

        return 0;
}

/**
 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 * @bp: the breakpoint structure to modify
 * @attr: new breakpoint attributes
 */
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
        int err;

        /*
         * modify_user_hw_breakpoint can be invoked with IRQs disabled, and hence
         * it cannot send the IPIs needed to invoke __perf_event_disable.
         * So call the function directly after making sure we are targeting the
         * current task.
         */
        if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
                perf_event_disable_local(bp);
        else
                perf_event_disable(bp);

        err = modify_user_hw_breakpoint_check(bp, attr, false);

        if (!bp->attr.disabled)
                perf_event_enable(bp);

        return err;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);

/**
 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 * @bp: the breakpoint structure to unregister
 */
void unregister_hw_breakpoint(struct perf_event *bp)
{
        if (!bp)
                return;
        perf_event_release_kernel(bp);
}
EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);

/**
 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data that can be used by the triggered callback
 *
 * Return: a set of per_cpu pointers to perf events
 */
struct perf_event * __percpu *
register_wide_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
                            void *context)
{
        struct perf_event * __percpu *cpu_events, *bp;
        long err = 0;
        int cpu;

        cpu_events = alloc_percpu(typeof(*cpu_events));
        if (!cpu_events)
                return (void __percpu __force *)ERR_PTR(-ENOMEM);

        cpus_read_lock();
        for_each_online_cpu(cpu) {
                bp = perf_event_create_kernel_counter(attr, cpu, NULL,
                                                      triggered, context);
                if (IS_ERR(bp)) {
                        err = PTR_ERR(bp);
                        break;
                }

                per_cpu(*cpu_events, cpu) = bp;
        }
        cpus_read_unlock();

        if (likely(!err))
                return cpu_events;

        unregister_wide_hw_breakpoint(cpu_events);
        return (void __percpu __force *)ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
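
/*
 * Illustrative usage sketch ('symbol_addr' and 'wide_handler' are
 * placeholders), mirroring the per-cpu registration pattern above:
 *
 *        struct perf_event * __percpu *events;
 *        struct perf_event_attr attr;
 *
 *        hw_breakpoint_init(&attr);
 *        attr.bp_addr = symbol_addr;
 *        attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *        attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 *        events = register_wide_hw_breakpoint(&attr, wide_handler, NULL);
 *        if (IS_ERR((void __force *)events))
 *                return PTR_ERR((void __force *)events);
 */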

/**
 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 * @cpu_events: the per cpu set of events to unregister
 */
void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
        int cpu;

        for_each_possible_cpu(cpu)
                unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));

        free_percpu(cpu_events);
}
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);

static struct notifier_block hw_breakpoint_exceptions_nb = {
        .notifier_call = hw_breakpoint_exceptions_notify,
        /* we need to be notified first */
        .priority = 0x7fffffff
};

static void bp_perf_event_destroy(struct perf_event *event)
{
        release_bp_slot(event);
}

static int hw_breakpoint_event_init(struct perf_event *bp)
{
        int err;

        if (bp->attr.type != PERF_TYPE_BREAKPOINT)
                return -ENOENT;

        /*
         * no branch sampling for breakpoint events
         */
        if (has_branch_stack(bp))
                return -EOPNOTSUPP;

        err = register_perf_hw_breakpoint(bp);
        if (err)
                return err;

        bp->destroy = bp_perf_event_destroy;

        return 0;
}

static int hw_breakpoint_add(struct perf_event *bp, int flags)
{
        if (!(flags & PERF_EF_START))
                bp->hw.state = PERF_HES_STOPPED;

        if (is_sampling_event(bp)) {
                bp->hw.last_period = bp->hw.sample_period;
                perf_swevent_set_period(bp);
        }

        return arch_install_hw_breakpoint(bp);
}

static void hw_breakpoint_del(struct perf_event *bp, int flags)
{
        arch_uninstall_hw_breakpoint(bp);
}

static void hw_breakpoint_start(struct perf_event *bp, int flags)
{
        bp->hw.state = 0;
}

static void hw_breakpoint_stop(struct perf_event *bp, int flags)
{
        bp->hw.state = PERF_HES_STOPPED;
}

static struct pmu perf_breakpoint = {
        .task_ctx_nr    = perf_sw_context, /* could eventually get its own */

        .event_init     = hw_breakpoint_event_init,
        .add            = hw_breakpoint_add,
        .del            = hw_breakpoint_del,
        .start          = hw_breakpoint_start,
        .stop           = hw_breakpoint_stop,
        .read           = hw_breakpoint_pmu_read,
};

int __init init_hw_breakpoint(void)
{
        int cpu, err_cpu;
        int i;

        for (i = 0; i < TYPE_MAX; i++)
                nr_slots[i] = hw_breakpoint_slots(i);

        for_each_possible_cpu(cpu) {
                for (i = 0; i < TYPE_MAX; i++) {
                        struct bp_cpuinfo *info = get_bp_info(cpu, i);

                        info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
                                                        GFP_KERNEL);
                        if (!info->tsk_pinned)
                                goto err_alloc;
                }
        }

        constraints_initialized = 1;

        perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);

        return register_die_notifier(&hw_breakpoint_exceptions_nb);

 err_alloc:
        for_each_possible_cpu(err_cpu) {
                for (i = 0; i < TYPE_MAX; i++)
                        kfree(get_bp_info(err_cpu, i)->tsk_pinned);
                if (err_cpu == cpu)
                        break;
        }

        return -ENOMEM;
}