linux/kernel/events/hw_breakpoint.c
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2007 Alan Stern
 * Copyright (C) IBM Corporation, 2009
 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
 *
 * Thanks to Ingo Molnar for his many suggestions.
 *
 * Authors: Alan Stern <stern@rowland.harvard.edu>
 *          K.Prasad <prasad@linux.vnet.ibm.com>
 *          Frederic Weisbecker <fweisbec@gmail.com>
 */

/*
 * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
 * using the CPU's debug registers.
 * This file contains the arch-independent routines.
 */

#include <linux/irqflags.h>
#include <linux/kallsyms.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/bug.h>

#include <linux/hw_breakpoint.h>
/*
 * Constraints data
 */
struct bp_cpuinfo {
        /* Number of pinned cpu breakpoints in a cpu */
        unsigned int    cpu_pinned;
        /* tsk_pinned[n] is the number of tasks having n+1 breakpoints */
        unsigned int    *tsk_pinned;
        /* Number of non-pinned cpu/task breakpoints in a cpu */
        unsigned int    flexible; /* XXX: placeholder, see fetch_this_slot() */
};
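
/*
 * Illustration of the tsk_pinned[] histogram above: if, say, two tasks each
 * have three pinned breakpoints of a given type on a CPU (and no other task
 * has any), then that CPU's tsk_pinned[2] is 2 and the other entries are 0.
 */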

static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
static int nr_slots[TYPE_MAX];

static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type)
{
        return per_cpu_ptr(bp_cpuinfo + type, cpu);
}

/* Keep track of the breakpoints attached to tasks */
static LIST_HEAD(bp_task_head);

static int constraints_initialized;

/* Gather the total number of pinned and un-pinned breakpoints in a cpuset */
struct bp_busy_slots {
        unsigned int pinned;
        unsigned int flexible;
};

/* Serialize accesses to the above constraints */
static DEFINE_MUTEX(nr_bp_mutex);

__weak int hw_breakpoint_weight(struct perf_event *bp)
{
        return 1;
}

static inline enum bp_type_idx find_slot_idx(u64 bp_type)
{
        if (bp_type & HW_BREAKPOINT_RW)
                return TYPE_DATA;

        return TYPE_INST;
}

/*
 * Report the maximum number of pinned breakpoints a task
 * has on this cpu.
 */
static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
{
        unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
        int i;

        for (i = nr_slots[type] - 1; i >= 0; i--) {
                if (tsk_pinned[i] > 0)
                        return i + 1;
        }

        return 0;
}

/*
 * Count the number of breakpoints of the same type and same task.
 * The given event must not be on the list.
 */
static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
{
        struct task_struct *tsk = bp->hw.target;
        struct perf_event *iter;
        int count = 0;

        list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
                if (iter->hw.target == tsk &&
                    find_slot_idx(iter->attr.bp_type) == type &&
                    (iter->cpu < 0 || cpu == iter->cpu))
                        count += hw_breakpoint_weight(iter);
        }

        return count;
}

static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
{
        if (bp->cpu >= 0)
                return cpumask_of(bp->cpu);
        return cpu_possible_mask;
}

/*
 * Report the number of pinned/un-pinned breakpoints we have in
 * a given cpu (cpu > -1) or in all of them (cpu = -1).
 */
static void
fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
                    enum bp_type_idx type)
{
        const struct cpumask *cpumask = cpumask_of_bp(bp);
        int cpu;

        for_each_cpu(cpu, cpumask) {
                struct bp_cpuinfo *info = get_bp_info(cpu, type);
                int nr;

                nr = info->cpu_pinned;
                if (!bp->hw.target)
                        nr += max_task_bp_pinned(cpu, type);
                else
                        nr += task_bp_pinned(cpu, bp, type);

                if (nr > slots->pinned)
                        slots->pinned = nr;

                nr = info->flexible;
                if (nr > slots->flexible)
                        slots->flexible = nr;
        }
}

/*
 * For now, continue to consider flexible as pinned, until we can
 * ensure no flexible event can ever be scheduled before a pinned event
 * on the same cpu.
 */
static void
fetch_this_slot(struct bp_busy_slots *slots, int weight)
{
        slots->pinned += weight;
}

/*
 * Add a pinned breakpoint for the given task in our constraint table
 */
static void toggle_bp_task_slot(struct perf_event *bp, int cpu,
                                enum bp_type_idx type, int weight)
{
        unsigned int *tsk_pinned = get_bp_info(cpu, type)->tsk_pinned;
        int old_idx, new_idx;

        old_idx = task_bp_pinned(cpu, bp, type) - 1;
        new_idx = old_idx + weight;

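        /*
         * Move the task from the bucket matching its old breakpoint count
         * to the bucket matching its new count in the tsk_pinned histogram;
         * a negative index means it had, or ends up with, no pinned
         * breakpoint of this type on this cpu.
         */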
        if (old_idx >= 0)
                tsk_pinned[old_idx]--;
        if (new_idx >= 0)
                tsk_pinned[new_idx]++;
}

/*
 * Add/remove the given breakpoint in our constraint table
 */
static void
toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
               int weight)
{
        const struct cpumask *cpumask = cpumask_of_bp(bp);
        int cpu;

        if (!enable)
                weight = -weight;

        /* Pinned counter cpu profiling */
        if (!bp->hw.target) {
                get_bp_info(bp->cpu, type)->cpu_pinned += weight;
                return;
        }

        /* Pinned counter task profiling */
        for_each_cpu(cpu, cpumask)
                toggle_bp_task_slot(bp, cpu, type, weight);

        if (enable)
                list_add_tail(&bp->hw.bp_list, &bp_task_head);
        else
                list_del(&bp->hw.bp_list);
}

__weak int arch_reserve_bp_slot(struct perf_event *bp)
{
        return 0;
}

__weak void arch_release_bp_slot(struct perf_event *bp)
{
}

/*
 * Function to perform processor-specific cleanup during unregistration
 */
__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
{
        /*
         * A weak stub function here for those archs that don't define
         * it inside arch/.../kernel/hw_breakpoint.c
         */
}

/*
 * Constraints to check before allowing this new breakpoint counter:
 *
 *  == Non-pinned counter == (Considered as pinned for now)
 *
 *   - If attached to a single cpu, check:
 *
 *       (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu)
 *           + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM
 *
 *       -> If there are already non-pinned counters in this cpu, it means
 *          there is already a free slot for them.
 *          Otherwise, we check that the maximum number of per-task
 *          breakpoints (for this cpu) plus the number of per-cpu breakpoints
 *          (for this cpu) doesn't use up every register.
 *
 *   - If attached to every cpu, check:
 *
 *       (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *))
 *           + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM
 *
 *       -> This is roughly the same, except we check the number of per-cpu
 *          breakpoints for every cpu and keep the maximum one. Same for the
 *          per-task breakpoints.
 *
 *
 * == Pinned counter ==
 *
 *   - If attached to a single cpu, check:
 *
 *       ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu)
 *            + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM
 *
 *       -> Same checks as before. But now the info->flexible counters, if any,
 *          must keep at least one register (or they will never get scheduled).
 *
 *   - If attached to every cpu, check:
 *
 *       ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *))
 *            + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM
 */
static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
{
        struct bp_busy_slots slots = {0};
        enum bp_type_idx type;
        int weight;
        int ret;

        /* We couldn't initialize breakpoint constraints on boot */
        if (!constraints_initialized)
                return -ENOMEM;

        /* Basic checks */
        if (bp_type == HW_BREAKPOINT_EMPTY ||
            bp_type == HW_BREAKPOINT_INVALID)
                return -EINVAL;

        type = find_slot_idx(bp_type);
        weight = hw_breakpoint_weight(bp);

        fetch_bp_busy_slots(&slots, bp, type);
        /*
         * Simulate the addition of this breakpoint to the constraints
         * and see the result.
         */
        fetch_this_slot(&slots, weight);

        /* Flexible counters need to keep at least one slot */
        if (slots.pinned + (!!slots.flexible) > nr_slots[type])
                return -ENOSPC;

        ret = arch_reserve_bp_slot(bp);
        if (ret)
                return ret;

        toggle_bp_slot(bp, true, type, weight);

        return 0;
}
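
/*
 * Worked example of the check in __reserve_bp_slot() above (numbers are
 * only illustrative): with nr_slots[TYPE_DATA] == 4, a cpu that already
 * has 2 pinned data breakpoints and 1 flexible one gives slots.pinned == 2
 * and slots.flexible == 1 after fetch_bp_busy_slots(). Adding a new pinned
 * breakpoint of weight 1 makes slots.pinned == 3, and 3 + !!1 == 4 is not
 * greater than 4, so the reservation succeeds; a second such breakpoint
 * would fail with -ENOSPC.
 */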

int reserve_bp_slot(struct perf_event *bp)
{
        int ret;

        mutex_lock(&nr_bp_mutex);

        ret = __reserve_bp_slot(bp, bp->attr.bp_type);

        mutex_unlock(&nr_bp_mutex);

        return ret;
}

static void __release_bp_slot(struct perf_event *bp, u64 bp_type)
{
        enum bp_type_idx type;
        int weight;

        arch_release_bp_slot(bp);

        type = find_slot_idx(bp_type);
        weight = hw_breakpoint_weight(bp);
        toggle_bp_slot(bp, false, type, weight);
}

void release_bp_slot(struct perf_event *bp)
{
        mutex_lock(&nr_bp_mutex);

        arch_unregister_hw_breakpoint(bp);
        __release_bp_slot(bp, bp->attr.bp_type);

        mutex_unlock(&nr_bp_mutex);
}

static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
{
        int err;

        __release_bp_slot(bp, old_type);

        err = __reserve_bp_slot(bp, new_type);
        if (err) {
                /*
                 * Reserve the old_type slot back in case
                 * there's no space for the new type.
                 *
                 * This must succeed, because we just released
                 * the old_type slot in the __release_bp_slot
                 * call above. If not, something is broken.
                 */
                WARN_ON(__reserve_bp_slot(bp, old_type));
        }

        return err;
}

static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type)
{
        int ret;

        mutex_lock(&nr_bp_mutex);
        ret = __modify_bp_slot(bp, old_type, new_type);
        mutex_unlock(&nr_bp_mutex);
        return ret;
}

/*
 * Allow the kernel debugger to reserve breakpoint slots without
 * taking a lock, using the dbg_* variants of the reserve and
 * release functions.
 */
int dbg_reserve_bp_slot(struct perf_event *bp)
{
        if (mutex_is_locked(&nr_bp_mutex))
                return -1;

        return __reserve_bp_slot(bp, bp->attr.bp_type);
}

int dbg_release_bp_slot(struct perf_event *bp)
{
        if (mutex_is_locked(&nr_bp_mutex))
                return -1;

        __release_bp_slot(bp, bp->attr.bp_type);

        return 0;
}

static int hw_breakpoint_parse(struct perf_event *bp,
                               const struct perf_event_attr *attr,
                               struct arch_hw_breakpoint *hw)
{
        int err;

        err = hw_breakpoint_arch_parse(bp, attr, hw);
        if (err)
                return err;

        if (arch_check_bp_in_kernelspace(hw)) {
                if (attr->exclude_kernel)
                        return -EINVAL;
                /*
                 * Don't let unprivileged users set a breakpoint in the trap
                 * path to avoid trap recursion attacks.
                 */
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
        }

        return 0;
}

int register_perf_hw_breakpoint(struct perf_event *bp)
{
        struct arch_hw_breakpoint hw = { };
        int err;

        err = reserve_bp_slot(bp);
        if (err)
                return err;

        err = hw_breakpoint_parse(bp, &bp->attr, &hw);
        if (err) {
                release_bp_slot(bp);
                return err;
        }

        bp->hw.info = hw;

        return 0;
}

/**
 * register_user_hw_breakpoint - register a hardware breakpoint for user space
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data passed to the @triggered callback
 * @tsk: pointer to 'task_struct' of the process to which the address belongs
 */
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
                            void *context,
                            struct task_struct *tsk)
{
        return perf_event_create_kernel_counter(attr, -1, tsk, triggered,
                                                context);
}
EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
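
/*
 * A minimal usage sketch (not part of this file; the handler and the
 * watched address are hypothetical) of how a caller might attach a write
 * breakpoint to a task, assuming hw_breakpoint_init() and the
 * HW_BREAKPOINT_* defines from <linux/hw_breakpoint.h>:
 *
 *	static void my_bp_handler(struct perf_event *bp,
 *				  struct perf_sample_data *data,
 *				  struct pt_regs *regs)
 *	{
 *		pr_info("breakpoint hit at 0x%llx\n", bp->attr.bp_addr);
 *	}
 *
 *	struct perf_event_attr attr;
 *	struct perf_event *bp;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = some_user_address;	// hypothetical address
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W;
 *
 *	bp = register_user_hw_breakpoint(&attr, my_bp_handler, NULL, tsk);
 *	if (IS_ERR(bp))
 *		return PTR_ERR(bp);
 */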

static void hw_breakpoint_copy_attr(struct perf_event_attr *to,
                                    struct perf_event_attr *from)
{
        to->bp_addr = from->bp_addr;
        to->bp_type = from->bp_type;
        to->bp_len  = from->bp_len;
        to->disabled = from->disabled;
}

int
modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr,
                                bool check)
{
        struct arch_hw_breakpoint hw = { };
        int err;

        err = hw_breakpoint_parse(bp, attr, &hw);
        if (err)
                return err;

        if (check) {
                struct perf_event_attr old_attr;

                old_attr = bp->attr;
                hw_breakpoint_copy_attr(&old_attr, attr);
                if (memcmp(&old_attr, attr, sizeof(*attr)))
                        return -EINVAL;
        }

        if (bp->attr.bp_type != attr->bp_type) {
                err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type);
                if (err)
                        return err;
        }

        hw_breakpoint_copy_attr(&bp->attr, attr);
        bp->hw.info = hw;

        return 0;
}

/**
 * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
 * @bp: the breakpoint structure to modify
 * @attr: new breakpoint attributes
 */
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
        int err;

        /*
         * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it
         * will not be possible to raise IPIs that invoke __perf_event_disable.
         * So call the function directly after making sure we are targeting the
         * current task.
         */
        if (irqs_disabled() && bp->ctx && bp->ctx->task == current)
                perf_event_disable_local(bp);
        else
                perf_event_disable(bp);

        err = modify_user_hw_breakpoint_check(bp, attr, false);

        if (!bp->attr.disabled)
                perf_event_enable(bp);

        return err;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);

/**
 * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
 * @bp: the breakpoint structure to unregister
 */
void unregister_hw_breakpoint(struct perf_event *bp)
{
        if (!bp)
                return;
        perf_event_release_kernel(bp);
}
EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);

/**
 * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
 * @attr: breakpoint attributes
 * @triggered: callback to trigger when we hit the breakpoint
 * @context: context data passed to the @triggered callback
 *
 * @return a set of per_cpu pointers to perf events
 */
struct perf_event * __percpu *
register_wide_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
                            void *context)
{
        struct perf_event * __percpu *cpu_events, *bp;
        long err = 0;
        int cpu;

        cpu_events = alloc_percpu(typeof(*cpu_events));
        if (!cpu_events)
                return (void __percpu __force *)ERR_PTR(-ENOMEM);

        get_online_cpus();
        for_each_online_cpu(cpu) {
                bp = perf_event_create_kernel_counter(attr, cpu, NULL,
                                                      triggered, context);
                if (IS_ERR(bp)) {
                        err = PTR_ERR(bp);
                        break;
                }

                per_cpu(*cpu_events, cpu) = bp;
        }
        put_online_cpus();

        if (likely(!err))
                return cpu_events;

        unregister_wide_hw_breakpoint(cpu_events);
        return (void __percpu __force *)ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
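
/*
 * A minimal sketch (hypothetical, in the spirit of
 * samples/hw_breakpoint/data_breakpoint.c) of registering a wide
 * breakpoint on a kernel symbol and tearing it down again, reusing the
 * hypothetical my_bp_handler from the sketch above:
 *
 *	struct perf_event * __percpu *wide_bp;
 *	struct perf_event_attr attr;
 *
 *	hw_breakpoint_init(&attr);
 *	attr.bp_addr = kallsyms_lookup_name("some_symbol");	// hypothetical symbol
 *	attr.bp_len  = HW_BREAKPOINT_LEN_4;
 *	attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
 *
 *	wide_bp = register_wide_hw_breakpoint(&attr, my_bp_handler, NULL);
 *	if (IS_ERR((void __force *)wide_bp))
 *		return PTR_ERR((void __force *)wide_bp);
 *	...
 *	unregister_wide_hw_breakpoint(wide_bp);
 */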

/**
 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
 * @cpu_events: the per cpu set of events to unregister
 */
void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
{
        int cpu;

        for_each_possible_cpu(cpu)
                unregister_hw_breakpoint(per_cpu(*cpu_events, cpu));

        free_percpu(cpu_events);
}
EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);

static struct notifier_block hw_breakpoint_exceptions_nb = {
        .notifier_call = hw_breakpoint_exceptions_notify,
        /* we need to be notified first */
        .priority = 0x7fffffff
};

static void bp_perf_event_destroy(struct perf_event *event)
{
        release_bp_slot(event);
}

static int hw_breakpoint_event_init(struct perf_event *bp)
{
        int err;

        if (bp->attr.type != PERF_TYPE_BREAKPOINT)
                return -ENOENT;

        /*
         * no branch sampling for breakpoint events
         */
        if (has_branch_stack(bp))
                return -EOPNOTSUPP;

        err = register_perf_hw_breakpoint(bp);
        if (err)
                return err;

        bp->destroy = bp_perf_event_destroy;

        return 0;
}

static int hw_breakpoint_add(struct perf_event *bp, int flags)
{
        if (!(flags & PERF_EF_START))
                bp->hw.state = PERF_HES_STOPPED;

        if (is_sampling_event(bp)) {
                bp->hw.last_period = bp->hw.sample_period;
                perf_swevent_set_period(bp);
        }

        return arch_install_hw_breakpoint(bp);
}

static void hw_breakpoint_del(struct perf_event *bp, int flags)
{
        arch_uninstall_hw_breakpoint(bp);
}

static void hw_breakpoint_start(struct perf_event *bp, int flags)
{
        bp->hw.state = 0;
}

static void hw_breakpoint_stop(struct perf_event *bp, int flags)
{
        bp->hw.state = PERF_HES_STOPPED;
}

static struct pmu perf_breakpoint = {
        .task_ctx_nr    = perf_sw_context, /* could eventually get its own */

        .event_init     = hw_breakpoint_event_init,
        .add            = hw_breakpoint_add,
        .del            = hw_breakpoint_del,
        .start          = hw_breakpoint_start,
        .stop           = hw_breakpoint_stop,
        .read           = hw_breakpoint_pmu_read,
};

int __init init_hw_breakpoint(void)
{
        int cpu, err_cpu;
        int i;

        for (i = 0; i < TYPE_MAX; i++)
                nr_slots[i] = hw_breakpoint_slots(i);

        for_each_possible_cpu(cpu) {
                for (i = 0; i < TYPE_MAX; i++) {
                        struct bp_cpuinfo *info = get_bp_info(cpu, i);

                        info->tsk_pinned = kcalloc(nr_slots[i], sizeof(int),
                                                        GFP_KERNEL);
                        if (!info->tsk_pinned)
                                goto err_alloc;
                }
        }

        constraints_initialized = 1;

        perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT);

        return register_die_notifier(&hw_breakpoint_exceptions_nb);

 err_alloc:
        for_each_possible_cpu(err_cpu) {
                for (i = 0; i < TYPE_MAX; i++)
                        kfree(get_bp_info(err_cpu, i)->tsk_pinned);
                if (err_cpu == cpu)
                        break;
        }

        return -ENOMEM;
}