linux/kernel/kprobes.c
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *  Kernel Probes (KProbes)
   4 *  kernel/kprobes.c
   5 *
   6 * Copyright (C) IBM Corporation, 2002, 2004
   7 *
   8 * 2002-Oct     Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
   9 *              Probes initial implementation (includes suggestions from
  10 *              Rusty Russell).
  11 * 2004-Aug     Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
  12 *              hlists and exceptions notifier as suggested by Andi Kleen.
  13 * 2004-July    Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
  14 *              interface to access function arguments.
  15 * 2004-Sep     Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
  16 *              exceptions notifier to be first on the priority list.
  17 * 2005-May     Hien Nguyen <hien@us.ibm.com>, Jim Keniston
  18 *              <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
  19 *              <prasanna@in.ibm.com> added function-return probes.
  20 */
  21#include <linux/kprobes.h>
  22#include <linux/hash.h>
  23#include <linux/init.h>
  24#include <linux/slab.h>
  25#include <linux/stddef.h>
  26#include <linux/export.h>
  27#include <linux/moduleloader.h>
  28#include <linux/kallsyms.h>
  29#include <linux/freezer.h>
  30#include <linux/seq_file.h>
  31#include <linux/debugfs.h>
  32#include <linux/sysctl.h>
  33#include <linux/kdebug.h>
  34#include <linux/memory.h>
  35#include <linux/ftrace.h>
  36#include <linux/cpu.h>
  37#include <linux/jump_label.h>
  38
  39#include <asm/sections.h>
  40#include <asm/cacheflush.h>
  41#include <asm/errno.h>
  42#include <linux/uaccess.h>
  43
  44#define KPROBE_HASH_BITS 6
  45#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
  46
  47
  48static int kprobes_initialized;
  49static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
  50static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
  51
  52/* NOTE: change this value only with kprobe_mutex held */
  53static bool kprobes_all_disarmed;
  54
  55/* This protects kprobe_table and optimizing_list */
  56static DEFINE_MUTEX(kprobe_mutex);
  57static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
  58static struct {
  59        raw_spinlock_t lock ____cacheline_aligned_in_smp;
  60} kretprobe_table_locks[KPROBE_TABLE_SIZE];
  61
  62kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
  63                                        unsigned int __unused)
  64{
  65        return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
  66}
  67
  68static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
  69{
  70        return &(kretprobe_table_locks[hash].lock);
  71}
  72
  73/* Blacklist -- list of struct kprobe_blacklist_entry */
  74static LIST_HEAD(kprobe_blacklist);
  75
  76#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
  77/*
  78 * kprobe->ainsn.insn points to the copy of the instruction to be
  79 * single-stepped. x86_64, POWER4 and above have no-exec support and
  80 * stepping on the instruction on a vmalloced/kmalloced/data page
  81 * is a recipe for disaster
  82 */
  83struct kprobe_insn_page {
  84        struct list_head list;
  85        kprobe_opcode_t *insns;         /* Page of instruction slots */
  86        struct kprobe_insn_cache *cache;
  87        int nused;
  88        int ngarbage;
  89        char slot_used[];
  90};
  91
  92#define KPROBE_INSN_PAGE_SIZE(slots)                    \
  93        (offsetof(struct kprobe_insn_page, slot_used) + \
  94         (sizeof(char) * (slots)))
  95
  96static int slots_per_page(struct kprobe_insn_cache *c)
  97{
  98        return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
  99}
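
/*
 * A rough sense of scale (an illustrative calculation only, assuming the
 * common x86_64 values PAGE_SIZE == 4096, MAX_INSN_SIZE == 16 and a
 * one-byte kprobe_opcode_t, none of which are guaranteed by this code):
 *
 *      slots_per_page() == 4096 / (16 * 1) == 256 instruction slots per page
 */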
 100
 101enum kprobe_slot_state {
 102        SLOT_CLEAN = 0,
 103        SLOT_DIRTY = 1,
 104        SLOT_USED = 2,
 105};
 106
 107void __weak *alloc_insn_page(void)
 108{
 109        return module_alloc(PAGE_SIZE);
 110}
 111
 112void __weak free_insn_page(void *page)
 113{
 114        module_memfree(page);
 115}
 116
 117struct kprobe_insn_cache kprobe_insn_slots = {
 118        .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
 119        .alloc = alloc_insn_page,
 120        .free = free_insn_page,
 121        .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
 122        .insn_size = MAX_INSN_SIZE,
 123        .nr_garbage = 0,
 124};
 125static int collect_garbage_slots(struct kprobe_insn_cache *c);
 126
 127/**
 128 * __get_insn_slot() - Find a slot on an executable page for an instruction.
 129 * We allocate an executable page if there's no room on existing ones.
 130 */
 131kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
 132{
 133        struct kprobe_insn_page *kip;
 134        kprobe_opcode_t *slot = NULL;
 135
 136        /* Since the slot array is not protected by rcu, we need a mutex */
 137        mutex_lock(&c->mutex);
 138 retry:
 139        rcu_read_lock();
 140        list_for_each_entry_rcu(kip, &c->pages, list) {
 141                if (kip->nused < slots_per_page(c)) {
 142                        int i;
 143                        for (i = 0; i < slots_per_page(c); i++) {
 144                                if (kip->slot_used[i] == SLOT_CLEAN) {
 145                                        kip->slot_used[i] = SLOT_USED;
 146                                        kip->nused++;
 147                                        slot = kip->insns + (i * c->insn_size);
 148                                        rcu_read_unlock();
 149                                        goto out;
 150                                }
 151                        }
 152                        /* kip->nused is broken. Fix it. */
 153                        kip->nused = slots_per_page(c);
 154                        WARN_ON(1);
 155                }
 156        }
 157        rcu_read_unlock();
 158
 159        /* If there are any garbage slots, collect them and try again. */
 160        if (c->nr_garbage && collect_garbage_slots(c) == 0)
 161                goto retry;
 162
 163        /* All out of space.  Need to allocate a new page. */
 164        kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
 165        if (!kip)
 166                goto out;
 167
 168        /*
 169         * Use module_alloc so this page is within +/- 2GB of where the
 170         * kernel image and loaded module images reside. This is required
 171         * so x86_64 can correctly handle the %rip-relative fixups.
 172         */
 173        kip->insns = c->alloc();
 174        if (!kip->insns) {
 175                kfree(kip);
 176                goto out;
 177        }
 178        INIT_LIST_HEAD(&kip->list);
 179        memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
 180        kip->slot_used[0] = SLOT_USED;
 181        kip->nused = 1;
 182        kip->ngarbage = 0;
 183        kip->cache = c;
 184        list_add_rcu(&kip->list, &c->pages);
 185        slot = kip->insns;
 186out:
 187        mutex_unlock(&c->mutex);
 188        return slot;
 189}
 190
 191/* Return 1 if all garbage slots are collected, otherwise 0. */
 192static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
 193{
 194        kip->slot_used[idx] = SLOT_CLEAN;
 195        kip->nused--;
 196        if (kip->nused == 0) {
 197                /*
 198                 * Page is no longer in use.  Free it unless
 199                 * it's the last one.  We keep the last one
 200                 * so as not to have to set it up again the
 201                 * next time somebody inserts a probe.
 202                 */
 203                if (!list_is_singular(&kip->list)) {
 204                        list_del_rcu(&kip->list);
 205                        synchronize_rcu();
 206                        kip->cache->free(kip->insns);
 207                        kfree(kip);
 208                }
 209                return 1;
 210        }
 211        return 0;
 212}
 213
 214static int collect_garbage_slots(struct kprobe_insn_cache *c)
 215{
 216        struct kprobe_insn_page *kip, *next;
 217
 218        /* Ensure no one is still running on the garbage slots */
 219        synchronize_rcu();
 220
 221        list_for_each_entry_safe(kip, next, &c->pages, list) {
 222                int i;
 223                if (kip->ngarbage == 0)
 224                        continue;
 225                kip->ngarbage = 0;      /* we will collect all garbage slots */
 226                for (i = 0; i < slots_per_page(c); i++) {
 227                        if (kip->slot_used[i] == SLOT_DIRTY && collect_one_slot(kip, i))
 228                                break;
 229                }
 230        }
 231        c->nr_garbage = 0;
 232        return 0;
 233}
 234
 235void __free_insn_slot(struct kprobe_insn_cache *c,
 236                      kprobe_opcode_t *slot, int dirty)
 237{
 238        struct kprobe_insn_page *kip;
 239        long idx;
 240
 241        mutex_lock(&c->mutex);
 242        rcu_read_lock();
 243        list_for_each_entry_rcu(kip, &c->pages, list) {
 244                idx = ((long)slot - (long)kip->insns) /
 245                        (c->insn_size * sizeof(kprobe_opcode_t));
 246                if (idx >= 0 && idx < slots_per_page(c))
 247                        goto out;
 248        }
 249        /* Could not find this slot. */
 250        WARN_ON(1);
 251        kip = NULL;
 252out:
 253        rcu_read_unlock();
 254        /* Mark and sweep: this may sleep */
 255        if (kip) {
 256                /* Check double free */
 257                WARN_ON(kip->slot_used[idx] != SLOT_USED);
 258                if (dirty) {
 259                        kip->slot_used[idx] = SLOT_DIRTY;
 260                        kip->ngarbage++;
 261                        if (++c->nr_garbage > slots_per_page(c))
 262                                collect_garbage_slots(c);
 263                } else {
 264                        collect_one_slot(kip, idx);
 265                }
 266        }
 267        mutex_unlock(&c->mutex);
 268}
 269
 270/*
 271 * Check whether the given address is on a page of kprobe instruction slots.
 272 * This is used for checking whether an address found on a stack
 273 * is in a text area or not.
 274 */
 275bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
 276{
 277        struct kprobe_insn_page *kip;
 278        bool ret = false;
 279
 280        rcu_read_lock();
 281        list_for_each_entry_rcu(kip, &c->pages, list) {
 282                if (addr >= (unsigned long)kip->insns &&
 283                    addr < (unsigned long)kip->insns + PAGE_SIZE) {
 284                        ret = true;
 285                        break;
 286                }
 287        }
 288        rcu_read_unlock();
 289
 290        return ret;
 291}
 292
 293#ifdef CONFIG_OPTPROBES
 294/* For optimized_kprobe buffer */
 295struct kprobe_insn_cache kprobe_optinsn_slots = {
 296        .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
 297        .alloc = alloc_insn_page,
 298        .free = free_insn_page,
 299        .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
 300        /* .insn_size is initialized later */
 301        .nr_garbage = 0,
 302};
 303#endif
 304#endif
 305
 306/* We have preemption disabled, so it is safe to use the __ versions */
 307static inline void set_kprobe_instance(struct kprobe *kp)
 308{
 309        __this_cpu_write(kprobe_instance, kp);
 310}
 311
 312static inline void reset_kprobe_instance(void)
 313{
 314        __this_cpu_write(kprobe_instance, NULL);
 315}
 316
 317/*
 318 * This routine is called either:
 319 *      - under the kprobe_mutex - during kprobe_[un]register()
 320 *                              OR
 321 *      - with preemption disabled - from arch/xxx/kernel/kprobes.c
 322 */
 323struct kprobe *get_kprobe(void *addr)
 324{
 325        struct hlist_head *head;
 326        struct kprobe *p;
 327
 328        head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
 329        hlist_for_each_entry_rcu(p, head, hlist) {
 330                if (p->addr == addr)
 331                        return p;
 332        }
 333
 334        return NULL;
 335}
 336NOKPROBE_SYMBOL(get_kprobe);
 337
 338static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
 339
 340/* Return true if the kprobe is an aggregator */
 341static inline int kprobe_aggrprobe(struct kprobe *p)
 342{
 343        return p->pre_handler == aggr_pre_handler;
 344}
 345
 346/* Return true(!0) if the kprobe is unused */
 347static inline int kprobe_unused(struct kprobe *p)
 348{
 349        return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
 350               list_empty(&p->list);
 351}
 352
 353/*
 354 * Keep all fields in the kprobe consistent
 355 */
 356static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
 357{
 358        memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
 359        memcpy(&p->ainsn, &ap->ainsn, sizeof(struct arch_specific_insn));
 360}
 361
 362#ifdef CONFIG_OPTPROBES
 363/* NOTE: change this value only with kprobe_mutex held */
 364static bool kprobes_allow_optimization;
 365
 366/*
 367 * Call all pre_handlers on the list, but ignore their return values.
 368 * This must be called from the arch-dependent optimized caller.
 369 */
 370void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
 371{
 372        struct kprobe *kp;
 373
 374        list_for_each_entry_rcu(kp, &p->list, list) {
 375                if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
 376                        set_kprobe_instance(kp);
 377                        kp->pre_handler(kp, regs);
 378                }
 379                reset_kprobe_instance();
 380        }
 381}
 382NOKPROBE_SYMBOL(opt_pre_handler);
 383
 384/* Free optimized instructions and optimized_kprobe */
 385static void free_aggr_kprobe(struct kprobe *p)
 386{
 387        struct optimized_kprobe *op;
 388
 389        op = container_of(p, struct optimized_kprobe, kp);
 390        arch_remove_optimized_kprobe(op);
 391        arch_remove_kprobe(p);
 392        kfree(op);
 393}
 394
 395/* Return true(!0) if the kprobe is ready for optimization. */
 396static inline int kprobe_optready(struct kprobe *p)
 397{
 398        struct optimized_kprobe *op;
 399
 400        if (kprobe_aggrprobe(p)) {
 401                op = container_of(p, struct optimized_kprobe, kp);
 402                return arch_prepared_optinsn(&op->optinsn);
 403        }
 404
 405        return 0;
 406}
 407
 408/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
 409static inline int kprobe_disarmed(struct kprobe *p)
 410{
 411        struct optimized_kprobe *op;
 412
 413        /* If the kprobe is not an aggr/opt probe, just return whether it is disabled */
 414        if (!kprobe_aggrprobe(p))
 415                return kprobe_disabled(p);
 416
 417        op = container_of(p, struct optimized_kprobe, kp);
 418
 419        return kprobe_disabled(p) && list_empty(&op->list);
 420}
 421
 422/* Return true(!0) if the probe is queued on (un)optimizing lists */
 423static int kprobe_queued(struct kprobe *p)
 424{
 425        struct optimized_kprobe *op;
 426
 427        if (kprobe_aggrprobe(p)) {
 428                op = container_of(p, struct optimized_kprobe, kp);
 429                if (!list_empty(&op->list))
 430                        return 1;
 431        }
 432        return 0;
 433}
 434
 435/*
 436 * Return an optimized kprobe whose optimizing code replaces
 437 * instructions including addr (excluding the breakpoint itself).
 438 */
 439static struct kprobe *get_optimized_kprobe(unsigned long addr)
 440{
 441        int i;
 442        struct kprobe *p = NULL;
 443        struct optimized_kprobe *op;
 444
 445        /* Don't check i == 0, since that is a breakpoint case. */
 446        for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH; i++)
 447                p = get_kprobe((void *)(addr - i));
 448
 449        if (p && kprobe_optready(p)) {
 450                op = container_of(p, struct optimized_kprobe, kp);
 451                if (arch_within_optimized_kprobe(op, addr))
 452                        return p;
 453        }
 454
 455        return NULL;
 456}
 457
 458/* Optimization staging list, protected by kprobe_mutex */
 459static LIST_HEAD(optimizing_list);
 460static LIST_HEAD(unoptimizing_list);
 461static LIST_HEAD(freeing_list);
 462
 463static void kprobe_optimizer(struct work_struct *work);
 464static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
 465#define OPTIMIZE_DELAY 5
 466
 467/*
 468 * Optimize (replace a breakpoint with a jump) kprobes listed on
 469 * optimizing_list.
 470 */
 471static void do_optimize_kprobes(void)
 472{
 473        lockdep_assert_held(&text_mutex);
 474        /*
 475         * Optimization and unoptimization refer to online_cpus via
 476         * stop_machine(), while cpu-hotplug modifies online_cpus. At the
 477         * same time, text_mutex is held both here and by cpu-hotplug.
 478         * This combination can cause a deadlock (cpu-hotplug tries to lock
 479         * text_mutex, but stop_machine() cannot proceed because online_cpus
 480         * has changed).
 481         * To avoid this deadlock, the caller must have locked cpu-hotplug,
 482         * preventing cpu-hotplug from running outside of text_mutex locking.
 483         */
 484        lockdep_assert_cpus_held();
 485
 486        /* Optimization is never done while all kprobes are disarmed */
 487        if (kprobes_all_disarmed || !kprobes_allow_optimization ||
 488            list_empty(&optimizing_list))
 489                return;
 490
 491        arch_optimize_kprobes(&optimizing_list);
 492}
 493
 494/*
 495 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
 496 * if needed) kprobes listed on unoptimizing_list.
 497 */
 498static void do_unoptimize_kprobes(void)
 499{
 500        struct optimized_kprobe *op, *tmp;
 501
 502        lockdep_assert_held(&text_mutex);
 503        /* See comment in do_optimize_kprobes() */
 504        lockdep_assert_cpus_held();
 505
 506        /* Unlike optimization, unoptimization must always be carried out */
 507        if (list_empty(&unoptimizing_list))
 508                return;
 509
 510        arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
 511        /* Loop over freeing_list for disarming */
 512        list_for_each_entry_safe(op, tmp, &freeing_list, list) {
 513                /* Switch from the detour code back to the original */
 514                op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
 515                /* Disarm probes if marked disabled */
 516                if (kprobe_disabled(&op->kp))
 517                        arch_disarm_kprobe(&op->kp);
 518                if (kprobe_unused(&op->kp)) {
 519                        /*
 520                         * Remove unused probes from hash list. After waiting
 521                         * for synchronization, these probes are reclaimed.
 522                         * (reclaiming is done by do_free_cleaned_kprobes.)
 523                         */
 524                        hlist_del_rcu(&op->kp.hlist);
 525                } else
 526                        list_del_init(&op->list);
 527        }
 528}
 529
 530/* Reclaim all kprobes on the freeing_list */
 531static void do_free_cleaned_kprobes(void)
 532{
 533        struct optimized_kprobe *op, *tmp;
 534
 535        list_for_each_entry_safe(op, tmp, &freeing_list, list) {
 536                list_del_init(&op->list);
 537                if (WARN_ON_ONCE(!kprobe_unused(&op->kp))) {
 538                        /*
 539                         * This must not happen, but if there is a kprobe
 540                         * still in use, keep it on kprobes hash list.
 541                         */
 542                        continue;
 543                }
 544                free_aggr_kprobe(&op->kp);
 545        }
 546}
 547
 548/* Start the optimizer after OPTIMIZE_DELAY has passed */
 549static void kick_kprobe_optimizer(void)
 550{
 551        schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
 552}
 553
 554/* Kprobe jump optimizer */
 555static void kprobe_optimizer(struct work_struct *work)
 556{
 557        mutex_lock(&kprobe_mutex);
 558        cpus_read_lock();
 559        mutex_lock(&text_mutex);
 560        /* Lock modules while optimizing kprobes */
 561        mutex_lock(&module_mutex);
 562
 563        /*
 564         * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
 565         * kprobes before waiting for the quiescence period.
 566         */
 567        do_unoptimize_kprobes();
 568
 569        /*
 570         * Step 2: Wait for a quiescence period to ensure all potentially
 571         * preempted tasks have been scheduled normally. Because an optprobe
 572         * may modify multiple instructions, there is a chance that a task
 573         * was preempted on the Nth instruction. Such a task could return
 574         * to the 2nd-Nth byte of the jump instruction; this wait avoids that.
 575         * Note that on a non-preemptive kernel, this is transparently converted
 576         * to synchronize_sched() to wait for all interrupts to have completed.
 577         */
 578        synchronize_rcu_tasks();
 579
 580        /* Step 3: Optimize kprobes after the quiescence period */
 581        do_optimize_kprobes();
 582
 583        /* Step 4: Free cleaned kprobes after the quiescence period */
 584        do_free_cleaned_kprobes();
 585
 586        mutex_unlock(&module_mutex);
 587        mutex_unlock(&text_mutex);
 588        cpus_read_unlock();
 589        mutex_unlock(&kprobe_mutex);
 590
 591        /* Step 5: Kick optimizer again if needed */
 592        if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
 593                kick_kprobe_optimizer();
 594}
 595
 596/* Wait for optimization and unoptimization to complete */
 597void wait_for_kprobe_optimizer(void)
 598{
 599        mutex_lock(&kprobe_mutex);
 600
 601        while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) {
 602                mutex_unlock(&kprobe_mutex);
 603
 604                /* this will also make optimizing_work execute immediately */
 605                flush_delayed_work(&optimizing_work);
 606                /* @optimizing_work might not have been queued yet, relax */
 607                cpu_relax();
 608
 609                mutex_lock(&kprobe_mutex);
 610        }
 611
 612        mutex_unlock(&kprobe_mutex);
 613}
 614
 615static bool optprobe_queued_unopt(struct optimized_kprobe *op)
 616{
 617        struct optimized_kprobe *_op;
 618
 619        list_for_each_entry(_op, &unoptimizing_list, list) {
 620                if (op == _op)
 621                        return true;
 622        }
 623
 624        return false;
 625}
 626
 627/* Optimize kprobe if p is ready to be optimized */
 628static void optimize_kprobe(struct kprobe *p)
 629{
 630        struct optimized_kprobe *op;
 631
 632        /* Check if the kprobe is disabled or not ready for optimization. */
 633        if (!kprobe_optready(p) || !kprobes_allow_optimization ||
 634            (kprobe_disabled(p) || kprobes_all_disarmed))
 635                return;
 636
 637        /* kprobes with a post_handler cannot be optimized */
 638        if (p->post_handler)
 639                return;
 640
 641        op = container_of(p, struct optimized_kprobe, kp);
 642
 643        /* Check that there are no other kprobes on the instructions to be optimized */
 644        if (arch_check_optimized_kprobe(op) < 0)
 645                return;
 646
 647        /* Check if it is already optimized. */
 648        if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) {
 649                if (optprobe_queued_unopt(op)) {
 650                        /* This is being unoptimized. Just dequeue the probe */
 651                        list_del_init(&op->list);
 652                }
 653                return;
 654        }
 655        op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
 656
 657        /* On unoptimizing/optimizing_list, op must have OPTIMIZED flag */
 658        if (WARN_ON_ONCE(!list_empty(&op->list)))
 659                return;
 660
 661        list_add(&op->list, &optimizing_list);
 662        kick_kprobe_optimizer();
 663}
 664
 665/* Shortcut to directly unoptimize a kprobe */
 666static void force_unoptimize_kprobe(struct optimized_kprobe *op)
 667{
 668        lockdep_assert_cpus_held();
 669        arch_unoptimize_kprobe(op);
 670        op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
 671        if (kprobe_disabled(&op->kp))
 672                arch_disarm_kprobe(&op->kp);
 673}
 674
 675/* Unoptimize a kprobe if p is optimized */
 676static void unoptimize_kprobe(struct kprobe *p, bool force)
 677{
 678        struct optimized_kprobe *op;
 679
 680        if (!kprobe_aggrprobe(p) || kprobe_disarmed(p))
 681                return; /* This is neither an optprobe nor optimized */
 682
 683        op = container_of(p, struct optimized_kprobe, kp);
 684        if (!kprobe_optimized(p))
 685                return;
 686
 687        if (!list_empty(&op->list)) {
 688                if (optprobe_queued_unopt(op)) {
 689                        /* Queued in unoptimizing queue */
 690                        if (force) {
 691                                /*
 692                                 * Forcibly unoptimize the kprobe here, and queue it
 693                                 * in the freeing list for release afterwards.
 694                                 */
 695                                force_unoptimize_kprobe(op);
 696                                list_move(&op->list, &freeing_list);
 697                        }
 698                } else {
 699                        /* Dequeue from the optimizing queue */
 700                        list_del_init(&op->list);
 701                        op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
 702                }
 703                return;
 704        }
 705
 706        /* Optimized kprobe case */
 707        if (force) {
 708                /* Forcibly update the code: this is a special case */
 709                force_unoptimize_kprobe(op);
 710        } else {
 711                list_add(&op->list, &unoptimizing_list);
 712                kick_kprobe_optimizer();
 713        }
 714}
 715
 716/* Cancel unoptimizing so the kprobe can be reused */
 717static int reuse_unused_kprobe(struct kprobe *ap)
 718{
 719        struct optimized_kprobe *op;
 720
 721        /*
 722         * An unused kprobe MUST be in the middle of delayed unoptimizing (which
 723         * means there is still a relative jump in place) and disabled.
 724         */
 725        op = container_of(ap, struct optimized_kprobe, kp);
 726        WARN_ON_ONCE(list_empty(&op->list));
 727        /* Enable the probe again */
 728        ap->flags &= ~KPROBE_FLAG_DISABLED;
 729        /* Optimize it again (remove from op->list) */
 730        if (!kprobe_optready(ap))
 731                return -EINVAL;
 732
 733        optimize_kprobe(ap);
 734        return 0;
 735}
 736
 737/* Remove optimized instructions */
 738static void kill_optimized_kprobe(struct kprobe *p)
 739{
 740        struct optimized_kprobe *op;
 741
 742        op = container_of(p, struct optimized_kprobe, kp);
 743        if (!list_empty(&op->list))
 744                /* Dequeue from the (un)optimization queue */
 745                list_del_init(&op->list);
 746        op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
 747
 748        if (kprobe_unused(p)) {
 749                /* Enqueue if it is unused */
 750                list_add(&op->list, &freeing_list);
 751                /*
 752                 * Remove unused probes from the hash list. After waiting
 753                 * for synchronization, this probe is reclaimed.
 754                 * (reclaiming is done by do_free_cleaned_kprobes().)
 755                 */
 756                hlist_del_rcu(&op->kp.hlist);
 757        }
 758
 759        /* Don't touch the code, because it is already freed. */
 760        arch_remove_optimized_kprobe(op);
 761}
 762
 763static inline
 764void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
 765{
 766        if (!kprobe_ftrace(p))
 767                arch_prepare_optimized_kprobe(op, p);
 768}
 769
 770/* Try to prepare optimized instructions */
 771static void prepare_optimized_kprobe(struct kprobe *p)
 772{
 773        struct optimized_kprobe *op;
 774
 775        op = container_of(p, struct optimized_kprobe, kp);
 776        __prepare_optimized_kprobe(op, p);
 777}
 778
 779/* Allocate new optimized_kprobe and try to prepare optimized instructions */
 780static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 781{
 782        struct optimized_kprobe *op;
 783
 784        op = kzalloc(sizeof(struct optimized_kprobe), GFP_KERNEL);
 785        if (!op)
 786                return NULL;
 787
 788        INIT_LIST_HEAD(&op->list);
 789        op->kp.addr = p->addr;
 790        __prepare_optimized_kprobe(op, p);
 791
 792        return &op->kp;
 793}
 794
 795static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
 796
 797/*
 798 * Prepare an optimized_kprobe and optimize it
 799 * NOTE: p must be a normal registered kprobe
 800 */
 801static void try_to_optimize_kprobe(struct kprobe *p)
 802{
 803        struct kprobe *ap;
 804        struct optimized_kprobe *op;
 805
 806        /* It is impossible to optimize an ftrace-based kprobe */
 807        if (kprobe_ftrace(p))
 808                return;
 809
 810        /* For preparing optimization, jump_label_text_reserved() is called */
 811        cpus_read_lock();
 812        jump_label_lock();
 813        mutex_lock(&text_mutex);
 814
 815        ap = alloc_aggr_kprobe(p);
 816        if (!ap)
 817                goto out;
 818
 819        op = container_of(ap, struct optimized_kprobe, kp);
 820        if (!arch_prepared_optinsn(&op->optinsn)) {
 821                /* If preparing the optimized instructions failed, fall back to a kprobe */
 822                arch_remove_optimized_kprobe(op);
 823                kfree(op);
 824                goto out;
 825        }
 826
 827        init_aggr_kprobe(ap, p);
 828        optimize_kprobe(ap);    /* This just kicks the optimizer thread */
 829
 830out:
 831        mutex_unlock(&text_mutex);
 832        jump_label_unlock();
 833        cpus_read_unlock();
 834}
 835
 836#ifdef CONFIG_SYSCTL
 837static void optimize_all_kprobes(void)
 838{
 839        struct hlist_head *head;
 840        struct kprobe *p;
 841        unsigned int i;
 842
 843        mutex_lock(&kprobe_mutex);
 844        /* If optimization is already allowed, just return */
 845        if (kprobes_allow_optimization)
 846                goto out;
 847
 848        cpus_read_lock();
 849        kprobes_allow_optimization = true;
 850        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 851                head = &kprobe_table[i];
 852                hlist_for_each_entry_rcu(p, head, hlist)
 853                        if (!kprobe_disabled(p))
 854                                optimize_kprobe(p);
 855        }
 856        cpus_read_unlock();
 857        printk(KERN_INFO "Kprobes globally optimized\n");
 858out:
 859        mutex_unlock(&kprobe_mutex);
 860}
 861
 862static void unoptimize_all_kprobes(void)
 863{
 864        struct hlist_head *head;
 865        struct kprobe *p;
 866        unsigned int i;
 867
 868        mutex_lock(&kprobe_mutex);
 869        /* If optimization is already prohibited, just return */
 870        if (!kprobes_allow_optimization) {
 871                mutex_unlock(&kprobe_mutex);
 872                return;
 873        }
 874
 875        cpus_read_lock();
 876        kprobes_allow_optimization = false;
 877        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 878                head = &kprobe_table[i];
 879                hlist_for_each_entry_rcu(p, head, hlist) {
 880                        if (!kprobe_disabled(p))
 881                                unoptimize_kprobe(p, false);
 882                }
 883        }
 884        cpus_read_unlock();
 885        mutex_unlock(&kprobe_mutex);
 886
 887        /* Wait for unoptimizing completion */
 888        wait_for_kprobe_optimizer();
 889        printk(KERN_INFO "Kprobes globally unoptimized\n");
 890}
 891
 892static DEFINE_MUTEX(kprobe_sysctl_mutex);
 893int sysctl_kprobes_optimization;
 894int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 895                                      void __user *buffer, size_t *length,
 896                                      loff_t *ppos)
 897{
 898        int ret;
 899
 900        mutex_lock(&kprobe_sysctl_mutex);
 901        sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
 902        ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
 903
 904        if (sysctl_kprobes_optimization)
 905                optimize_all_kprobes();
 906        else
 907                unoptimize_all_kprobes();
 908        mutex_unlock(&kprobe_sysctl_mutex);
 909
 910        return ret;
 911}
 912#endif /* CONFIG_SYSCTL */
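
/*
 * For reference, when CONFIG_SYSCTL is enabled the handler above is normally
 * wired up (in kernel/sysctl.c) to a knob an administrator can flip at run
 * time; on a typical build that is (illustrative, the exact path may vary):
 *
 *      # echo 0 > /proc/sys/debug/kprobes-optimization   (forbid optimization)
 *      # echo 1 > /proc/sys/debug/kprobes-optimization   (allow optimization)
 */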
 913
 914/* Install the breakpoint for a probe. Must be called with text_mutex locked */
 915static void __arm_kprobe(struct kprobe *p)
 916{
 917        struct kprobe *_p;
 918
 919        /* Check collision with other optimized kprobes */
 920        _p = get_optimized_kprobe((unsigned long)p->addr);
 921        if (unlikely(_p))
 922                /* Fall back to an unoptimized kprobe */
 923                unoptimize_kprobe(_p, true);
 924
 925        arch_arm_kprobe(p);
 926        optimize_kprobe(p);     /* Try to optimize (add kprobe to a list) */
 927}
 928
 929/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
 930static void __disarm_kprobe(struct kprobe *p, bool reopt)
 931{
 932        struct kprobe *_p;
 933
 934        /* Try to unoptimize */
 935        unoptimize_kprobe(p, kprobes_all_disarmed);
 936
 937        if (!kprobe_queued(p)) {
 938                arch_disarm_kprobe(p);
 939                /* If another kprobe was blocked, optimize it. */
 940                _p = get_optimized_kprobe((unsigned long)p->addr);
 941                if (unlikely(_p) && reopt)
 942                        optimize_kprobe(_p);
 943        }
 944        /* TODO: reoptimize others after unoptimizing this probe */
 945}
 946
 947#else /* !CONFIG_OPTPROBES */
 948
 949#define optimize_kprobe(p)                      do {} while (0)
 950#define unoptimize_kprobe(p, f)                 do {} while (0)
 951#define kill_optimized_kprobe(p)                do {} while (0)
 952#define prepare_optimized_kprobe(p)             do {} while (0)
 953#define try_to_optimize_kprobe(p)               do {} while (0)
 954#define __arm_kprobe(p)                         arch_arm_kprobe(p)
 955#define __disarm_kprobe(p, o)                   arch_disarm_kprobe(p)
 956#define kprobe_disarmed(p)                      kprobe_disabled(p)
 957#define wait_for_kprobe_optimizer()             do {} while (0)
 958
 959static int reuse_unused_kprobe(struct kprobe *ap)
 960{
 961        /*
 962         * If optimized kprobes are NOT supported, the aggr kprobe is
 963         * released at the same time that the last aggregated kprobe is
 964         * unregistered.
 965         * Thus there should be no chance to reuse an unused kprobe.
 966         */
 967        printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
 968        return -EINVAL;
 969}
 970
 971static void free_aggr_kprobe(struct kprobe *p)
 972{
 973        arch_remove_kprobe(p);
 974        kfree(p);
 975}
 976
 977static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 978{
 979        return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
 980}
 981#endif /* CONFIG_OPTPROBES */
 982
 983#ifdef CONFIG_KPROBES_ON_FTRACE
 984static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
 985        .func = kprobe_ftrace_handler,
 986        .flags = FTRACE_OPS_FL_SAVE_REGS,
 987};
 988
 989static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
 990        .func = kprobe_ftrace_handler,
 991        .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
 992};
 993
 994static int kprobe_ipmodify_enabled;
 995static int kprobe_ftrace_enabled;
 996
 997/* The caller must ensure p->addr is really an ftrace location */
 998static int prepare_kprobe(struct kprobe *p)
 999{
1000        if (!kprobe_ftrace(p))
1001                return arch_prepare_kprobe(p);
1002
1003        return arch_prepare_kprobe_ftrace(p);
1004}
1005
1006/* Caller must lock kprobe_mutex */
1007static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
1008                               int *cnt)
1009{
1010        int ret = 0;
1011
1012        ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0);
1013        if (ret) {
1014                pr_debug("Failed to arm kprobe-ftrace at %pS (%d)\n",
1015                         p->addr, ret);
1016                return ret;
1017        }
1018
1019        if (*cnt == 0) {
1020                ret = register_ftrace_function(ops);
1021                if (ret) {
1022                        pr_debug("Failed to init kprobe-ftrace (%d)\n", ret);
1023                        goto err_ftrace;
1024                }
1025        }
1026
1027        (*cnt)++;
1028        return ret;
1029
1030err_ftrace:
1031        /*
1032         * At this point, since ops is not registered, we should be safe from
1033         * registering an empty filter.
1034         */
1035        ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
1036        return ret;
1037}
1038
1039static int arm_kprobe_ftrace(struct kprobe *p)
1040{
1041        bool ipmodify = (p->post_handler != NULL);
1042
1043        return __arm_kprobe_ftrace(p,
1044                ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1045                ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1046}
1047
1048/* Caller must lock kprobe_mutex */
1049static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
1050                                  int *cnt)
1051{
1052        int ret = 0;
1053
1054        if (*cnt == 1) {
1055                ret = unregister_ftrace_function(ops);
1056                if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret))
1057                        return ret;
1058        }
1059
1060        (*cnt)--;
1061
1062        ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0);
1063        WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (%d)\n",
1064                  p->addr, ret);
1065        return ret;
1066}
1067
1068static int disarm_kprobe_ftrace(struct kprobe *p)
1069{
1070        bool ipmodify = (p->post_handler != NULL);
1071
1072        return __disarm_kprobe_ftrace(p,
1073                ipmodify ? &kprobe_ipmodify_ops : &kprobe_ftrace_ops,
1074                ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
1075}
1076#else   /* !CONFIG_KPROBES_ON_FTRACE */
1077#define prepare_kprobe(p)       arch_prepare_kprobe(p)
1078#define arm_kprobe_ftrace(p)    (-ENODEV)
1079#define disarm_kprobe_ftrace(p) (-ENODEV)
1080#endif
1081
1082/* Arm a kprobe with text_mutex */
1083static int arm_kprobe(struct kprobe *kp)
1084{
1085        if (unlikely(kprobe_ftrace(kp)))
1086                return arm_kprobe_ftrace(kp);
1087
1088        cpus_read_lock();
1089        mutex_lock(&text_mutex);
1090        __arm_kprobe(kp);
1091        mutex_unlock(&text_mutex);
1092        cpus_read_unlock();
1093
1094        return 0;
1095}
1096
1097/* Disarm a kprobe with text_mutex */
1098static int disarm_kprobe(struct kprobe *kp, bool reopt)
1099{
1100        if (unlikely(kprobe_ftrace(kp)))
1101                return disarm_kprobe_ftrace(kp);
1102
1103        cpus_read_lock();
1104        mutex_lock(&text_mutex);
1105        __disarm_kprobe(kp, reopt);
1106        mutex_unlock(&text_mutex);
1107        cpus_read_unlock();
1108
1109        return 0;
1110}
1111
1112/*
1113 * Aggregate handlers for multiple kprobes support - these handlers
1114 * take care of invoking the individual kprobe handlers on p->list
1115 */
1116static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
1117{
1118        struct kprobe *kp;
1119
1120        list_for_each_entry_rcu(kp, &p->list, list) {
1121                if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
1122                        set_kprobe_instance(kp);
1123                        if (kp->pre_handler(kp, regs))
1124                                return 1;
1125                }
1126                reset_kprobe_instance();
1127        }
1128        return 0;
1129}
1130NOKPROBE_SYMBOL(aggr_pre_handler);
1131
1132static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
1133                              unsigned long flags)
1134{
1135        struct kprobe *kp;
1136
1137        list_for_each_entry_rcu(kp, &p->list, list) {
1138                if (kp->post_handler && likely(!kprobe_disabled(kp))) {
1139                        set_kprobe_instance(kp);
1140                        kp->post_handler(kp, regs, flags);
1141                        reset_kprobe_instance();
1142                }
1143        }
1144}
1145NOKPROBE_SYMBOL(aggr_post_handler);
1146
1147static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
1148                              int trapnr)
1149{
1150        struct kprobe *cur = __this_cpu_read(kprobe_instance);
1151
1152        /*
1153         * If we faulted "during" the execution of a user-specified
1154         * probe handler, invoke just that probe's fault handler.
1155         */
1156        if (cur && cur->fault_handler) {
1157                if (cur->fault_handler(cur, regs, trapnr))
1158                        return 1;
1159        }
1160        return 0;
1161}
1162NOKPROBE_SYMBOL(aggr_fault_handler);
1163
1164/* Walk the list and increment the nmissed count for the multiprobe case */
1165void kprobes_inc_nmissed_count(struct kprobe *p)
1166{
1167        struct kprobe *kp;
1168        if (!kprobe_aggrprobe(p)) {
1169                p->nmissed++;
1170        } else {
1171                list_for_each_entry_rcu(kp, &p->list, list)
1172                        kp->nmissed++;
1173        }
1174        return;
1175}
1176NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
1177
1178void recycle_rp_inst(struct kretprobe_instance *ri,
1179                     struct hlist_head *head)
1180{
1181        struct kretprobe *rp = ri->rp;
1182
1183        /* Remove the rp inst from the kretprobe_inst_table */
1184        hlist_del(&ri->hlist);
1185        INIT_HLIST_NODE(&ri->hlist);
1186        if (likely(rp)) {
1187                raw_spin_lock(&rp->lock);
1188                hlist_add_head(&ri->hlist, &rp->free_instances);
1189                raw_spin_unlock(&rp->lock);
1190        } else
1191                /* Unregistering */
1192                hlist_add_head(&ri->hlist, head);
1193}
1194NOKPROBE_SYMBOL(recycle_rp_inst);
1195
1196void kretprobe_hash_lock(struct task_struct *tsk,
1197                         struct hlist_head **head, unsigned long *flags)
1198__acquires(hlist_lock)
1199{
1200        unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
1201        raw_spinlock_t *hlist_lock;
1202
1203        *head = &kretprobe_inst_table[hash];
1204        hlist_lock = kretprobe_table_lock_ptr(hash);
1205        raw_spin_lock_irqsave(hlist_lock, *flags);
1206}
1207NOKPROBE_SYMBOL(kretprobe_hash_lock);
1208
1209static void kretprobe_table_lock(unsigned long hash,
1210                                 unsigned long *flags)
1211__acquires(hlist_lock)
1212{
1213        raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
1214        raw_spin_lock_irqsave(hlist_lock, *flags);
1215}
1216NOKPROBE_SYMBOL(kretprobe_table_lock);
1217
1218void kretprobe_hash_unlock(struct task_struct *tsk,
1219                           unsigned long *flags)
1220__releases(hlist_lock)
1221{
1222        unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
1223        raw_spinlock_t *hlist_lock;
1224
1225        hlist_lock = kretprobe_table_lock_ptr(hash);
1226        raw_spin_unlock_irqrestore(hlist_lock, *flags);
1227}
1228NOKPROBE_SYMBOL(kretprobe_hash_unlock);
1229
1230static void kretprobe_table_unlock(unsigned long hash,
1231                                   unsigned long *flags)
1232__releases(hlist_lock)
1233{
1234        raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
1235        raw_spin_unlock_irqrestore(hlist_lock, *flags);
1236}
1237NOKPROBE_SYMBOL(kretprobe_table_unlock);
1238
1239/*
1240 * This function is called from finish_task_switch when task tk becomes dead,
1241 * so that we can recycle any function-return probe instances associated
1242 * with this task. These left over instances represent probed functions
1243 * that have been called but will never return.
1244 */
1245void kprobe_flush_task(struct task_struct *tk)
1246{
1247        struct kretprobe_instance *ri;
1248        struct hlist_head *head, empty_rp;
1249        struct hlist_node *tmp;
1250        unsigned long hash, flags = 0;
1251
1252        if (unlikely(!kprobes_initialized))
1253                /* Early boot.  kretprobe_table_locks not yet initialized. */
1254                return;
1255
1256        INIT_HLIST_HEAD(&empty_rp);
1257        hash = hash_ptr(tk, KPROBE_HASH_BITS);
1258        head = &kretprobe_inst_table[hash];
1259        kretprobe_table_lock(hash, &flags);
1260        hlist_for_each_entry_safe(ri, tmp, head, hlist) {
1261                if (ri->task == tk)
1262                        recycle_rp_inst(ri, &empty_rp);
1263        }
1264        kretprobe_table_unlock(hash, &flags);
1265        hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
1266                hlist_del(&ri->hlist);
1267                kfree(ri);
1268        }
1269}
1270NOKPROBE_SYMBOL(kprobe_flush_task);
1271
1272static inline void free_rp_inst(struct kretprobe *rp)
1273{
1274        struct kretprobe_instance *ri;
1275        struct hlist_node *next;
1276
1277        hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) {
1278                hlist_del(&ri->hlist);
1279                kfree(ri);
1280        }
1281}
1282
1283static void cleanup_rp_inst(struct kretprobe *rp)
1284{
1285        unsigned long flags, hash;
1286        struct kretprobe_instance *ri;
1287        struct hlist_node *next;
1288        struct hlist_head *head;
1289
1290        /* No race here */
1291        for (hash = 0; hash < KPROBE_TABLE_SIZE; hash++) {
1292                kretprobe_table_lock(hash, &flags);
1293                head = &kretprobe_inst_table[hash];
1294                hlist_for_each_entry_safe(ri, next, head, hlist) {
1295                        if (ri->rp == rp)
1296                                ri->rp = NULL;
1297                }
1298                kretprobe_table_unlock(hash, &flags);
1299        }
1300        free_rp_inst(rp);
1301}
1302NOKPROBE_SYMBOL(cleanup_rp_inst);
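
/*
 * The instance recycling above is what a kretprobe user relies on. A minimal
 * user might look like the sketch below (illustrative only: the handler name,
 * the variable names and the probed symbol are placeholders, and
 * register_kretprobe()/unregister_kretprobe() are defined later in this file):
 *
 *      static int my_ret_handler(struct kretprobe_instance *ri,
 *                                struct pt_regs *regs)
 *      {
 *              pr_info("probed function returned\n");
 *              return 0;
 *      }
 *
 *      static struct kretprobe my_kretprobe = {
 *              .handler        = my_ret_handler,
 *              .maxactive      = 20,
 *              .kp.symbol_name = "function_to_probe",
 *      };
 *
 *      ret = register_kretprobe(&my_kretprobe);
 *      ...
 *      unregister_kretprobe(&my_kretprobe);
 */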
1303
1304/* Add the new probe to ap->list */
1305static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
1306{
1307        if (p->post_handler)
1308                unoptimize_kprobe(ap, true);    /* Fall back to normal kprobe */
1309
1310        list_add_rcu(&p->list, &ap->list);
1311        if (p->post_handler && !ap->post_handler)
1312                ap->post_handler = aggr_post_handler;
1313
1314        return 0;
1315}
1316
1317/*
1318 * Fill in the required fields of the "manager kprobe". Replace the
1319 * earlier kprobe in the hlist with the manager kprobe
1320 */
1321static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
1322{
1323        /* Copy p's insn slot to ap */
1324        copy_kprobe(p, ap);
1325        flush_insn_slot(ap);
1326        ap->addr = p->addr;
1327        ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
1328        ap->pre_handler = aggr_pre_handler;
1329        ap->fault_handler = aggr_fault_handler;
1330        /* We don't care about a kprobe which has gone away. */
1331        if (p->post_handler && !kprobe_gone(p))
1332                ap->post_handler = aggr_post_handler;
1333
1334        INIT_LIST_HEAD(&ap->list);
1335        INIT_HLIST_NODE(&ap->hlist);
1336
1337        list_add_rcu(&p->list, &ap->list);
1338        hlist_replace_rcu(&p->hlist, &ap->hlist);
1339}
1340
1341/*
1342 * This is the second or subsequent kprobe at the address - handle
1343 * the intricacies
1344 */
1345static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
1346{
1347        int ret = 0;
1348        struct kprobe *ap = orig_p;
1349
1350        cpus_read_lock();
1351
1352        /* For preparing optimization, jump_label_text_reserved() is called */
1353        jump_label_lock();
1354        mutex_lock(&text_mutex);
1355
1356        if (!kprobe_aggrprobe(orig_p)) {
1357                /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
1358                ap = alloc_aggr_kprobe(orig_p);
1359                if (!ap) {
1360                        ret = -ENOMEM;
1361                        goto out;
1362                }
1363                init_aggr_kprobe(ap, orig_p);
1364        } else if (kprobe_unused(ap)) {
1365                /* This probe is going to die. Rescue it */
1366                ret = reuse_unused_kprobe(ap);
1367                if (ret)
1368                        goto out;
1369        }
1370
1371        if (kprobe_gone(ap)) {
1372                /*
1373                 * Attempting to insert a new probe at the same location as
1374                 * a probe that was in a module vaddr area which has already
1375                 * been freed. So the instruction slot has already been
1376                 * released. We need a new slot for the new probe.
1377                 */
1378                ret = arch_prepare_kprobe(ap);
1379                if (ret)
1380                        /*
1381                         * Even if we fail to allocate a new slot, we don't need
1382                         * to free the aggr_kprobe. It will be used next time, or
1383                         * freed by unregister_kprobe().
1384                         */
1385                        goto out;
1386
1387                /* Prepare optimized instructions if possible. */
1388                prepare_optimized_kprobe(ap);
1389
1390                /*
1391                 * Clear gone flag to prevent allocating new slot again, and
1392                 * set disabled flag because it is not armed yet.
1393                 */
1394                ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
1395                            | KPROBE_FLAG_DISABLED;
1396        }
1397
1398        /* Copy ap's insn slot to p */
1399        copy_kprobe(ap, p);
1400        ret = add_new_kprobe(ap, p);
1401
1402out:
1403        mutex_unlock(&text_mutex);
1404        jump_label_unlock();
1405        cpus_read_unlock();
1406
1407        if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
1408                ap->flags &= ~KPROBE_FLAG_DISABLED;
1409                if (!kprobes_all_disarmed) {
1410                        /* Arm the breakpoint again. */
1411                        ret = arm_kprobe(ap);
1412                        if (ret) {
1413                                ap->flags |= KPROBE_FLAG_DISABLED;
1414                                list_del_rcu(&p->list);
1415                                synchronize_rcu();
1416                        }
1417                }
1418        }
1419        return ret;
1420}
1421
1422bool __weak arch_within_kprobe_blacklist(unsigned long addr)
1423{
1424        /* The __kprobes marked functions and entry code must not be probed */
1425        return addr >= (unsigned long)__kprobes_text_start &&
1426               addr < (unsigned long)__kprobes_text_end;
1427}
1428
1429static bool __within_kprobe_blacklist(unsigned long addr)
1430{
1431        struct kprobe_blacklist_entry *ent;
1432
1433        if (arch_within_kprobe_blacklist(addr))
1434                return true;
1435        /*
1436         * If there is a kprobe_blacklist, verify the address and
1437         * fail any probe registration inside a prohibited area.
1438         */
1439        list_for_each_entry(ent, &kprobe_blacklist, list) {
1440                if (addr >= ent->start_addr && addr < ent->end_addr)
1441                        return true;
1442        }
1443        return false;
1444}
1445
1446bool within_kprobe_blacklist(unsigned long addr)
1447{
1448        char symname[KSYM_NAME_LEN], *p;
1449
1450        if (__within_kprobe_blacklist(addr))
1451                return true;
1452
1453        /* Check if the address is on a suffixed symbol, e.g. "foo.isra.0" */
1454        if (!lookup_symbol_name(addr, symname)) {
1455                p = strchr(symname, '.');
1456                if (!p)
1457                        return false;
1458                *p = '\0';
1459                addr = (unsigned long)kprobe_lookup_name(symname, 0);
1460                if (addr)
1461                        return __within_kprobe_blacklist(addr);
1462        }
1463        return false;
1464}
1465
1466/*
1467 * If we have a symbol_name argument, look it up and add the offset field
1468 * to it. This way, we can specify a relative address to a symbol.
1469 * This returns an encoded error if it fails to look up the symbol or if an
1470 * invalid combination of parameters is passed.
1471 */
1472static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr,
1473                        const char *symbol_name, unsigned int offset)
1474{
1475        if ((symbol_name && addr) || (!symbol_name && !addr))
1476                goto invalid;
1477
1478        if (symbol_name) {
1479                addr = kprobe_lookup_name(symbol_name, offset);
1480                if (!addr)
1481                        return ERR_PTR(-ENOENT);
1482        }
1483
1484        addr = (kprobe_opcode_t *)(((char *)addr) + offset);
1485        if (addr)
1486                return addr;
1487
1488invalid:
1489        return ERR_PTR(-EINVAL);
1490}
1491
1492static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
1493{
1494        return _kprobe_addr(p->addr, p->symbol_name, p->offset);
1495}
1496
1497/* Check that the passed kprobe is valid and return the kprobe in kprobe_table. */
1498static struct kprobe *__get_valid_kprobe(struct kprobe *p)
1499{
1500        struct kprobe *ap, *list_p;
1501
1502        ap = get_kprobe(p->addr);
1503        if (unlikely(!ap))
1504                return NULL;
1505
1506        if (p != ap) {
1507                list_for_each_entry_rcu(list_p, &ap->list, list)
1508                        if (list_p == p)
1509                        /* kprobe p is a valid probe */
1510                                goto valid;
1511                return NULL;
1512        }
1513valid:
1514        return ap;
1515}
1516
1517/* Return error if the kprobe is being re-registered */
1518static inline int check_kprobe_rereg(struct kprobe *p)
1519{
1520        int ret = 0;
1521
1522        mutex_lock(&kprobe_mutex);
1523        if (__get_valid_kprobe(p))
1524                ret = -EINVAL;
1525        mutex_unlock(&kprobe_mutex);
1526
1527        return ret;
1528}
1529
1530int __weak arch_check_ftrace_location(struct kprobe *p)
1531{
1532        unsigned long ftrace_addr;
1533
1534        ftrace_addr = ftrace_location((unsigned long)p->addr);
1535        if (ftrace_addr) {
1536#ifdef CONFIG_KPROBES_ON_FTRACE
1537                /* The given address is not on an instruction boundary */
1538                if ((unsigned long)p->addr != ftrace_addr)
1539                        return -EILSEQ;
1540                p->flags |= KPROBE_FLAG_FTRACE;
1541#else   /* !CONFIG_KPROBES_ON_FTRACE */
1542                return -EINVAL;
1543#endif
1544        }
1545        return 0;
1546}
1547
1548static int check_kprobe_address_safe(struct kprobe *p,
1549                                     struct module **probed_mod)
1550{
1551        int ret;
1552
1553        ret = arch_check_ftrace_location(p);
1554        if (ret)
1555                return ret;
1556        jump_label_lock();
1557        preempt_disable();
1558
1559        /* Ensure it is neither in a reserved area nor outside of kernel text */
1560        if (!kernel_text_address((unsigned long) p->addr) ||
1561            within_kprobe_blacklist((unsigned long) p->addr) ||
1562            jump_label_text_reserved(p->addr, p->addr) ||
1563            find_bug((unsigned long)p->addr)) {
1564                ret = -EINVAL;
1565                goto out;
1566        }
1567
1568        /* Check if we are probing a module */
1569        *probed_mod = __module_text_address((unsigned long) p->addr);
1570        if (*probed_mod) {
1571                /*
1572                 * We must hold a refcount of the probed module while updating
1573                 * its code to prohibit unexpected unloading.
1574                 */
1575                if (unlikely(!try_module_get(*probed_mod))) {
1576                        ret = -ENOENT;
1577                        goto out;
1578                }
1579
1580                /*
1581                 * If the module has already freed its .init.text, we cannot
1582                 * insert kprobes there.
1583                 */
1584                if (within_module_init((unsigned long)p->addr, *probed_mod) &&
1585                    (*probed_mod)->state != MODULE_STATE_COMING) {
1586                        module_put(*probed_mod);
1587                        *probed_mod = NULL;
1588                        ret = -ENOENT;
1589                }
1590        }
1591out:
1592        preempt_enable();
1593        jump_label_unlock();
1594
1595        return ret;
1596}
1597
1598int register_kprobe(struct kprobe *p)
1599{
1600        int ret;
1601        struct kprobe *old_p;
1602        struct module *probed_mod;
1603        kprobe_opcode_t *addr;
1604
1605        /* Adjust probe address from symbol */
1606        addr = kprobe_addr(p);
1607        if (IS_ERR(addr))
1608                return PTR_ERR(addr);
1609        p->addr = addr;
1610
1611        ret = check_kprobe_rereg(p);
1612        if (ret)
1613                return ret;
1614
1615        /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
1616        p->flags &= KPROBE_FLAG_DISABLED;
1617        p->nmissed = 0;
1618        INIT_LIST_HEAD(&p->list);
1619
1620        ret = check_kprobe_address_safe(p, &probed_mod);
1621        if (ret)
1622                return ret;
1623
1624        mutex_lock(&kprobe_mutex);
1625
1626        old_p = get_kprobe(p->addr);
1627        if (old_p) {
1628                /* Since this may unoptimize old_p, locking text_mutex. */
1629                ret = register_aggr_kprobe(old_p, p);
1630                goto out;
1631        }
1632
1633        cpus_read_lock();
1634        /* Prevent text modification */
1635        mutex_lock(&text_mutex);
1636        ret = prepare_kprobe(p);
1637        mutex_unlock(&text_mutex);
1638        cpus_read_unlock();
1639        if (ret)
1640                goto out;
1641
1642        INIT_HLIST_NODE(&p->hlist);
1643        hlist_add_head_rcu(&p->hlist,
1644                       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
1645
1646        if (!kprobes_all_disarmed && !kprobe_disabled(p)) {
1647                ret = arm_kprobe(p);
1648                if (ret) {
1649                        hlist_del_rcu(&p->hlist);
1650                        synchronize_rcu();
1651                        goto out;
1652                }
1653        }
1654
1655        /* Try to optimize kprobe */
1656        try_to_optimize_kprobe(p);
1657out:
1658        mutex_unlock(&kprobe_mutex);
1659
1660        if (probed_mod)
1661                module_put(probed_mod);
1662
1663        return ret;
1664}
1665EXPORT_SYMBOL_GPL(register_kprobe);
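/*
 * Example usage (a minimal sketch, disabled from the build): a kernel module
 * that registers one kprobe at a function entry by naming the symbol and
 * leaving .offset at zero. The target symbol and the handler below are
 * arbitrary examples.
 */
#if 0
#include <linux/module.h>
#include <linux/kprobes.h>

static int example_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("kprobe hit at %pS\n", p->addr);
	return 0;	/* 0: let the probed instruction execute as usual */
}

static struct kprobe example_kp = {
	.symbol_name	= "do_sys_open",	/* example target */
	.pre_handler	= example_pre,
};

static int __init example_init(void)
{
	return register_kprobe(&example_kp);
}

static void __exit example_exit(void)
{
	unregister_kprobe(&example_kp);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
#endif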
1666
1667/* Check if all probes on the aggrprobe are disabled */
1668static int aggr_kprobe_disabled(struct kprobe *ap)
1669{
1670        struct kprobe *kp;
1671
1672        list_for_each_entry_rcu(kp, &ap->list, list)
1673                if (!kprobe_disabled(kp))
1674                        /*
1675                         * There is an active probe on the list.
1676                         * We can't disable this ap.
1677                         */
1678                        return 0;
1679
1680        return 1;
1681}
1682
1683/* Disable one kprobe: must be called with kprobe_mutex held */
1684static struct kprobe *__disable_kprobe(struct kprobe *p)
1685{
1686        struct kprobe *orig_p;
1687        int ret;
1688
1689        /* Get an original kprobe for return */
1690        orig_p = __get_valid_kprobe(p);
1691        if (unlikely(orig_p == NULL))
1692                return ERR_PTR(-EINVAL);
1693
1694        if (!kprobe_disabled(p)) {
1695                /* Disable probe if it is a child probe */
1696                if (p != orig_p)
1697                        p->flags |= KPROBE_FLAG_DISABLED;
1698
1699                /* Try to disarm and disable this/parent probe */
1700                if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
1701                        /*
1702                         * If kprobes_all_disarmed is set, orig_p
1703                         * should have already been disarmed, so
1704                         * skip the unneeded disarming step.
1705                         */
1706                        if (!kprobes_all_disarmed) {
1707                                ret = disarm_kprobe(orig_p, true);
1708                                if (ret) {
1709                                        p->flags &= ~KPROBE_FLAG_DISABLED;
1710                                        return ERR_PTR(ret);
1711                                }
1712                        }
1713                        orig_p->flags |= KPROBE_FLAG_DISABLED;
1714                }
1715        }
1716
1717        return orig_p;
1718}
1719
1720/*
1721 * Unregister a kprobe without scheduler synchronization.
1722 */
1723static int __unregister_kprobe_top(struct kprobe *p)
1724{
1725        struct kprobe *ap, *list_p;
1726
1727        /* Disable kprobe. This will disarm it if needed. */
1728        ap = __disable_kprobe(p);
1729        if (IS_ERR(ap))
1730                return PTR_ERR(ap);
1731
1732        if (ap == p)
1733                /*
1734                 * This probe is an independent (and non-optimized) kprobe
1735                 * (not an aggrprobe). Remove from the hash list.
1736                 */
1737                goto disarmed;
1738
1739        /* The following code expects this probe to be an aggrprobe */
1740        WARN_ON(!kprobe_aggrprobe(ap));
1741
1742        if (list_is_singular(&ap->list) && kprobe_disarmed(ap))
1743                /*
1744                 * !disarmed can happen if the probe is under delayed
1745                 * unoptimization.
1746                 */
1747                goto disarmed;
1748        else {
1749                /* If the probe being disabled has special handlers, update the aggrprobe */
1750                if (p->post_handler && !kprobe_gone(p)) {
1751                        list_for_each_entry_rcu(list_p, &ap->list, list) {
1752                                if ((list_p != p) && (list_p->post_handler))
1753                                        goto noclean;
1754                        }
1755                        ap->post_handler = NULL;
1756                }
1757noclean:
1758                /*
1759                 * Remove from the aggrprobe: this path will do nothing in
1760                 * __unregister_kprobe_bottom().
1761                 */
1762                list_del_rcu(&p->list);
1763                if (!kprobe_disabled(ap) && !kprobes_all_disarmed)
1764                        /*
1765                         * Try to optimize this probe again, because post
1766                         * handler may have been changed.
1767                         */
1768                        optimize_kprobe(ap);
1769        }
1770        return 0;
1771
1772disarmed:
1773        hlist_del_rcu(&ap->hlist);
1774        return 0;
1775}
1776
1777static void __unregister_kprobe_bottom(struct kprobe *p)
1778{
1779        struct kprobe *ap;
1780
1781        if (list_empty(&p->list))
1782                /* This is an independent kprobe */
1783                arch_remove_kprobe(p);
1784        else if (list_is_singular(&p->list)) {
1785                /* This is the last child of an aggrprobe */
1786                ap = list_entry(p->list.next, struct kprobe, list);
1787                list_del(&p->list);
1788                free_aggr_kprobe(ap);
1789        }
1790        /* Otherwise, do nothing. */
1791}
1792
1793int register_kprobes(struct kprobe **kps, int num)
1794{
1795        int i, ret = 0;
1796
1797        if (num <= 0)
1798                return -EINVAL;
1799        for (i = 0; i < num; i++) {
1800                ret = register_kprobe(kps[i]);
1801                if (ret < 0) {
1802                        if (i > 0)
1803                                unregister_kprobes(kps, i);
1804                        break;
1805                }
1806        }
1807        return ret;
1808}
1809EXPORT_SYMBOL_GPL(register_kprobes);
1810
1811void unregister_kprobe(struct kprobe *p)
1812{
1813        unregister_kprobes(&p, 1);
1814}
1815EXPORT_SYMBOL_GPL(unregister_kprobe);
1816
1817void unregister_kprobes(struct kprobe **kps, int num)
1818{
1819        int i;
1820
1821        if (num <= 0)
1822                return;
1823        mutex_lock(&kprobe_mutex);
1824        for (i = 0; i < num; i++)
1825                if (__unregister_kprobe_top(kps[i]) < 0)
1826                        kps[i]->addr = NULL;
1827        mutex_unlock(&kprobe_mutex);
1828
1829        synchronize_rcu();
1830        for (i = 0; i < num; i++)
1831                if (kps[i]->addr)
1832                        __unregister_kprobe_bottom(kps[i]);
1833}
1834EXPORT_SYMBOL_GPL(unregister_kprobes);
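/*
 * Example usage (sketch): batch registration with register_kprobes(). If any
 * registration fails, the probes already registered from the array are rolled
 * back before the error is returned, so the caller only handles the error
 * code. example_kp1/example_kp2 are assumed to be defined as in the sketch
 * after register_kprobe() above.
 */
#if 0
static struct kprobe *example_kps[] = { &example_kp1, &example_kp2 };

static int __init example_batch_init(void)
{
	return register_kprobes(example_kps, ARRAY_SIZE(example_kps));
}

static void __exit example_batch_exit(void)
{
	unregister_kprobes(example_kps, ARRAY_SIZE(example_kps));
}
#endif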
1835
1836int __weak kprobe_exceptions_notify(struct notifier_block *self,
1837                                        unsigned long val, void *data)
1838{
1839        return NOTIFY_DONE;
1840}
1841NOKPROBE_SYMBOL(kprobe_exceptions_notify);
1842
1843static struct notifier_block kprobe_exceptions_nb = {
1844        .notifier_call = kprobe_exceptions_notify,
1845        .priority = 0x7fffffff /* we need to be notified first */
1846};
1847
1848unsigned long __weak arch_deref_entry_point(void *entry)
1849{
1850        return (unsigned long)entry;
1851}
1852
1853#ifdef CONFIG_KRETPROBES
1854/*
1855 * This kprobe pre_handler is registered with every kretprobe. When the
1856 * probe hits, it sets up the return probe.
1857 */
1858static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
1859{
1860        struct kretprobe *rp = container_of(p, struct kretprobe, kp);
1861        unsigned long hash, flags = 0;
1862        struct kretprobe_instance *ri;
1863
1864        /*
1865         * To avoid deadlocks, prohibit return probing in NMI contexts:
1866         * just skip the probe and increase the (inexact) 'nmissed'
1867         * statistical counter, so that the user is informed that
1868         * something happened:
1869         */
1870        if (unlikely(in_nmi())) {
1871                rp->nmissed++;
1872                return 0;
1873        }
1874
1875        /* TODO: consider swapping the RA only after the last pre_handler has fired */
1876        hash = hash_ptr(current, KPROBE_HASH_BITS);
1877        raw_spin_lock_irqsave(&rp->lock, flags);
1878        if (!hlist_empty(&rp->free_instances)) {
1879                ri = hlist_entry(rp->free_instances.first,
1880                                struct kretprobe_instance, hlist);
1881                hlist_del(&ri->hlist);
1882                raw_spin_unlock_irqrestore(&rp->lock, flags);
1883
1884                ri->rp = rp;
1885                ri->task = current;
1886
1887                if (rp->entry_handler && rp->entry_handler(ri, regs)) {
1888                        raw_spin_lock_irqsave(&rp->lock, flags);
1889                        hlist_add_head(&ri->hlist, &rp->free_instances);
1890                        raw_spin_unlock_irqrestore(&rp->lock, flags);
1891                        return 0;
1892                }
1893
1894                arch_prepare_kretprobe(ri, regs);
1895
1896                /* XXX(hch): why is there no hlist_move_head? */
1897                INIT_HLIST_NODE(&ri->hlist);
1898                kretprobe_table_lock(hash, &flags);
1899                hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
1900                kretprobe_table_unlock(hash, &flags);
1901        } else {
1902                rp->nmissed++;
1903                raw_spin_unlock_irqrestore(&rp->lock, flags);
1904        }
1905        return 0;
1906}
1907NOKPROBE_SYMBOL(pre_handler_kretprobe);
1908
1909bool __weak arch_kprobe_on_func_entry(unsigned long offset)
1910{
1911        return !offset;
1912}
1913
1914bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
1915{
1916        kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
1917
1918        if (IS_ERR(kp_addr))
1919                return false;
1920
1921        if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
1922                                                !arch_kprobe_on_func_entry(offset))
1923                return false;
1924
1925        return true;
1926}
1927
1928int register_kretprobe(struct kretprobe *rp)
1929{
1930        int ret = 0;
1931        struct kretprobe_instance *inst;
1932        int i;
1933        void *addr;
1934
1935        if (!kprobe_on_func_entry(rp->kp.addr, rp->kp.symbol_name, rp->kp.offset))
1936                return -EINVAL;
1937
1938        if (kretprobe_blacklist_size) {
1939                addr = kprobe_addr(&rp->kp);
1940                if (IS_ERR(addr))
1941                        return PTR_ERR(addr);
1942
1943                for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
1944                        if (kretprobe_blacklist[i].addr == addr)
1945                                return -EINVAL;
1946                }
1947        }
1948
1949        rp->kp.pre_handler = pre_handler_kretprobe;
1950        rp->kp.post_handler = NULL;
1951        rp->kp.fault_handler = NULL;
1952
1953        /* Pre-allocate memory for max kretprobe instances */
1954        if (rp->maxactive <= 0) {
1955#ifdef CONFIG_PREEMPTION
1956                rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
1957#else
1958                rp->maxactive = num_possible_cpus();
1959#endif
1960        }
1961        raw_spin_lock_init(&rp->lock);
1962        INIT_HLIST_HEAD(&rp->free_instances);
1963        for (i = 0; i < rp->maxactive; i++) {
1964                inst = kmalloc(sizeof(struct kretprobe_instance) +
1965                               rp->data_size, GFP_KERNEL);
1966                if (inst == NULL) {
1967                        free_rp_inst(rp);
1968                        return -ENOMEM;
1969                }
1970                INIT_HLIST_NODE(&inst->hlist);
1971                hlist_add_head(&inst->hlist, &rp->free_instances);
1972        }
1973
1974        rp->nmissed = 0;
1975        /* Establish function entry probe point */
1976        ret = register_kprobe(&rp->kp);
1977        if (ret != 0)
1978                free_rp_inst(rp);
1979        return ret;
1980}
1981EXPORT_SYMBOL_GPL(register_kretprobe);
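/*
 * Example usage (a minimal sketch, disabled from the build): a kretprobe that
 * measures how long the probed function ran and reports its return value.
 * The symbol, the handler names and the per-instance data are arbitrary
 * examples; maxactive is left at 0 so the default sizing above is used.
 */
#if 0
#include <linux/kprobes.h>
#include <linux/ktime.h>

struct example_data {
	ktime_t entry_time;
};

static int example_entry(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct example_data *d = (struct example_data *)ri->data;

	d->entry_time = ktime_get();
	return 0;	/* 0: arm the return probe for this instance */
}

static int example_ret(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct example_data *d = (struct example_data *)ri->data;
	s64 delta = ktime_to_ns(ktime_sub(ktime_get(), d->entry_time));

	pr_info("returned %lu after %lld ns\n",
		regs_return_value(regs), (long long)delta);
	return 0;
}

static struct kretprobe example_rp = {
	.kp.symbol_name	= "_do_fork",		/* example target */
	.entry_handler	= example_entry,
	.handler	= example_ret,
	.data_size	= sizeof(struct example_data),
};
/* register_kretprobe(&example_rp) in module init, unregister_kretprobe() on exit */
#endif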
1982
1983int register_kretprobes(struct kretprobe **rps, int num)
1984{
1985        int ret = 0, i;
1986
1987        if (num <= 0)
1988                return -EINVAL;
1989        for (i = 0; i < num; i++) {
1990                ret = register_kretprobe(rps[i]);
1991                if (ret < 0) {
1992                        if (i > 0)
1993                                unregister_kretprobes(rps, i);
1994                        break;
1995                }
1996        }
1997        return ret;
1998}
1999EXPORT_SYMBOL_GPL(register_kretprobes);
2000
2001void unregister_kretprobe(struct kretprobe *rp)
2002{
2003        unregister_kretprobes(&rp, 1);
2004}
2005EXPORT_SYMBOL_GPL(unregister_kretprobe);
2006
2007void unregister_kretprobes(struct kretprobe **rps, int num)
2008{
2009        int i;
2010
2011        if (num <= 0)
2012                return;
2013        mutex_lock(&kprobe_mutex);
2014        for (i = 0; i < num; i++)
2015                if (__unregister_kprobe_top(&rps[i]->kp) < 0)
2016                        rps[i]->kp.addr = NULL;
2017        mutex_unlock(&kprobe_mutex);
2018
2019        synchronize_rcu();
2020        for (i = 0; i < num; i++) {
2021                if (rps[i]->kp.addr) {
2022                        __unregister_kprobe_bottom(&rps[i]->kp);
2023                        cleanup_rp_inst(rps[i]);
2024                }
2025        }
2026}
2027EXPORT_SYMBOL_GPL(unregister_kretprobes);
2028
2029#else /* CONFIG_KRETPROBES */
2030int register_kretprobe(struct kretprobe *rp)
2031{
2032        return -ENOSYS;
2033}
2034EXPORT_SYMBOL_GPL(register_kretprobe);
2035
2036int register_kretprobes(struct kretprobe **rps, int num)
2037{
2038        return -ENOSYS;
2039}
2040EXPORT_SYMBOL_GPL(register_kretprobes);
2041
2042void unregister_kretprobe(struct kretprobe *rp)
2043{
2044}
2045EXPORT_SYMBOL_GPL(unregister_kretprobe);
2046
2047void unregister_kretprobes(struct kretprobe **rps, int num)
2048{
2049}
2050EXPORT_SYMBOL_GPL(unregister_kretprobes);
2051
2052static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
2053{
2054        return 0;
2055}
2056NOKPROBE_SYMBOL(pre_handler_kretprobe);
2057
2058#endif /* CONFIG_KRETPROBES */
2059
2060/* Mark the kprobe as gone and remove its instruction buffer. */
2061static void kill_kprobe(struct kprobe *p)
2062{
2063        struct kprobe *kp;
2064
2065        p->flags |= KPROBE_FLAG_GONE;
2066        if (kprobe_aggrprobe(p)) {
2067                /*
2068                 * If this is an aggr_kprobe, we have to list all the
2069                 * chained probes and mark them GONE.
2070                 */
2071                list_for_each_entry_rcu(kp, &p->list, list)
2072                        kp->flags |= KPROBE_FLAG_GONE;
2073                p->post_handler = NULL;
2074                kill_optimized_kprobe(p);
2075        }
2076        /*
2077         * Here, we can remove insn_slot safely, because no thread calls
2078         * the original probed function (which will be freed soon) any more.
2079         */
2080        arch_remove_kprobe(p);
2081}
2082
2083/* Disable one kprobe */
2084int disable_kprobe(struct kprobe *kp)
2085{
2086        int ret = 0;
2087        struct kprobe *p;
2088
2089        mutex_lock(&kprobe_mutex);
2090
2091        /* Disable this kprobe */
2092        p = __disable_kprobe(kp);
2093        if (IS_ERR(p))
2094                ret = PTR_ERR(p);
2095
2096        mutex_unlock(&kprobe_mutex);
2097        return ret;
2098}
2099EXPORT_SYMBOL_GPL(disable_kprobe);
2100
2101/* Enable one kprobe */
2102int enable_kprobe(struct kprobe *kp)
2103{
2104        int ret = 0;
2105        struct kprobe *p;
2106
2107        mutex_lock(&kprobe_mutex);
2108
2109        /* Check whether specified probe is valid. */
2110        p = __get_valid_kprobe(kp);
2111        if (unlikely(p == NULL)) {
2112                ret = -EINVAL;
2113                goto out;
2114        }
2115
2116        if (kprobe_gone(kp)) {
2117                /* This kprobe is gone; we can't enable it. */
2118                ret = -EINVAL;
2119                goto out;
2120        }
2121
2122        if (p != kp)
2123                kp->flags &= ~KPROBE_FLAG_DISABLED;
2124
2125        if (!kprobes_all_disarmed && kprobe_disabled(p)) {
2126                p->flags &= ~KPROBE_FLAG_DISABLED;
2127                ret = arm_kprobe(p);
2128                if (ret)
2129                        p->flags |= KPROBE_FLAG_DISABLED;
2130        }
2131out:
2132        mutex_unlock(&kprobe_mutex);
2133        return ret;
2134}
2135EXPORT_SYMBOL_GPL(enable_kprobe);
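/*
 * Example usage (sketch): a probe registered in the disabled state and
 * toggled with enable_kprobe()/disable_kprobe(). KPROBE_FLAG_DISABLED is the
 * only flag a caller may pass to register_kprobe(); the symbol and handler
 * below are arbitrary examples (see the sketch after register_kprobe()).
 */
#if 0
static struct kprobe example_toggled_kp = {
	.symbol_name	= "do_sys_open",	/* example target */
	.pre_handler	= example_pre,		/* handler from the earlier sketch */
	.flags		= KPROBE_FLAG_DISABLED,	/* registered but not armed */
};

static int example_toggle(void)
{
	int ret = register_kprobe(&example_toggled_kp);

	if (ret)
		return ret;
	ret = enable_kprobe(&example_toggled_kp);	/* arm it when needed */
	if (ret)
		return ret;
	return disable_kprobe(&example_toggled_kp);	/* disarm, keep registered */
}
#endif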
2136
2137/* Callers must NOT use this on the usual path; it is only for critical cases. */
2138void dump_kprobe(struct kprobe *kp)
2139{
2140        pr_err("Dumping kprobe:\n");
2141        pr_err("Name: %s\nOffset: %x\nAddress: %pS\n",
2142               kp->symbol_name, kp->offset, kp->addr);
2143}
2144NOKPROBE_SYMBOL(dump_kprobe);
2145
2146int kprobe_add_ksym_blacklist(unsigned long entry)
2147{
2148        struct kprobe_blacklist_entry *ent;
2149        unsigned long offset = 0, size = 0;
2150
2151        if (!kernel_text_address(entry) ||
2152            !kallsyms_lookup_size_offset(entry, &size, &offset))
2153                return -EINVAL;
2154
2155        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
2156        if (!ent)
2157                return -ENOMEM;
2158        ent->start_addr = entry;
2159        ent->end_addr = entry + size;
2160        INIT_LIST_HEAD(&ent->list);
2161        list_add_tail(&ent->list, &kprobe_blacklist);
2162
2163        return (int)size;
2164}
2165
2166/* Add all symbols in the given area to the kprobe blacklist */
2167int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
2168{
2169        unsigned long entry;
2170        int ret = 0;
2171
2172        for (entry = start; entry < end; entry += ret) {
2173                ret = kprobe_add_ksym_blacklist(entry);
2174                if (ret < 0)
2175                        return ret;
2176                if (ret == 0)   /* In case of alias symbol */
2177                        ret = 1;
2178        }
2179        return 0;
2180}
2181
2182int __init __weak arch_populate_kprobe_blacklist(void)
2183{
2184        return 0;
2185}
2186
2187/*
2188 * Lookup and populate the kprobe_blacklist.
2189 *
2190 * Unlike the kretprobe blacklist, we'll need to determine
2191 * the range of addresses that belong to these functions,
2192 * since a kprobe need not necessarily be at the beginning
2193 * of a function.
2194 */
2195static int __init populate_kprobe_blacklist(unsigned long *start,
2196                                             unsigned long *end)
2197{
2198        unsigned long entry;
2199        unsigned long *iter;
2200        int ret;
2201
2202        for (iter = start; iter < end; iter++) {
2203                entry = arch_deref_entry_point((void *)*iter);
2204                ret = kprobe_add_ksym_blacklist(entry);
2205                if (ret == -EINVAL)
2206                        continue;
2207                if (ret < 0)
2208                        return ret;
2209        }
2210
2211        /* Symbols in __kprobes_text are blacklisted */
2212        ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start,
2213                                        (unsigned long)__kprobes_text_end);
2214
2215        return ret ? : arch_populate_kprobe_blacklist();
2216}
2217
2218/* Module notifier callback, checking kprobes on the module */
2219static int kprobes_module_callback(struct notifier_block *nb,
2220                                   unsigned long val, void *data)
2221{
2222        struct module *mod = data;
2223        struct hlist_head *head;
2224        struct kprobe *p;
2225        unsigned int i;
2226        int checkcore = (val == MODULE_STATE_GOING);
2227
2228        if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
2229                return NOTIFY_DONE;
2230
2231        /*
2232         * When MODULE_STATE_GOING is notified, both the module's .text and
2233         * .init.text sections will be freed. When MODULE_STATE_LIVE is
2234         * notified, only the .init.text section will be freed. We need to
2235         * disable any kprobes that have been inserted into those sections.
2236         */
2237        mutex_lock(&kprobe_mutex);
2238        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2239                head = &kprobe_table[i];
2240                hlist_for_each_entry_rcu(p, head, hlist)
2241                        if (within_module_init((unsigned long)p->addr, mod) ||
2242                            (checkcore &&
2243                             within_module_core((unsigned long)p->addr, mod))) {
2244                                /*
2245                                 * The vaddr this probe is installed at will soon
2246                                 * be vfreed but not synced to disk. Hence,
2247                                 * disarming the breakpoint isn't needed.
2248                                 *
2249                                 * Note, this will also move any optimized probes
2250                                 * that are pending to be removed from their
2251                                 * corresponding lists to the freeing_list and
2252                                 * will not be touched by the delayed
2253                                 * kprobe_optimizer work handler.
2254                                 */
2255                                kill_kprobe(p);
2256                        }
2257        }
2258        mutex_unlock(&kprobe_mutex);
2259        return NOTIFY_DONE;
2260}
2261
2262static struct notifier_block kprobe_module_nb = {
2263        .notifier_call = kprobes_module_callback,
2264        .priority = 0
2265};
2266
2267/* Markers of _kprobe_blacklist section */
2268extern unsigned long __start_kprobe_blacklist[];
2269extern unsigned long __stop_kprobe_blacklist[];
2270
2271static int __init init_kprobes(void)
2272{
2273        int i, err = 0;
2274
2275        /* FIXME allocate the probe table, currently defined statically */
2276        /* initialize all list heads */
2277        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2278                INIT_HLIST_HEAD(&kprobe_table[i]);
2279                INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
2280                raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
2281        }
2282
2283        err = populate_kprobe_blacklist(__start_kprobe_blacklist,
2284                                        __stop_kprobe_blacklist);
2285        if (err) {
2286                pr_err("kprobes: failed to populate blacklist: %d\n", err);
2287                pr_err("Please take care when using kprobes.\n");
2288        }
2289
2290        if (kretprobe_blacklist_size) {
2291                /* lookup the function address from its name */
2292                for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
2293                        kretprobe_blacklist[i].addr =
2294                                kprobe_lookup_name(kretprobe_blacklist[i].name, 0);
2295                        if (!kretprobe_blacklist[i].addr)
2296                                printk("kretprobe: lookup failed: %s\n",
2297                                       kretprobe_blacklist[i].name);
2298                }
2299        }
2300
2301#if defined(CONFIG_OPTPROBES)
2302#if defined(__ARCH_WANT_KPROBES_INSN_SLOT)
2303        /* Init kprobe_optinsn_slots */
2304        kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
2305#endif
2306        /* By default, kprobes can be optimized */
2307        kprobes_allow_optimization = true;
2308#endif
2309
2310        /* By default, kprobes are armed */
2311        kprobes_all_disarmed = false;
2312
2313        err = arch_init_kprobes();
2314        if (!err)
2315                err = register_die_notifier(&kprobe_exceptions_nb);
2316        if (!err)
2317                err = register_module_notifier(&kprobe_module_nb);
2318
2319        kprobes_initialized = (err == 0);
2320
2321        if (!err)
2322                init_test_probes();
2323        return err;
2324}
2325subsys_initcall(init_kprobes);
2326
2327#ifdef CONFIG_DEBUG_FS
2328static void report_probe(struct seq_file *pi, struct kprobe *p,
2329                const char *sym, int offset, char *modname, struct kprobe *pp)
2330{
2331        char *kprobe_type;
2332        void *addr = p->addr;
2333
2334        if (p->pre_handler == pre_handler_kretprobe)
2335                kprobe_type = "r";
2336        else
2337                kprobe_type = "k";
2338
2339        if (!kallsyms_show_value())
2340                addr = NULL;
2341
2342        if (sym)
2343                seq_printf(pi, "%px  %s  %s+0x%x  %s ",
2344                        addr, kprobe_type, sym, offset,
2345                        (modname ? modname : " "));
2346        else    /* try to use %pS */
2347                seq_printf(pi, "%px  %s  %pS ",
2348                        addr, kprobe_type, p->addr);
2349
2350        if (!pp)
2351                pp = p;
2352        seq_printf(pi, "%s%s%s%s\n",
2353                (kprobe_gone(p) ? "[GONE]" : ""),
2354                ((kprobe_disabled(p) && !kprobe_gone(p)) ?  "[DISABLED]" : ""),
2355                (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""),
2356                (kprobe_ftrace(pp) ? "[FTRACE]" : ""));
2357}
2358
2359static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
2360{
2361        return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
2362}
2363
2364static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
2365{
2366        (*pos)++;
2367        if (*pos >= KPROBE_TABLE_SIZE)
2368                return NULL;
2369        return pos;
2370}
2371
2372static void kprobe_seq_stop(struct seq_file *f, void *v)
2373{
2374        /* Nothing to do */
2375}
2376
2377static int show_kprobe_addr(struct seq_file *pi, void *v)
2378{
2379        struct hlist_head *head;
2380        struct kprobe *p, *kp;
2381        const char *sym = NULL;
2382        unsigned int i = *(loff_t *) v;
2383        unsigned long offset = 0;
2384        char *modname, namebuf[KSYM_NAME_LEN];
2385
2386        head = &kprobe_table[i];
2387        preempt_disable();
2388        hlist_for_each_entry_rcu(p, head, hlist) {
2389                sym = kallsyms_lookup((unsigned long)p->addr, NULL,
2390                                        &offset, &modname, namebuf);
2391                if (kprobe_aggrprobe(p)) {
2392                        list_for_each_entry_rcu(kp, &p->list, list)
2393                                report_probe(pi, kp, sym, offset, modname, p);
2394                } else
2395                        report_probe(pi, p, sym, offset, modname, NULL);
2396        }
2397        preempt_enable();
2398        return 0;
2399}
2400
2401static const struct seq_operations kprobes_seq_ops = {
2402        .start = kprobe_seq_start,
2403        .next  = kprobe_seq_next,
2404        .stop  = kprobe_seq_stop,
2405        .show  = show_kprobe_addr
2406};
2407
2408static int kprobes_open(struct inode *inode, struct file *filp)
2409{
2410        return seq_open(filp, &kprobes_seq_ops);
2411}
2412
2413static const struct file_operations debugfs_kprobes_operations = {
2414        .open           = kprobes_open,
2415        .read           = seq_read,
2416        .llseek         = seq_lseek,
2417        .release        = seq_release,
2418};
2419
2420/* kprobes/blacklist -- shows which functions cannot be probed */
2421static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
2422{
2423        return seq_list_start(&kprobe_blacklist, *pos);
2424}
2425
2426static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
2427{
2428        return seq_list_next(v, &kprobe_blacklist, pos);
2429}
2430
2431static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
2432{
2433        struct kprobe_blacklist_entry *ent =
2434                list_entry(v, struct kprobe_blacklist_entry, list);
2435
2436        /*
2437         * If /proc/kallsyms is not showing kernel addresses, we won't
2438         * show them here either.
2439         */
2440        if (!kallsyms_show_value())
2441                seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL,
2442                           (void *)ent->start_addr);
2443        else
2444                seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr,
2445                           (void *)ent->end_addr, (void *)ent->start_addr);
2446        return 0;
2447}
2448
2449static const struct seq_operations kprobe_blacklist_seq_ops = {
2450        .start = kprobe_blacklist_seq_start,
2451        .next  = kprobe_blacklist_seq_next,
2452        .stop  = kprobe_seq_stop,       /* Reuse void function */
2453        .show  = kprobe_blacklist_seq_show,
2454};
2455
2456static int kprobe_blacklist_open(struct inode *inode, struct file *filp)
2457{
2458        return seq_open(filp, &kprobe_blacklist_seq_ops);
2459}
2460
2461static const struct file_operations debugfs_kprobe_blacklist_ops = {
2462        .open           = kprobe_blacklist_open,
2463        .read           = seq_read,
2464        .llseek         = seq_lseek,
2465        .release        = seq_release,
2466};
2467
2468static int arm_all_kprobes(void)
2469{
2470        struct hlist_head *head;
2471        struct kprobe *p;
2472        unsigned int i, total = 0, errors = 0;
2473        int err, ret = 0;
2474
2475        mutex_lock(&kprobe_mutex);
2476
2477        /* If kprobes are armed, just return */
2478        if (!kprobes_all_disarmed)
2479                goto already_enabled;
2480
2481        /*
2482         * optimize_kprobe() called by arm_kprobe() checks
2483         * kprobes_all_disarmed, so set kprobes_all_disarmed before
2484         * calling arm_kprobe().
2485         */
2486        kprobes_all_disarmed = false;
2487        /* Arming a kprobe doesn't optimize the kprobe itself */
2488        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2489                head = &kprobe_table[i];
2490                /* Arm all kprobes on a best-effort basis */
2491                hlist_for_each_entry_rcu(p, head, hlist) {
2492                        if (!kprobe_disabled(p)) {
2493                                err = arm_kprobe(p);
2494                                if (err)  {
2495                                        errors++;
2496                                        ret = err;
2497                                }
2498                                total++;
2499                        }
2500                }
2501        }
2502
2503        if (errors)
2504                pr_warn("Kprobes globally enabled, but failed to arm %d out of %d probes\n",
2505                        errors, total);
2506        else
2507                pr_info("Kprobes globally enabled\n");
2508
2509already_enabled:
2510        mutex_unlock(&kprobe_mutex);
2511        return ret;
2512}
2513
2514static int disarm_all_kprobes(void)
2515{
2516        struct hlist_head *head;
2517        struct kprobe *p;
2518        unsigned int i, total = 0, errors = 0;
2519        int err, ret = 0;
2520
2521        mutex_lock(&kprobe_mutex);
2522
2523        /* If kprobes are already disarmed, just return */
2524        if (kprobes_all_disarmed) {
2525                mutex_unlock(&kprobe_mutex);
2526                return 0;
2527        }
2528
2529        kprobes_all_disarmed = true;
2530
2531        for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
2532                head = &kprobe_table[i];
2533                /* Disarm all kprobes on a best-effort basis */
2534                hlist_for_each_entry_rcu(p, head, hlist) {
2535                        if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
2536                                err = disarm_kprobe(p, false);
2537                                if (err) {
2538                                        errors++;
2539                                        ret = err;
2540                                }
2541                                total++;
2542                        }
2543                }
2544        }
2545
2546        if (errors)
2547                pr_warn("Kprobes globally disabled, but failed to disarm %d out of %d probes\n",
2548                        errors, total);
2549        else
2550                pr_info("Kprobes globally disabled\n");
2551
2552        mutex_unlock(&kprobe_mutex);
2553
2554        /* Wait for the optimizer to finish disarming all kprobes */
2555        wait_for_kprobe_optimizer();
2556
2557        return ret;
2558}
2559
2560/*
2561 * XXX: The debugfs bool file interface doesn't allow for callbacks
2562 * when the bool state is switched. We can reuse that facility when it
2563 * becomes available.
2564 */
2565static ssize_t read_enabled_file_bool(struct file *file,
2566               char __user *user_buf, size_t count, loff_t *ppos)
2567{
2568        char buf[3];
2569
2570        if (!kprobes_all_disarmed)
2571                buf[0] = '1';
2572        else
2573                buf[0] = '0';
2574        buf[1] = '\n';
2575        buf[2] = 0x00;
2576        return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
2577}
2578
2579static ssize_t write_enabled_file_bool(struct file *file,
2580               const char __user *user_buf, size_t count, loff_t *ppos)
2581{
2582        char buf[32];
2583        size_t buf_size;
2584        int ret = 0;
2585
2586        buf_size = min(count, (sizeof(buf)-1));
2587        if (copy_from_user(buf, user_buf, buf_size))
2588                return -EFAULT;
2589
2590        buf[buf_size] = '\0';
2591        switch (buf[0]) {
2592        case 'y':
2593        case 'Y':
2594        case '1':
2595                ret = arm_all_kprobes();
2596                break;
2597        case 'n':
2598        case 'N':
2599        case '0':
2600                ret = disarm_all_kprobes();
2601                break;
2602        default:
2603                return -EINVAL;
2604        }
2605
2606        if (ret)
2607                return ret;
2608
2609        return count;
2610}
2611
2612static const struct file_operations fops_kp = {
2613        .read =         read_enabled_file_bool,
2614        .write =        write_enabled_file_bool,
2615        .llseek =       default_llseek,
2616};
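/*
 * Typical use of the "enabled" file created below (assuming debugfs is
 * mounted at /sys/kernel/debug):
 *
 *   echo 0 > /sys/kernel/debug/kprobes/enabled   # disarm_all_kprobes()
 *   echo 1 > /sys/kernel/debug/kprobes/enabled   # arm_all_kprobes()
 *
 * Reading the file reports the current state as '1' or '0'.
 */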
2617
2618static int __init debugfs_kprobe_init(void)
2619{
2620        struct dentry *dir;
2621        unsigned int value = 1;
2622
2623        dir = debugfs_create_dir("kprobes", NULL);
2624
2625        debugfs_create_file("list", 0400, dir, NULL,
2626                            &debugfs_kprobes_operations);
2627
2628        debugfs_create_file("enabled", 0600, dir, &value, &fops_kp);
2629
2630        debugfs_create_file("blacklist", 0400, dir, NULL,
2631                            &debugfs_kprobe_blacklist_ops);
2632
2633        return 0;
2634}
2635
2636late_initcall(debugfs_kprobe_init);
2637#endif /* CONFIG_DEBUG_FS */
2638