linux/arch/x86/oprofile/nmi_int.c
<<
>>
Prefs
   1/**
   2 * @file nmi_int.c
   3 *
   4 * @remark Copyright 2002-2009 OProfile authors
   5 * @remark Read the file COPYING
   6 *
   7 * @author John Levon <levon@movementarian.org>
   8 * @author Robert Richter <robert.richter@amd.com>
   9 * @author Barry Kasindorf <barry.kasindorf@amd.com>
  10 * @author Jason Yeh <jason.yeh@amd.com>
  11 * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
  12 */
  13
  14#include <linux/init.h>
  15#include <linux/notifier.h>
  16#include <linux/smp.h>
  17#include <linux/oprofile.h>
  18#include <linux/sysdev.h>
  19#include <linux/slab.h>
  20#include <linux/moduleparam.h>
  21#include <linux/kdebug.h>
  22#include <linux/cpu.h>
  23#include <asm/nmi.h>
  24#include <asm/msr.h>
  25#include <asm/apic.h>
  26
  27#include "op_counter.h"
  28#include "op_x86_model.h"
  29
  30static struct op_x86_model_spec *model;
  31static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
  32static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
  33
  34/* 0 == registered but off, 1 == registered and on */
  35static int nmi_enabled = 0;
  36
  37struct op_counter_config counter_config[OP_MAX_COUNTER];
  38
  39/* common functions */
  40
  41u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
  42                    struct op_counter_config *counter_config)
  43{
  44        u64 val = 0;
  45        u16 event = (u16)counter_config->event;
  46
  47        val |= ARCH_PERFMON_EVENTSEL_INT;
  48        val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
  49        val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
  50        val |= (counter_config->unit_mask & 0xFF) << 8;
  51        event &= model->event_mask ? model->event_mask : 0xFF;
  52        val |= event & 0xFF;
  53        val |= (event & 0x0F00) << 24;
  54
  55        return val;
  56}
  57
  58
  59static int profile_exceptions_notify(struct notifier_block *self,
  60                                     unsigned long val, void *data)
  61{
  62        struct die_args *args = (struct die_args *)data;
  63        int ret = NOTIFY_DONE;
  64        int cpu = smp_processor_id();
  65
  66        switch (val) {
  67        case DIE_NMI:
  68        case DIE_NMI_IPI:
  69                model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu));
  70                ret = NOTIFY_STOP;
  71                break;
  72        default:
  73                break;
  74        }
  75        return ret;
  76}
  77
  78static void nmi_cpu_save_registers(struct op_msrs *msrs)
  79{
  80        struct op_msr *counters = msrs->counters;
  81        struct op_msr *controls = msrs->controls;
  82        unsigned int i;
  83
  84        for (i = 0; i < model->num_counters; ++i) {
  85                if (counters[i].addr)
  86                        rdmsrl(counters[i].addr, counters[i].saved);
  87        }
  88
  89        for (i = 0; i < model->num_controls; ++i) {
  90                if (controls[i].addr)
  91                        rdmsrl(controls[i].addr, controls[i].saved);
  92        }
  93}
  94
  95static void nmi_cpu_start(void *dummy)
  96{
  97        struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
  98        model->start(msrs);
  99}
 100
 101static int nmi_start(void)
 102{
 103        on_each_cpu(nmi_cpu_start, NULL, 1);
 104        return 0;
 105}
 106
 107static void nmi_cpu_stop(void *dummy)
 108{
 109        struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
 110        model->stop(msrs);
 111}
 112
 113static void nmi_stop(void)
 114{
 115        on_each_cpu(nmi_cpu_stop, NULL, 1);
 116}
 117
 118#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
 119
 120static DEFINE_PER_CPU(int, switch_index);
 121
 122static inline int has_mux(void)
 123{
 124        return !!model->switch_ctrl;
 125}
 126
 127inline int op_x86_phys_to_virt(int phys)
 128{
 129        return __get_cpu_var(switch_index) + phys;
 130}
 131
 132inline int op_x86_virt_to_phys(int virt)
 133{
 134        return virt % model->num_counters;
 135}
 136
 137static void nmi_shutdown_mux(void)
 138{
 139        int i;
 140
 141        if (!has_mux())
 142                return;
 143
 144        for_each_possible_cpu(i) {
 145                kfree(per_cpu(cpu_msrs, i).multiplex);
 146                per_cpu(cpu_msrs, i).multiplex = NULL;
 147                per_cpu(switch_index, i) = 0;
 148        }
 149}
 150
 151static int nmi_setup_mux(void)
 152{
 153        size_t multiplex_size =
 154                sizeof(struct op_msr) * model->num_virt_counters;
 155        int i;
 156
 157        if (!has_mux())
 158                return 1;
 159
 160        for_each_possible_cpu(i) {
 161                per_cpu(cpu_msrs, i).multiplex =
 162                        kmalloc(multiplex_size, GFP_KERNEL);
 163                if (!per_cpu(cpu_msrs, i).multiplex)
 164                        return 0;
 165        }
 166
 167        return 1;
 168}
 169
 170static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
 171{
 172        int i;
 173        struct op_msr *multiplex = msrs->multiplex;
 174
 175        if (!has_mux())
 176                return;
 177
 178        for (i = 0; i < model->num_virt_counters; ++i) {
 179                if (counter_config[i].enabled) {
 180                        multiplex[i].saved = -(u64)counter_config[i].count;
 181                } else {
 182                        multiplex[i].addr  = 0;
 183                        multiplex[i].saved = 0;
 184                }
 185        }
 186
 187        per_cpu(switch_index, cpu) = 0;
 188}
 189
 190static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
 191{
 192        struct op_msr *multiplex = msrs->multiplex;
 193        int i;
 194
 195        for (i = 0; i < model->num_counters; ++i) {
 196                int virt = op_x86_phys_to_virt(i);
 197                if (multiplex[virt].addr)
 198                        rdmsrl(multiplex[virt].addr, multiplex[virt].saved);
 199        }
 200}
 201
 202static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
 203{
 204        struct op_msr *multiplex = msrs->multiplex;
 205        int i;
 206
 207        for (i = 0; i < model->num_counters; ++i) {
 208                int virt = op_x86_phys_to_virt(i);
 209                if (multiplex[virt].addr)
 210                        wrmsrl(multiplex[virt].addr, multiplex[virt].saved);
 211        }
 212}
 213
 214static void nmi_cpu_switch(void *dummy)
 215{
 216        int cpu = smp_processor_id();
 217        int si = per_cpu(switch_index, cpu);
 218        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
 219
 220        nmi_cpu_stop(NULL);
 221        nmi_cpu_save_mpx_registers(msrs);
 222
 223        /* move to next set */
 224        si += model->num_counters;
 225        if ((si > model->num_virt_counters) || (counter_config[si].count == 0))
 226                per_cpu(switch_index, cpu) = 0;
 227        else
 228                per_cpu(switch_index, cpu) = si;
 229
 230        model->switch_ctrl(model, msrs);
 231        nmi_cpu_restore_mpx_registers(msrs);
 232
 233        nmi_cpu_start(NULL);
 234}
 235
 236
 237/*
 238 * Quick check to see if multiplexing is necessary.
 239 * The check should be sufficient since counters are used
 240 * in ordre.
 241 */
 242static int nmi_multiplex_on(void)
 243{
 244        return counter_config[model->num_counters].count ? 0 : -EINVAL;
 245}
 246
 247static int nmi_switch_event(void)
 248{
 249        if (!has_mux())
 250                return -ENOSYS;         /* not implemented */
 251        if (nmi_multiplex_on() < 0)
 252                return -EINVAL;         /* not necessary */
 253
 254        on_each_cpu(nmi_cpu_switch, NULL, 1);
 255
 256        return 0;
 257}
 258
 259static inline void mux_init(struct oprofile_operations *ops)
 260{
 261        if (has_mux())
 262                ops->switch_events = nmi_switch_event;
 263}
 264
 265static void mux_clone(int cpu)
 266{
 267        if (!has_mux())
 268                return;
 269
 270        memcpy(per_cpu(cpu_msrs, cpu).multiplex,
 271               per_cpu(cpu_msrs, 0).multiplex,
 272               sizeof(struct op_msr) * model->num_virt_counters);
 273}
 274
 275#else
 276
 277inline int op_x86_phys_to_virt(int phys) { return phys; }
 278inline int op_x86_virt_to_phys(int virt) { return virt; }
 279static inline void nmi_shutdown_mux(void) { }
 280static inline int nmi_setup_mux(void) { return 1; }
 281static inline void
 282nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { }
 283static inline void mux_init(struct oprofile_operations *ops) { }
 284static void mux_clone(int cpu) { }
 285
 286#endif
 287
 288static void free_msrs(void)
 289{
 290        int i;
 291        for_each_possible_cpu(i) {
 292                kfree(per_cpu(cpu_msrs, i).counters);
 293                per_cpu(cpu_msrs, i).counters = NULL;
 294                kfree(per_cpu(cpu_msrs, i).controls);
 295                per_cpu(cpu_msrs, i).controls = NULL;
 296        }
 297}
 298
 299static int allocate_msrs(void)
 300{
 301        size_t controls_size = sizeof(struct op_msr) * model->num_controls;
 302        size_t counters_size = sizeof(struct op_msr) * model->num_counters;
 303
 304        int i;
 305        for_each_possible_cpu(i) {
 306                per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
 307                                                        GFP_KERNEL);
 308                if (!per_cpu(cpu_msrs, i).counters)
 309                        return 0;
 310                per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
 311                                                        GFP_KERNEL);
 312                if (!per_cpu(cpu_msrs, i).controls)
 313                        return 0;
 314        }
 315
 316        return 1;
 317}
 318
 319static void nmi_cpu_setup(void *dummy)
 320{
 321        int cpu = smp_processor_id();
 322        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
 323        nmi_cpu_save_registers(msrs);
 324        spin_lock(&oprofilefs_lock);
 325        model->setup_ctrs(model, msrs);
 326        nmi_cpu_setup_mux(cpu, msrs);
 327        spin_unlock(&oprofilefs_lock);
 328        per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
 329        apic_write(APIC_LVTPC, APIC_DM_NMI);
 330}
 331
 332static struct notifier_block profile_exceptions_nb = {
 333        .notifier_call = profile_exceptions_notify,
 334        .next = NULL,
 335        .priority = 2
 336};
 337
 338static int nmi_setup(void)
 339{
 340        int err = 0;
 341        int cpu;
 342
 343        if (!allocate_msrs())
 344                err = -ENOMEM;
 345        else if (!nmi_setup_mux())
 346                err = -ENOMEM;
 347        else
 348                err = register_die_notifier(&profile_exceptions_nb);
 349
 350        if (err) {
 351                free_msrs();
 352                nmi_shutdown_mux();
 353                return err;
 354        }
 355
 356        /* We need to serialize save and setup for HT because the subset
 357         * of msrs are distinct for save and setup operations
 358         */
 359
 360        /* Assume saved/restored counters are the same on all CPUs */
 361        model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
 362        for_each_possible_cpu(cpu) {
 363                if (!cpu)
 364                        continue;
 365
 366                memcpy(per_cpu(cpu_msrs, cpu).counters,
 367                       per_cpu(cpu_msrs, 0).counters,
 368                       sizeof(struct op_msr) * model->num_counters);
 369
 370                memcpy(per_cpu(cpu_msrs, cpu).controls,
 371                       per_cpu(cpu_msrs, 0).controls,
 372                       sizeof(struct op_msr) * model->num_controls);
 373
 374                mux_clone(cpu);
 375        }
 376        on_each_cpu(nmi_cpu_setup, NULL, 1);
 377        nmi_enabled = 1;
 378        return 0;
 379}
 380
 381static void nmi_cpu_restore_registers(struct op_msrs *msrs)
 382{
 383        struct op_msr *counters = msrs->counters;
 384        struct op_msr *controls = msrs->controls;
 385        unsigned int i;
 386
 387        for (i = 0; i < model->num_controls; ++i) {
 388                if (controls[i].addr)
 389                        wrmsrl(controls[i].addr, controls[i].saved);
 390        }
 391
 392        for (i = 0; i < model->num_counters; ++i) {
 393                if (counters[i].addr)
 394                        wrmsrl(counters[i].addr, counters[i].saved);
 395        }
 396}
 397
 398static void nmi_cpu_shutdown(void *dummy)
 399{
 400        unsigned int v;
 401        int cpu = smp_processor_id();
 402        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
 403
 404        /* restoring APIC_LVTPC can trigger an apic error because the delivery
 405         * mode and vector nr combination can be illegal. That's by design: on
 406         * power on apic lvt contain a zero vector nr which are legal only for
 407         * NMI delivery mode. So inhibit apic err before restoring lvtpc
 408         */
 409        v = apic_read(APIC_LVTERR);
 410        apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
 411        apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
 412        apic_write(APIC_LVTERR, v);
 413        nmi_cpu_restore_registers(msrs);
 414}
 415
 416static void nmi_shutdown(void)
 417{
 418        struct op_msrs *msrs;
 419
 420        nmi_enabled = 0;
 421        on_each_cpu(nmi_cpu_shutdown, NULL, 1);
 422        unregister_die_notifier(&profile_exceptions_nb);
 423        nmi_shutdown_mux();
 424        msrs = &get_cpu_var(cpu_msrs);
 425        model->shutdown(msrs);
 426        free_msrs();
 427        put_cpu_var(cpu_msrs);
 428}
 429
 430static int nmi_create_files(struct super_block *sb, struct dentry *root)
 431{
 432        unsigned int i;
 433
 434        for (i = 0; i < model->num_virt_counters; ++i) {
 435                struct dentry *dir;
 436                char buf[4];
 437
 438                /* quick little hack to _not_ expose a counter if it is not
 439                 * available for use.  This should protect userspace app.
 440                 * NOTE:  assumes 1:1 mapping here (that counters are organized
 441                 *        sequentially in their struct assignment).
 442                 */
 443                if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i)))
 444                        continue;
 445
 446                snprintf(buf,  sizeof(buf), "%d", i);
 447                dir = oprofilefs_mkdir(sb, root, buf);
 448                oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
 449                oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
 450                oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
 451                oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
 452                oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
 453                oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
 454        }
 455
 456        return 0;
 457}
 458
 459#ifdef CONFIG_SMP
 460static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
 461                                 void *data)
 462{
 463        int cpu = (unsigned long)data;
 464        switch (action) {
 465        case CPU_DOWN_FAILED:
 466        case CPU_ONLINE:
 467                smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
 468                break;
 469        case CPU_DOWN_PREPARE:
 470                smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
 471                break;
 472        }
 473        return NOTIFY_DONE;
 474}
 475
 476static struct notifier_block oprofile_cpu_nb = {
 477        .notifier_call = oprofile_cpu_notifier
 478};
 479#endif
 480
 481#ifdef CONFIG_PM
 482
 483static int nmi_suspend(struct sys_device *dev, pm_message_t state)
 484{
 485        /* Only one CPU left, just stop that one */
 486        if (nmi_enabled == 1)
 487                nmi_cpu_stop(NULL);
 488        return 0;
 489}
 490
 491static int nmi_resume(struct sys_device *dev)
 492{
 493        if (nmi_enabled == 1)
 494                nmi_cpu_start(NULL);
 495        return 0;
 496}
 497
 498static struct sysdev_class oprofile_sysclass = {
 499        .name           = "oprofile",
 500        .resume         = nmi_resume,
 501        .suspend        = nmi_suspend,
 502};
 503
 504static struct sys_device device_oprofile = {
 505        .id     = 0,
 506        .cls    = &oprofile_sysclass,
 507};
 508
 509static int __init init_sysfs(void)
 510{
 511        int error;
 512
 513        error = sysdev_class_register(&oprofile_sysclass);
 514        if (!error)
 515                error = sysdev_register(&device_oprofile);
 516        return error;
 517}
 518
 519static void exit_sysfs(void)
 520{
 521        sysdev_unregister(&device_oprofile);
 522        sysdev_class_unregister(&oprofile_sysclass);
 523}
 524
 525#else
 526#define init_sysfs() do { } while (0)
 527#define exit_sysfs() do { } while (0)
 528#endif /* CONFIG_PM */
 529
 530static int __init p4_init(char **cpu_type)
 531{
 532        __u8 cpu_model = boot_cpu_data.x86_model;
 533
 534        if (cpu_model > 6 || cpu_model == 5)
 535                return 0;
 536
 537#ifndef CONFIG_SMP
 538        *cpu_type = "i386/p4";
 539        model = &op_p4_spec;
 540        return 1;
 541#else
 542        switch (smp_num_siblings) {
 543        case 1:
 544                *cpu_type = "i386/p4";
 545                model = &op_p4_spec;
 546                return 1;
 547
 548        case 2:
 549                *cpu_type = "i386/p4-ht";
 550                model = &op_p4_ht2_spec;
 551                return 1;
 552        }
 553#endif
 554
 555        printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
 556        printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
 557        return 0;
 558}
 559
 560static int force_arch_perfmon;
 561static int force_cpu_type(const char *str, struct kernel_param *kp)
 562{
 563        if (!strcmp(str, "arch_perfmon")) {
 564                force_arch_perfmon = 1;
 565                printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
 566        }
 567
 568        return 0;
 569}
 570module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
 571
 572static int __init ppro_init(char **cpu_type)
 573{
 574        __u8 cpu_model = boot_cpu_data.x86_model;
 575        struct op_x86_model_spec *spec = &op_ppro_spec; /* default */
 576
 577        if (force_arch_perfmon && cpu_has_arch_perfmon)
 578                return 0;
 579
 580        switch (cpu_model) {
 581        case 0 ... 2:
 582                *cpu_type = "i386/ppro";
 583                break;
 584        case 3 ... 5:
 585                *cpu_type = "i386/pii";
 586                break;
 587        case 6 ... 8:
 588        case 10 ... 11:
 589                *cpu_type = "i386/piii";
 590                break;
 591        case 9:
 592        case 13:
 593                *cpu_type = "i386/p6_mobile";
 594                break;
 595        case 14:
 596                *cpu_type = "i386/core";
 597                break;
 598        case 15: case 23:
 599                *cpu_type = "i386/core_2";
 600                break;
 601        case 26:
 602                spec = &op_arch_perfmon_spec;
 603                *cpu_type = "i386/core_i7";
 604                break;
 605        case 28:
 606                *cpu_type = "i386/atom";
 607                break;
 608        default:
 609                /* Unknown */
 610                return 0;
 611        }
 612
 613        model = spec;
 614        return 1;
 615}
 616
 617/* in order to get sysfs right */
 618static int using_nmi;
 619
 620int __init op_nmi_init(struct oprofile_operations *ops)
 621{
 622        __u8 vendor = boot_cpu_data.x86_vendor;
 623        __u8 family = boot_cpu_data.x86;
 624        char *cpu_type = NULL;
 625        int ret = 0;
 626
 627        if (!cpu_has_apic)
 628                return -ENODEV;
 629
 630        switch (vendor) {
 631        case X86_VENDOR_AMD:
 632                /* Needs to be at least an Athlon (or hammer in 32bit mode) */
 633
 634                switch (family) {
 635                case 6:
 636                        cpu_type = "i386/athlon";
 637                        break;
 638                case 0xf:
 639                        /*
 640                         * Actually it could be i386/hammer too, but
 641                         * give user space an consistent name.
 642                         */
 643                        cpu_type = "x86-64/hammer";
 644                        break;
 645                case 0x10:
 646                        cpu_type = "x86-64/family10";
 647                        break;
 648                case 0x11:
 649                        cpu_type = "x86-64/family11h";
 650                        break;
 651                default:
 652                        return -ENODEV;
 653                }
 654                model = &op_amd_spec;
 655                break;
 656
 657        case X86_VENDOR_INTEL:
 658                switch (family) {
 659                        /* Pentium IV */
 660                case 0xf:
 661                        p4_init(&cpu_type);
 662                        break;
 663
 664                        /* A P6-class processor */
 665                case 6:
 666                        ppro_init(&cpu_type);
 667                        break;
 668
 669                default:
 670                        break;
 671                }
 672
 673                if (cpu_type)
 674                        break;
 675
 676                if (!cpu_has_arch_perfmon)
 677                        return -ENODEV;
 678
 679                /* use arch perfmon as fallback */
 680                cpu_type = "i386/arch_perfmon";
 681                model = &op_arch_perfmon_spec;
 682                break;
 683
 684        default:
 685                return -ENODEV;
 686        }
 687
 688#ifdef CONFIG_SMP
 689        register_cpu_notifier(&oprofile_cpu_nb);
 690#endif
 691        /* default values, can be overwritten by model */
 692        ops->create_files       = nmi_create_files;
 693        ops->setup              = nmi_setup;
 694        ops->shutdown           = nmi_shutdown;
 695        ops->start              = nmi_start;
 696        ops->stop               = nmi_stop;
 697        ops->cpu_type           = cpu_type;
 698
 699        if (model->init)
 700                ret = model->init(ops);
 701        if (ret)
 702                return ret;
 703
 704        if (!model->num_virt_counters)
 705                model->num_virt_counters = model->num_counters;
 706
 707        mux_init(ops);
 708
 709        init_sysfs();
 710        using_nmi = 1;
 711        printk(KERN_INFO "oprofile: using NMI interrupt.\n");
 712        return 0;
 713}
 714
 715void op_nmi_exit(void)
 716{
 717        if (using_nmi) {
 718                exit_sysfs();
 719#ifdef CONFIG_SMP
 720                unregister_cpu_notifier(&oprofile_cpu_nb);
 721#endif
 722        }
 723        if (model->exit)
 724                model->exit();
 725}
 726