linux/arch/x86/kernel/cpu/perf_event_amd_ibs.c
/*
 * Performance events - AMD IBS
 *
 *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
 *
 *  For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/ptrace.h>
#include <linux/syscore_ops.h>

#include <asm/apic.h>

#include "perf_event.h"

static u32 ibs_caps;

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)

#include <linux/kprobes.h>
#include <linux/hardirq.h>

#include <asm/nmi.h>

#define IBS_FETCH_CONFIG_MASK   (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK      IBS_OP_MAX_CNT

enum ibs_states {
        IBS_ENABLED     = 0,
        IBS_STARTED     = 1,
        IBS_STOPPING    = 2,

        IBS_MAX_STATES,
};

struct cpu_perf_ibs {
        struct perf_event       *event;
        unsigned long           state[BITS_TO_LONGS(IBS_MAX_STATES)];
};

struct perf_ibs {
        struct pmu                      pmu;
        unsigned int                    msr;
        u64                             config_mask;
        u64                             cnt_mask;
        u64                             enable_mask;
        u64                             valid_mask;
        u64                             max_period;
        unsigned long                   offset_mask[1];
        int                             offset_max;
        struct cpu_perf_ibs __percpu    *pcpu;

        struct attribute                **format_attrs;
        struct attribute_group          format_group;
        const struct attribute_group    *attr_groups[2];

        u64                             (*get_count)(u64 config);
};

struct perf_ibs_data {
        u32             size;
        union {
                u32     data[0];        /* data buffer starts here */
                u32     caps;
        };
        u64             regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};

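/*
 * Compute the next hardware period from the software period
 * bookkeeping in @hwc, keeping it within the [min, max] range the
 * hardware counter supports. Returns non-zero if a software counter
 * overflow should be reported for this update.
 */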
static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
        s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int overflow = 0;

        /*
         * If we are way outside a reasonable range then just skip forward:
         */
        if (unlikely(left <= -period)) {
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                overflow = 1;
        }

        if (unlikely(left < (s64)min)) {
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                overflow = 1;
        }

        /*
         * If the hw period that triggers the sw overflow is too short
         * we might hit the irq handler. This biases the results.
         * Thus we shorten the next-to-last period and set the last
         * period to the max period.
         */
        if (left > max) {
                left -= max;
                if (left > max)
                        left = max;
                else if (left < min)
                        left = min;
        }

        *hw_period = (u64)left;

        return overflow;
}

static int
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
{
        struct hw_perf_event *hwc = &event->hw;
        int shift = 64 - width;
        u64 prev_raw_count;
        u64 delta;

        /*
         * Careful: an NMI might modify the previous event value.
         *
         * Our tactic to handle this is to first atomically read and
         * exchange a new raw count - then add that new-prev delta
         * count to the generic event atomically:
         */
        prev_raw_count = local64_read(&hwc->prev_count);
        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                        new_raw_count) != prev_raw_count)
                return 0;

        /*
         * Now we have the new raw value and have updated the prev
         * timestamp already. We can now calculate the elapsed delta
         * (event-)time and add that to the generic event.
         *
         * Careful, not all hw sign-extends above the physical width
         * of the count.
         */
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);

        return 1;
}

static struct perf_ibs perf_ibs_fetch;
static struct perf_ibs perf_ibs_op;

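/* Map a perf event type back to the IBS PMU (fetch or op) it belongs to. */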
static struct perf_ibs *get_ibs_pmu(int type)
{
        if (perf_ibs_fetch.pmu.type == type)
                return &perf_ibs_fetch;
        if (perf_ibs_op.pmu.type == type)
                return &perf_ibs_op;
        return NULL;
}

/*
 * Use IBS for precise event sampling:
 *
 *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
 *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
 *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
 *
 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
 * MSRC001_1033) is used to select either cycle or micro-ops counting
 * mode.
 *
 * The rip of IBS samples has skid 0. Thus, IBS supports precise
 * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
 * rip is invalid when IBS was not able to record the rip correctly.
 * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
 */
static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
{
        switch (event->attr.precise_ip) {
        case 0:
                return -ENOENT;
        case 1:
        case 2:
                break;
        default:
                return -EOPNOTSUPP;
        }

        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
                switch (event->attr.config) {
                case PERF_COUNT_HW_CPU_CYCLES:
                        *config = 0;
                        return 0;
                }
                break;
        case PERF_TYPE_RAW:
                switch (event->attr.config) {
                case 0x0076:
                        *config = 0;
                        return 0;
                case 0x00C1:
                        *config = IBS_OP_CNT_CTL;
                        return 0;
                }
                break;
        default:
                return -ENOENT;
        }

        return -EOPNOTSUPP;
}

static const struct perf_event_attr ibs_notsupp = {
        .exclude_user   = 1,
        .exclude_kernel = 1,
        .exclude_hv     = 1,
        .exclude_idle   = 1,
        .exclude_host   = 1,
        .exclude_guest  = 1,
};

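/*
 * Validate the event attributes and translate them into an IBS
 * config: reject exclude_* bits that IBS cannot filter on, derive
 * the sample period either from the attribute or from the raw max
 * count field, and round it to the 16-unit granularity of the
 * hardware counter.
 */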
static int perf_ibs_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs;
        u64 max_cnt, config;
        int ret;

        perf_ibs = get_ibs_pmu(event->attr.type);
        if (perf_ibs) {
                config = event->attr.config;
        } else {
                perf_ibs = &perf_ibs_op;
                ret = perf_ibs_precise_event(event, &config);
                if (ret)
                        return ret;
        }

        if (event->pmu != &perf_ibs->pmu)
                return -ENOENT;

        if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
                return -EINVAL;

        if (config & ~perf_ibs->config_mask)
                return -EINVAL;

        if (hwc->sample_period) {
                if (config & perf_ibs->cnt_mask)
                        /* raw max_cnt may not be set */
                        return -EINVAL;
                if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
                        /*
                         * The lower 4 bits can not be set in the ibs
                         * max cnt, but we allow it here in case the
                         * sample period is adjusted later to set a
                         * frequency.
                         */
                        return -EINVAL;
                hwc->sample_period &= ~0x0FULL;
                if (!hwc->sample_period)
                        hwc->sample_period = 0x10;
        } else {
                max_cnt = config & perf_ibs->cnt_mask;
                config &= ~perf_ibs->cnt_mask;
                event->attr.sample_period = max_cnt << 4;
                hwc->sample_period = event->attr.sample_period;
        }

        if (!hwc->sample_period)
                return -EINVAL;

        /*
         * If we modify hwc->sample_period, we also need to update
         * hwc->last_period and hwc->period_left.
         */
        hwc->last_period = hwc->sample_period;
        local64_set(&hwc->period_left, hwc->sample_period);

        hwc->config_base = perf_ibs->msr;
        hwc->config = config;

        return 0;
}

static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
                               struct hw_perf_event *hwc, u64 *period)
{
        int overflow;

        /* ignore lower 4 bits in min count: */
        overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
        local64_set(&hwc->prev_count, 0);

        return overflow;
}

static u64 get_ibs_fetch_count(u64 config)
{
        return (config & IBS_FETCH_CNT) >> 12;
}

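/*
 * The op count is split across IbsOpCtl: IBS_OP_MAX_CNT holds the
 * period that has already elapsed once IBS_OP_VAL is set, and
 * IBS_OP_CUR_CNT holds the current count, which is only readable on
 * cpus with the IBS_CAPS_RDWROPCNT capability.
 */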
static u64 get_ibs_op_count(u64 config)
{
        u64 count = 0;

        if (config & IBS_OP_VAL)
                count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */

        if (ibs_caps & IBS_CAPS_RDWROPCNT)
                count += (config & IBS_OP_CUR_CNT) >> 32;

        return count;
}

static void
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
                      u64 *config)
{
        u64 count = perf_ibs->get_count(*config);

        /*
         * Set width to 64 since we do not overflow on max width but
         * instead on max count. In perf_ibs_set_period() we clear
         * prev count manually on overflow.
         */
        while (!perf_event_try_update(event, count, 64)) {
                rdmsrl(event->hw.config_base, *config);
                count = perf_ibs->get_count(*config);
        }
}

static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
                                         struct hw_perf_event *hwc, u64 config)
{
        wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
}

/*
 * Erratum #420 Instruction-Based Sampling Engine May Generate
 * Interrupt that Cannot Be Cleared:
 *
 * Must clear counter mask first, then clear the enable bit. See
 * Revision Guide for AMD Family 10h Processors, Publication #41322.
 */
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
                                          struct hw_perf_event *hwc, u64 config)
{
        config &= ~perf_ibs->cnt_mask;
        wrmsrl(hwc->config_base, config);
        config &= ~perf_ibs->enable_mask;
        wrmsrl(hwc->config_base, config);
}

/*
 * We cannot restore the ibs pmu state, so we always need to update
 * the event while stopping it and then reset the state when starting
 * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
 * in perf_ibs_start()/perf_ibs_stop() and instead always do it.
 */
static void perf_ibs_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        u64 period;

        if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
                return;

        WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
        hwc->state = 0;

        perf_ibs_set_period(perf_ibs, hwc, &period);
        set_bit(IBS_STARTED, pcpu->state);
        perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

        perf_event_update_userpage(event);
}

static void perf_ibs_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        u64 config;
        int stopping;

        stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);

        if (!stopping && (hwc->state & PERF_HES_UPTODATE))
                return;

        rdmsrl(hwc->config_base, config);

        if (stopping) {
                set_bit(IBS_STOPPING, pcpu->state);
                perf_ibs_disable_event(perf_ibs, hwc, config);
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
                hwc->state |= PERF_HES_STOPPED;
        }

        if (hwc->state & PERF_HES_UPTODATE)
                return;

        /*
         * Clear the valid bit so that rollovers are not counted on
         * update; rollovers are only accounted in the irq handler.
         */
        config &= ~perf_ibs->valid_mask;

        perf_ibs_event_update(perf_ibs, event, &config);
        hwc->state |= PERF_HES_UPTODATE;
}

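/*
 * There is only one IBS counter of each kind per cpu, so ->add()
 * merely claims the per-cpu slot and optionally starts the event,
 * and ->del() stops it and releases the slot again.
 */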
static int perf_ibs_add(struct perf_event *event, int flags)
{
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

        if (test_and_set_bit(IBS_ENABLED, pcpu->state))
                return -ENOSPC;

        event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        pcpu->event = event;

        if (flags & PERF_EF_START)
                perf_ibs_start(event, PERF_EF_RELOAD);

        return 0;
}

static void perf_ibs_del(struct perf_event *event, int flags)
{
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

        if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
                return;

        perf_ibs_stop(event, PERF_EF_UPDATE);

        pcpu->event = NULL;

        perf_event_update_userpage(event);
}

static void perf_ibs_read(struct perf_event *event) { }

PMU_FORMAT_ATTR(rand_en,        "config:57");
PMU_FORMAT_ATTR(cnt_ctl,        "config:19");

static struct attribute *ibs_fetch_format_attrs[] = {
        &format_attr_rand_en.attr,
        NULL,
};

static struct attribute *ibs_op_format_attrs[] = {
        NULL,   /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
        NULL,
};

static struct perf_ibs perf_ibs_fetch = {
        .pmu = {
                .task_ctx_nr    = perf_invalid_context,

                .event_init     = perf_ibs_init,
                .add            = perf_ibs_add,
                .del            = perf_ibs_del,
                .start          = perf_ibs_start,
                .stop           = perf_ibs_stop,
                .read           = perf_ibs_read,
        },
        .msr                    = MSR_AMD64_IBSFETCHCTL,
        .config_mask            = IBS_FETCH_CONFIG_MASK,
        .cnt_mask               = IBS_FETCH_MAX_CNT,
        .enable_mask            = IBS_FETCH_ENABLE,
        .valid_mask             = IBS_FETCH_VAL,
        .max_period             = IBS_FETCH_MAX_CNT << 4,
        .offset_mask            = { MSR_AMD64_IBSFETCH_REG_MASK },
        .offset_max             = MSR_AMD64_IBSFETCH_REG_COUNT,
        .format_attrs           = ibs_fetch_format_attrs,

        .get_count              = get_ibs_fetch_count,
};

static struct perf_ibs perf_ibs_op = {
        .pmu = {
                .task_ctx_nr    = perf_invalid_context,

                .event_init     = perf_ibs_init,
                .add            = perf_ibs_add,
                .del            = perf_ibs_del,
                .start          = perf_ibs_start,
                .stop           = perf_ibs_stop,
                .read           = perf_ibs_read,
        },
        .msr                    = MSR_AMD64_IBSOPCTL,
        .config_mask            = IBS_OP_CONFIG_MASK,
        .cnt_mask               = IBS_OP_MAX_CNT,
        .enable_mask            = IBS_OP_ENABLE,
        .valid_mask             = IBS_OP_VAL,
        .max_period             = IBS_OP_MAX_CNT << 4,
        .offset_mask            = { MSR_AMD64_IBSOP_REG_MASK },
        .offset_max             = MSR_AMD64_IBSOP_REG_COUNT,
        .format_attrs           = ibs_op_format_attrs,

        .get_count              = get_ibs_op_count,
};

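/*
 * NMI handler body for one IBS unit: check the valid bit, snapshot
 * the IBS register file into a raw sample, re-arm the counter with
 * the next period and hand the sample (with an exact rip whenever
 * the hardware recorded one) to perf_event_overflow().
 */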
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        struct perf_event *event = pcpu->event;
        struct hw_perf_event *hwc = &event->hw;
        struct perf_sample_data data;
        struct perf_raw_record raw;
        struct pt_regs regs;
        struct perf_ibs_data ibs_data;
        int offset, size, check_rip, offset_max, throttle = 0;
        unsigned int msr;
        u64 *buf, *config, period;

        if (!test_bit(IBS_STARTED, pcpu->state)) {
                /*
                 * Catch spurious interrupts after stopping IBS: after
                 * disabling IBS there could still be incoming NMIs
                 * with samples that even have the valid bit cleared.
                 * Mark all these NMIs as handled.
                 */
                return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
        }

        msr = hwc->config_base;
        buf = ibs_data.regs;
        rdmsrl(msr, *buf);
        if (!(*buf++ & perf_ibs->valid_mask))
                return 0;

        config = &ibs_data.regs[0];
        perf_ibs_event_update(perf_ibs, event, config);
        perf_sample_data_init(&data, 0, hwc->last_period);
        if (!perf_ibs_set_period(perf_ibs, hwc, &period))
                goto out;       /* no sw counter overflow */

        ibs_data.caps = ibs_caps;
        size = 1;
        offset = 1;
        check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
        if (event->attr.sample_type & PERF_SAMPLE_RAW)
                offset_max = perf_ibs->offset_max;
        else if (check_rip)
                offset_max = 2;
        else
                offset_max = 1;
        do {
                rdmsrl(msr + offset, *buf++);
                size++;
                offset = find_next_bit(perf_ibs->offset_mask,
                                       perf_ibs->offset_max,
                                       offset + 1);
        } while (offset < offset_max);
        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
                /*
                 * Read IbsBrTarget and IbsOpData4 separately
                 * depending on their availability.
                 * Can't add them to offset_max as they are staggered.
                 */
                if (ibs_caps & IBS_CAPS_BRNTRGT) {
                        rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++);
                        size++;
                }
                if (ibs_caps & IBS_CAPS_OPDATA4) {
                        rdmsrl(MSR_AMD64_IBSOPDATA4, *buf++);
                        size++;
                }
        }
        ibs_data.size = sizeof(u64) * size;

        regs = *iregs;
        if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
                regs.flags &= ~PERF_EFLAGS_EXACT;
        } else {
                set_linear_ip(&regs, ibs_data.regs[1]);
                regs.flags |= PERF_EFLAGS_EXACT;
        }

        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
                raw.size = sizeof(u32) + ibs_data.size;
                raw.data = ibs_data.data;
                data.raw = &raw;
        }

        throttle = perf_event_overflow(event, &data, &regs);
out:
        if (throttle)
                perf_ibs_disable_event(perf_ibs, hwc, *config);
        else
                perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

        perf_event_update_userpage(event);

        return 1;
}

static int
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
        int handled = 0;

        handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
        handled += perf_ibs_handle_irq(&perf_ibs_op, regs);

        if (handled)
                inc_irq_stat(apic_perf_irqs);

        return handled;
}
NOKPROBE_SYMBOL(perf_ibs_nmi_handler);

static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
{
        struct cpu_perf_ibs __percpu *pcpu;
        int ret;

        pcpu = alloc_percpu(struct cpu_perf_ibs);
        if (!pcpu)
                return -ENOMEM;

        perf_ibs->pcpu = pcpu;

        /* register attributes */
        if (perf_ibs->format_attrs[0]) {
                memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
                perf_ibs->format_group.name     = "format";
                perf_ibs->format_group.attrs    = perf_ibs->format_attrs;

                memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
                perf_ibs->attr_groups[0]        = &perf_ibs->format_group;
                perf_ibs->pmu.attr_groups       = perf_ibs->attr_groups;
        }

        ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
        if (ret) {
                perf_ibs->pcpu = NULL;
                free_percpu(pcpu);
        }

        return ret;
}

static __init int perf_event_ibs_init(void)
{
        struct attribute **attr = ibs_op_format_attrs;

        if (!ibs_caps)
                return -ENODEV; /* ibs not supported by the cpu */

        perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");

        if (ibs_caps & IBS_CAPS_OPCNT) {
                perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
                *attr++ = &format_attr_cnt_ctl.attr;
        }
        perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");

        register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
        printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);

        return 0;
}

#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */

static __init int perf_event_ibs_init(void) { return 0; }

#endif

/* IBS - apic initialization, for perf and oprofile */

static __init u32 __get_ibs_caps(void)
{
        u32 caps;
        unsigned int max_level;

        if (!boot_cpu_has(X86_FEATURE_IBS))
                return 0;

        /* check IBS cpuid feature flags */
        max_level = cpuid_eax(0x80000000);
        if (max_level < IBS_CPUID_FEATURES)
                return IBS_CAPS_DEFAULT;

        caps = cpuid_eax(IBS_CPUID_FEATURES);
        if (!(caps & IBS_CAPS_AVAIL))
                /* cpuid flags not valid */
                return IBS_CAPS_DEFAULT;

        return caps;
}

u32 get_ibs_caps(void)
{
        return ibs_caps;
}

EXPORT_SYMBOL(get_ibs_caps);

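/*
 * Reserve/release an extended interrupt LVT entry for IBS;
 * setup_APIC_eilvt() returns 0 on success, hence the negation.
 */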
static inline int get_eilvt(int offset)
{
        return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
}

static inline int put_eilvt(int offset)
{
        return !setup_APIC_eilvt(offset, 0, 0, 1);
}

/*
 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
 */
static inline int ibs_eilvt_valid(void)
{
        int offset;
        u64 val;
        int valid = 0;

        preempt_disable();

        rdmsrl(MSR_AMD64_IBSCTL, val);
        offset = val & IBSCTL_LVT_OFFSET_MASK;

        if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
                pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
                       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
                goto out;
        }

        if (!get_eilvt(offset)) {
                pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
                       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
                goto out;
        }

        valid = 1;
out:
        preempt_enable();

        return valid;
}

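/*
 * Propagate the LVT offset to the IBS control register of every
 * northbridge (one per node) via PCI config space and verify that
 * the write took effect.
 */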
static int setup_ibs_ctl(int ibs_eilvt_off)
{
        struct pci_dev *cpu_cfg;
        int nodes;
        u32 value = 0;

        nodes = 0;
        cpu_cfg = NULL;
        do {
                cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
                                         PCI_DEVICE_ID_AMD_10H_NB_MISC,
                                         cpu_cfg);
                if (!cpu_cfg)
                        break;
                ++nodes;
                pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
                                       | IBSCTL_LVT_OFFSET_VALID);
                pci_read_config_dword(cpu_cfg, IBSCTL, &value);
                if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
                        pci_dev_put(cpu_cfg);
                        printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
                               "IBSCTL = 0x%08x\n", value);
                        return -EINVAL;
                }
        } while (1);

        if (!nodes) {
                printk(KERN_DEBUG "No CPU node configured for IBS\n");
                return -ENODEV;
        }

        return 0;
}

/*
 * This runs only on the current cpu. We try to find an LVT offset and
 * setup the local APIC. For this we must disable preemption. On
 * success we initialize all nodes with this offset. This then updates
 * the offset in the per-node IBS_CTL msr. The per-core APIC setup of
 * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
 * uses the new offset.
 */
static int force_ibs_eilvt_setup(void)
{
        int offset;
        int ret;

        preempt_disable();
        /* find the next free EILVT entry, skip offset 0 */
        for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
                if (get_eilvt(offset))
                        break;
        }
        preempt_enable();

        if (offset == APIC_EILVT_NR_MAX) {
                printk(KERN_DEBUG "No EILVT entry available\n");
                return -EBUSY;
        }

        ret = setup_ibs_ctl(offset);
        if (ret)
                goto out;

        if (!ibs_eilvt_valid()) {
                ret = -EFAULT;
                goto out;
        }

        pr_info("IBS: LVT offset %d assigned\n", offset);

        return 0;
out:
        preempt_disable();
        put_eilvt(offset);
        preempt_enable();
        return ret;
}

static void ibs_eilvt_setup(void)
{
        /*
         * Force LVT offset assignment for family 10h: The offsets are
         * not assigned by the BIOS for this family, so the OS is
         * responsible for doing it. If the OS assignment fails, fall
         * back to the BIOS settings and try to set it up from there.
         */
        if (boot_cpu_data.x86 == 0x10)
                force_ibs_eilvt_setup();
}

static inline int get_ibs_lvt_offset(void)
{
        u64 val;

        rdmsrl(MSR_AMD64_IBSCTL, val);
        if (!(val & IBSCTL_LVT_OFFSET_VALID))
                return -EINVAL;

        return val & IBSCTL_LVT_OFFSET_MASK;
}

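/*
 * Program the local APIC's extended LVT entry with the offset stored
 * in MSR_AMD64_IBSCTL (setup_APIC_ibs), or mask it again
 * (clear_APIC_ibs). Called on each cpu via IPI, hotplug notifier and
 * suspend/resume.
 */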
static void setup_APIC_ibs(void *dummy)
{
        int offset;

        offset = get_ibs_lvt_offset();
        if (offset < 0)
                goto failed;

        if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
                return;
failed:
        pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
                smp_processor_id());
}

static void clear_APIC_ibs(void *dummy)
{
        int offset;

        offset = get_ibs_lvt_offset();
        if (offset >= 0)
                setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}

#ifdef CONFIG_PM

static int perf_ibs_suspend(void)
{
        clear_APIC_ibs(NULL);
        return 0;
}

static void perf_ibs_resume(void)
{
        ibs_eilvt_setup();
        setup_APIC_ibs(NULL);
}

static struct syscore_ops perf_ibs_syscore_ops = {
        .resume         = perf_ibs_resume,
        .suspend        = perf_ibs_suspend,
};

static void perf_ibs_pm_init(void)
{
        register_syscore_ops(&perf_ibs_syscore_ops);
}

#else

static inline void perf_ibs_pm_init(void) { }

#endif

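/*
 * Hotplug notifier: program the IBS LVT entry on a cpu that is coming
 * up and clear it again on a cpu that is going down.
 */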
static int
perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_STARTING:
                setup_APIC_ibs(NULL);
                break;
        case CPU_DYING:
                clear_APIC_ibs(NULL);
                break;
        default:
                break;
        }

        return NOTIFY_OK;
}

static __init int amd_ibs_init(void)
{
        u32 caps;
        int ret = -EINVAL;

        caps = __get_ibs_caps();
        if (!caps)
                return -ENODEV; /* ibs not supported by the cpu */

        ibs_eilvt_setup();

        if (!ibs_eilvt_valid())
                goto out;

        perf_ibs_pm_init();
        cpu_notifier_register_begin();
        ibs_caps = caps;
        /* make ibs_caps visible to other cpus: */
        smp_mb();
        smp_call_function(setup_APIC_ibs, NULL, 1);
        __perf_cpu_notifier(perf_ibs_cpu_notifier);
        cpu_notifier_register_done();

        ret = perf_event_ibs_init();
out:
        if (ret)
                pr_err("Failed to setup IBS, %d\n", ret);
        return ret;
}

/* Since we need the pci subsystem to init ibs we can't do this earlier: */
device_initcall(amd_ibs_init);