linux/arch/x86/kernel/cpu/perf_event_amd_ibs.c
/*
 * Performance events - AMD IBS
 *
 *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
 *
 *  For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/ptrace.h>

#include <asm/apic.h>

#include "perf_event.h"

static u32 ibs_caps;

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)

#include <linux/kprobes.h>
#include <linux/hardirq.h>

#include <asm/nmi.h>

#define IBS_FETCH_CONFIG_MASK   (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK      IBS_OP_MAX_CNT

enum ibs_states {
        IBS_ENABLED     = 0,
        IBS_STARTED     = 1,
        IBS_STOPPING    = 2,

        IBS_MAX_STATES,
};

struct cpu_perf_ibs {
        struct perf_event       *event;
        unsigned long           state[BITS_TO_LONGS(IBS_MAX_STATES)];
};

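/*
 * Per-flavor (fetch/op) PMU descriptor: the embedded struct pmu, the
 * IBS control MSR, the bit masks for config/count/enable/valid, the
 * maximum period, the offset map of the sample register file, the
 * per-cpu state, the sysfs format attributes and a callback that
 * extracts the current count from the control register.
 */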
struct perf_ibs {
        struct pmu                      pmu;
        unsigned int                    msr;
        u64                             config_mask;
        u64                             cnt_mask;
        u64                             enable_mask;
        u64                             valid_mask;
        u64                             max_period;
        unsigned long                   offset_mask[1];
        int                             offset_max;
        struct cpu_perf_ibs __percpu    *pcpu;

        struct attribute                **format_attrs;
        struct attribute_group          format_group;
        const struct attribute_group    *attr_groups[2];

        u64                             (*get_count)(u64 config);
};

struct perf_ibs_data {
        u32             size;
        union {
                u32     data[0];        /* data buffer starts here */
                u32     caps;
        };
        u64             regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};

static int
perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
{
        s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int overflow = 0;

        /*
         * If we are way outside a reasonable range then just skip forward:
         */
        if (unlikely(left <= -period)) {
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                overflow = 1;
        }

        if (unlikely(left < (s64)min)) {
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                overflow = 1;
        }

        /*
         * If the hw period that triggers the sw overflow is too short
         * we might hit the irq handler. This biases the results.
         * Thus we shorten the next-to-last period and set the last
         * period to the max period.
         */
        if (left > max) {
                left -= max;
                if (left > max)
                        left = max;
                else if (left < min)
                        left = min;
        }

        *hw_period = (u64)left;

        return overflow;
}

static int
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
{
        struct hw_perf_event *hwc = &event->hw;
        int shift = 64 - width;
        u64 prev_raw_count;
        u64 delta;

        /*
         * Careful: an NMI might modify the previous event value.
         *
         * Our tactic to handle this is to first atomically read and
         * exchange a new raw count - then add that new-prev delta
         * count to the generic event atomically:
         */
        prev_raw_count = local64_read(&hwc->prev_count);
        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                        new_raw_count) != prev_raw_count)
                return 0;

        /*
         * Now we have the new raw value and have updated the prev
         * timestamp already. We can now calculate the elapsed delta
         * (event-)time and add that to the generic event.
         *
         * Careful, not all hw sign-extends above the physical width
         * of the count.
         */
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);

        return 1;
}

static struct perf_ibs perf_ibs_fetch;
static struct perf_ibs perf_ibs_op;

static struct perf_ibs *get_ibs_pmu(int type)
{
        if (perf_ibs_fetch.pmu.type == type)
                return &perf_ibs_fetch;
        if (perf_ibs_op.pmu.type == type)
                return &perf_ibs_op;
        return NULL;
}

/*
 * Use IBS for precise event sampling:
 *
 *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
 *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
 *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
 *
 * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
 * MSRC001_1033) is used to select either cycle or micro-ops counting
 * mode.
 *
 * The rip of IBS samples has skid 0. Thus, IBS supports precise
 * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
 * rip is invalid when IBS was not able to record the rip correctly.
 * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
 */
static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
{
        switch (event->attr.precise_ip) {
        case 0:
                return -ENOENT;
        case 1:
        case 2:
                break;
        default:
                return -EOPNOTSUPP;
        }

        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
                switch (event->attr.config) {
                case PERF_COUNT_HW_CPU_CYCLES:
                        *config = 0;
                        return 0;
                }
                break;
        case PERF_TYPE_RAW:
                switch (event->attr.config) {
                case 0x0076:
                        *config = 0;
                        return 0;
                case 0x00C1:
                        *config = IBS_OP_CNT_CTL;
                        return 0;
                }
                break;
        default:
                return -ENOENT;
        }

        return -EOPNOTSUPP;
}

static const struct perf_event_attr ibs_notsupp = {
        .exclude_user   = 1,
        .exclude_kernel = 1,
        .exclude_hv     = 1,
        .exclude_idle   = 1,
        .exclude_host   = 1,
        .exclude_guest  = 1,
};

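/*
 * Set up a new IBS event: map precise cycle/micro-op events onto the
 * ibs_op PMU, reject attributes IBS cannot support, validate the raw
 * config against the PMU's config mask and derive the initial sample
 * period (the hardware max count ignores the lower 4 bits).
 */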
static int perf_ibs_init(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs;
        u64 max_cnt, config;
        int ret;

        perf_ibs = get_ibs_pmu(event->attr.type);
        if (perf_ibs) {
                config = event->attr.config;
        } else {
                perf_ibs = &perf_ibs_op;
                ret = perf_ibs_precise_event(event, &config);
                if (ret)
                        return ret;
        }

        if (event->pmu != &perf_ibs->pmu)
                return -ENOENT;

        if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
                return -EINVAL;

        if (config & ~perf_ibs->config_mask)
                return -EINVAL;

        if (hwc->sample_period) {
                if (config & perf_ibs->cnt_mask)
                        /* raw max_cnt may not be set */
                        return -EINVAL;
                if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
                        /*
                         * The lower 4 bits can not be set in the ibs
                         * max cnt, but we allow it in case we adjust
                         * the sample period to set a frequency.
                         */
                        return -EINVAL;
                hwc->sample_period &= ~0x0FULL;
                if (!hwc->sample_period)
                        hwc->sample_period = 0x10;
        } else {
                max_cnt = config & perf_ibs->cnt_mask;
                config &= ~perf_ibs->cnt_mask;
                event->attr.sample_period = max_cnt << 4;
                hwc->sample_period = event->attr.sample_period;
        }

        if (!hwc->sample_period)
                return -EINVAL;

        /*
         * If we modify hwc->sample_period, we also need to update
         * hwc->last_period and hwc->period_left.
         */
        hwc->last_period = hwc->sample_period;
        local64_set(&hwc->period_left, hwc->sample_period);

        hwc->config_base = perf_ibs->msr;
        hwc->config = config;

        return 0;
}

static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
                               struct hw_perf_event *hwc, u64 *period)
{
        int overflow;

        /* ignore lower 4 bits in min count: */
        overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
        local64_set(&hwc->prev_count, 0);

        return overflow;
}

static u64 get_ibs_fetch_count(u64 config)
{
        return (config & IBS_FETCH_CNT) >> 12;
}

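/*
 * The op count is split across IbsOpCtl: if the valid bit is set the
 * counter has rolled over at max_cnt (stored in units of 16 ops), and
 * on cpus with the RDWROPCNT capability the current count can be read
 * back on top of that.
 */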
static u64 get_ibs_op_count(u64 config)
{
        u64 count = 0;

        if (config & IBS_OP_VAL)
                count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */

        if (ibs_caps & IBS_CAPS_RDWROPCNT)
                count += (config & IBS_OP_CUR_CNT) >> 32;

        return count;
}

static void
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
                      u64 *config)
{
        u64 count = perf_ibs->get_count(*config);

        /*
         * Set width to 64 since we do not overflow on max width but
         * instead on max count. In perf_ibs_set_period() we clear
         * prev count manually on overflow.
         */
        while (!perf_event_try_update(event, count, 64)) {
                rdmsrl(event->hw.config_base, *config);
                count = perf_ibs->get_count(*config);
        }
}

static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
                                         struct hw_perf_event *hwc, u64 config)
{
        wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
}

/*
 * Erratum #420 Instruction-Based Sampling Engine May Generate
 * Interrupt that Cannot Be Cleared:
 *
 * Must clear counter mask first, then clear the enable bit. See
 * Revision Guide for AMD Family 10h Processors, Publication #41322.
 */
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
                                          struct hw_perf_event *hwc, u64 config)
{
        config &= ~perf_ibs->cnt_mask;
        wrmsrl(hwc->config_base, config);
        config &= ~perf_ibs->enable_mask;
        wrmsrl(hwc->config_base, config);
}

/*
 * We cannot restore the ibs pmu state, so we always need to update
 * the event while stopping it and then reset the state when starting
 * again. Thus, we ignore the PERF_EF_RELOAD and PERF_EF_UPDATE flags
 * in perf_ibs_start()/perf_ibs_stop() and instead always do it.
 */
static void perf_ibs_start(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        u64 period;

        if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
                return;

        WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
        hwc->state = 0;

        perf_ibs_set_period(perf_ibs, hwc, &period);
        set_bit(IBS_STARTED, pcpu->state);
        perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

        perf_event_update_userpage(event);
}

static void perf_ibs_stop(struct perf_event *event, int flags)
{
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        u64 config;
        int stopping;

        stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);

        if (!stopping && (hwc->state & PERF_HES_UPTODATE))
                return;

        rdmsrl(hwc->config_base, config);

        if (stopping) {
                set_bit(IBS_STOPPING, pcpu->state);
                perf_ibs_disable_event(perf_ibs, hwc, config);
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
                hwc->state |= PERF_HES_STOPPED;
        }

        if (hwc->state & PERF_HES_UPTODATE)
                return;

        /*
         * Clear the valid bit so that rollovers are not counted on
         * this update; rollovers are only accounted in the irq handler.
         */
        config &= ~perf_ibs->valid_mask;

        perf_ibs_event_update(perf_ibs, event, &config);
        hwc->state |= PERF_HES_UPTODATE;
}

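/*
 * Only one IBS event of each flavor can be active per cpu; the
 * IBS_ENABLED bit serves as the allocation test.
 */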
static int perf_ibs_add(struct perf_event *event, int flags)
{
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

        if (test_and_set_bit(IBS_ENABLED, pcpu->state))
                return -ENOSPC;

        event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        pcpu->event = event;

        if (flags & PERF_EF_START)
                perf_ibs_start(event, PERF_EF_RELOAD);

        return 0;
}

static void perf_ibs_del(struct perf_event *event, int flags)
{
        struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

        if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
                return;

        perf_ibs_stop(event, PERF_EF_UPDATE);

        pcpu->event = NULL;

        perf_event_update_userpage(event);
}

static void perf_ibs_read(struct perf_event *event) { }

PMU_FORMAT_ATTR(rand_en,        "config:57");
PMU_FORMAT_ATTR(cnt_ctl,        "config:19");

static struct attribute *ibs_fetch_format_attrs[] = {
        &format_attr_rand_en.attr,
        NULL,
};

static struct attribute *ibs_op_format_attrs[] = {
        NULL,   /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
        NULL,
};

static struct perf_ibs perf_ibs_fetch = {
        .pmu = {
                .task_ctx_nr    = perf_invalid_context,

                .event_init     = perf_ibs_init,
                .add            = perf_ibs_add,
                .del            = perf_ibs_del,
                .start          = perf_ibs_start,
                .stop           = perf_ibs_stop,
                .read           = perf_ibs_read,
        },
        .msr                    = MSR_AMD64_IBSFETCHCTL,
        .config_mask            = IBS_FETCH_CONFIG_MASK,
        .cnt_mask               = IBS_FETCH_MAX_CNT,
        .enable_mask            = IBS_FETCH_ENABLE,
        .valid_mask             = IBS_FETCH_VAL,
        .max_period             = IBS_FETCH_MAX_CNT << 4,
        .offset_mask            = { MSR_AMD64_IBSFETCH_REG_MASK },
        .offset_max             = MSR_AMD64_IBSFETCH_REG_COUNT,
        .format_attrs           = ibs_fetch_format_attrs,

        .get_count              = get_ibs_fetch_count,
};

static struct perf_ibs perf_ibs_op = {
        .pmu = {
                .task_ctx_nr    = perf_invalid_context,

                .event_init     = perf_ibs_init,
                .add            = perf_ibs_add,
                .del            = perf_ibs_del,
                .start          = perf_ibs_start,
                .stop           = perf_ibs_stop,
                .read           = perf_ibs_read,
        },
        .msr                    = MSR_AMD64_IBSOPCTL,
        .config_mask            = IBS_OP_CONFIG_MASK,
        .cnt_mask               = IBS_OP_MAX_CNT,
        .enable_mask            = IBS_OP_ENABLE,
        .valid_mask             = IBS_OP_VAL,
        .max_period             = IBS_OP_MAX_CNT << 4,
        .offset_mask            = { MSR_AMD64_IBSOP_REG_MASK },
        .offset_max             = MSR_AMD64_IBSOP_REG_COUNT,
        .format_attrs           = ibs_op_format_attrs,

        .get_count              = get_ibs_op_count,
};

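/*
 * NMI handler for one IBS flavor: bail out if the sample is not valid,
 * update the event count, re-arm the hardware period, read the
 * remaining sample registers (all of them for PERF_SAMPLE_RAW), fix up
 * the instruction pointer unless the hardware marked it invalid and
 * hand the sample to perf_event_overflow(). Returns 1 if the NMI was
 * ours, 0 otherwise.
 */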
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
        struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
        struct perf_event *event = pcpu->event;
        struct hw_perf_event *hwc = &event->hw;
        struct perf_sample_data data;
        struct perf_raw_record raw;
        struct pt_regs regs;
        struct perf_ibs_data ibs_data;
        int offset, size, check_rip, offset_max, throttle = 0;
        unsigned int msr;
        u64 *buf, *config, period;

        if (!test_bit(IBS_STARTED, pcpu->state)) {
                /*
                 * Catch spurious interrupts after stopping IBS: After
                 * disabling IBS there could still be incoming NMIs
                 * with samples that even have the valid bit cleared.
                 * Mark all these NMIs as handled.
                 */
                return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
        }

        msr = hwc->config_base;
        buf = ibs_data.regs;
        rdmsrl(msr, *buf);
        if (!(*buf++ & perf_ibs->valid_mask))
                return 0;

        config = &ibs_data.regs[0];
        perf_ibs_event_update(perf_ibs, event, config);
        perf_sample_data_init(&data, 0, hwc->last_period);
        if (!perf_ibs_set_period(perf_ibs, hwc, &period))
                goto out;       /* no sw counter overflow */

        ibs_data.caps = ibs_caps;
        size = 1;
        offset = 1;
        check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
        if (event->attr.sample_type & PERF_SAMPLE_RAW)
                offset_max = perf_ibs->offset_max;
        else if (check_rip)
                offset_max = 2;
        else
                offset_max = 1;
        do {
                rdmsrl(msr + offset, *buf++);
                size++;
                offset = find_next_bit(perf_ibs->offset_mask,
                                       perf_ibs->offset_max,
                                       offset + 1);
        } while (offset < offset_max);
        ibs_data.size = sizeof(u64) * size;

        regs = *iregs;
        if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
                regs.flags &= ~PERF_EFLAGS_EXACT;
        } else {
                set_linear_ip(&regs, ibs_data.regs[1]);
                regs.flags |= PERF_EFLAGS_EXACT;
        }

        if (event->attr.sample_type & PERF_SAMPLE_RAW) {
                raw.size = sizeof(u32) + ibs_data.size;
                raw.data = ibs_data.data;
                data.raw = &raw;
        }

        throttle = perf_event_overflow(event, &data, &regs);
out:
        if (throttle)
                perf_ibs_disable_event(perf_ibs, hwc, *config);
        else
                perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

        perf_event_update_userpage(event);

        return 1;
}

static int __kprobes
perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
        int handled = 0;

        handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
        handled += perf_ibs_handle_irq(&perf_ibs_op, regs);

        if (handled)
                inc_irq_stat(apic_perf_irqs);

        return handled;
}

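/*
 * Allocate the per-cpu state for one IBS flavor, wire up the optional
 * sysfs format attributes and register the PMU.
 */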
static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
{
        struct cpu_perf_ibs __percpu *pcpu;
        int ret;

        pcpu = alloc_percpu(struct cpu_perf_ibs);
        if (!pcpu)
                return -ENOMEM;

        perf_ibs->pcpu = pcpu;

        /* register attributes */
        if (perf_ibs->format_attrs[0]) {
                memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
                perf_ibs->format_group.name     = "format";
                perf_ibs->format_group.attrs    = perf_ibs->format_attrs;

                memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
                perf_ibs->attr_groups[0]        = &perf_ibs->format_group;
                perf_ibs->pmu.attr_groups       = perf_ibs->attr_groups;
        }

        ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
        if (ret) {
                perf_ibs->pcpu = NULL;
                free_percpu(pcpu);
        }

        return ret;
}

static __init int perf_event_ibs_init(void)
{
        struct attribute **attr = ibs_op_format_attrs;

        if (!ibs_caps)
                return -ENODEV; /* ibs not supported by the cpu */

        perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");

        if (ibs_caps & IBS_CAPS_OPCNT) {
                perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
                *attr++ = &format_attr_cnt_ctl.attr;
        }
        perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");

        register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
        printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);

        return 0;
}

#else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */

static __init int perf_event_ibs_init(void) { return 0; }

#endif

/* IBS - apic initialization, for perf and oprofile */

static __init u32 __get_ibs_caps(void)
{
        u32 caps;
        unsigned int max_level;

        if (!boot_cpu_has(X86_FEATURE_IBS))
                return 0;

        /* check IBS cpuid feature flags */
        max_level = cpuid_eax(0x80000000);
        if (max_level < IBS_CPUID_FEATURES)
                return IBS_CAPS_DEFAULT;

        caps = cpuid_eax(IBS_CPUID_FEATURES);
        if (!(caps & IBS_CAPS_AVAIL))
                /* cpuid flags not valid */
                return IBS_CAPS_DEFAULT;

        return caps;
}

u32 get_ibs_caps(void)
{
        return ibs_caps;
}

EXPORT_SYMBOL(get_ibs_caps);

static inline int get_eilvt(int offset)
{
        return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
}

static inline int put_eilvt(int offset)
{
        return !setup_APIC_eilvt(offset, 0, 0, 1);
}

/*
 * Check and reserve APIC extended interrupt LVT offset for IBS if available.
 */
static inline int ibs_eilvt_valid(void)
{
        int offset;
        u64 val;
        int valid = 0;

        preempt_disable();

        rdmsrl(MSR_AMD64_IBSCTL, val);
        offset = val & IBSCTL_LVT_OFFSET_MASK;

        if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
                pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
                       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
                goto out;
        }

        if (!get_eilvt(offset)) {
                pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
                       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
                goto out;
        }

        valid = 1;
out:
        preempt_enable();

        return valid;
}

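/*
 * Program the chosen LVT offset plus the valid bit into the IBSCTL
 * register in northbridge PCI config space on every node and read it
 * back to verify that the write took effect.
 */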
static int setup_ibs_ctl(int ibs_eilvt_off)
{
        struct pci_dev *cpu_cfg;
        int nodes;
        u32 value = 0;

        nodes = 0;
        cpu_cfg = NULL;
        do {
                cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
                                         PCI_DEVICE_ID_AMD_10H_NB_MISC,
                                         cpu_cfg);
                if (!cpu_cfg)
                        break;
                ++nodes;
                pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
                                       | IBSCTL_LVT_OFFSET_VALID);
                pci_read_config_dword(cpu_cfg, IBSCTL, &value);
                if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
                        pci_dev_put(cpu_cfg);
                        printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
                               "IBSCTL = 0x%08x\n", value);
                        return -EINVAL;
                }
        } while (1);

        if (!nodes) {
                printk(KERN_DEBUG "No CPU node configured for IBS\n");
                return -ENODEV;
        }

        return 0;
}

/*
 * This runs only on the current cpu. We try to find an LVT offset and
 * set up the local APIC. For this we must disable preemption. On
 * success we initialize all nodes with this offset, which then updates
 * the offset in the per-node IBS_CTL msr. The per-core APIC setup of
 * the IBS interrupt vector is handled by perf_ibs_cpu_notifier, which
 * uses the new offset.
 */
static int force_ibs_eilvt_setup(void)
{
        int offset;
        int ret;

        preempt_disable();
        /* find the next free available EILVT entry, skip offset 0 */
        for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
                if (get_eilvt(offset))
                        break;
        }
        preempt_enable();

        if (offset == APIC_EILVT_NR_MAX) {
                printk(KERN_DEBUG "No EILVT entry available\n");
                return -EBUSY;
        }

        ret = setup_ibs_ctl(offset);
        if (ret)
                goto out;

        if (!ibs_eilvt_valid()) {
                ret = -EFAULT;
                goto out;
        }

        pr_info("IBS: LVT offset %d assigned\n", offset);

        return 0;
out:
        preempt_disable();
        put_eilvt(offset);
        preempt_enable();
        return ret;
}

static inline int get_ibs_lvt_offset(void)
{
        u64 val;

        rdmsrl(MSR_AMD64_IBSCTL, val);
        if (!(val & IBSCTL_LVT_OFFSET_VALID))
                return -EINVAL;

        return val & IBSCTL_LVT_OFFSET_MASK;
}

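/*
 * Program the local APIC's extended interrupt LVT entry at the offset
 * found in MSR_AMD64_IBSCTL to deliver IBS interrupts as NMIs on this
 * cpu; clear_APIC_ibs() masks the entry again when the cpu goes down.
 */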
static void setup_APIC_ibs(void *dummy)
{
        int offset;

        offset = get_ibs_lvt_offset();
        if (offset < 0)
                goto failed;

        if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
                return;
failed:
        pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
                smp_processor_id());
}

static void clear_APIC_ibs(void *dummy)
{
        int offset;

        offset = get_ibs_lvt_offset();
        if (offset >= 0)
                setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}

static int
perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_STARTING:
                setup_APIC_ibs(NULL);
                break;
        case CPU_DYING:
                clear_APIC_ibs(NULL);
                break;
        default:
                break;
        }

        return NOTIFY_OK;
}

static __init int amd_ibs_init(void)
{
        u32 caps;
        int ret = -EINVAL;

        caps = __get_ibs_caps();
        if (!caps)
                return -ENODEV; /* ibs not supported by the cpu */

        /*
         * Force LVT offset assignment for family 10h: The offsets are
         * not assigned by the BIOS for this family, so the OS is
         * responsible for doing it. If the OS assignment fails, fall
         * back to the BIOS settings and try to set it up with those.
         */
        if (boot_cpu_data.x86 == 0x10)
                force_ibs_eilvt_setup();

        if (!ibs_eilvt_valid())
                goto out;

        get_online_cpus();
        ibs_caps = caps;
        /* make ibs_caps visible to other cpus: */
        smp_mb();
        perf_cpu_notifier(perf_ibs_cpu_notifier);
        smp_call_function(setup_APIC_ibs, NULL, 1);
        put_online_cpus();

        ret = perf_event_ibs_init();
out:
        if (ret)
                pr_err("Failed to setup IBS, %d\n", ret);
        return ret;
}

/* Since we need the pci subsystem to init ibs we can't do this earlier: */
device_initcall(amd_ibs_init);