linux/arch/x86/kernel/cpu/perf_event_intel_uncore.c
#include "perf_event_intel_uncore.h"

static struct intel_uncore_type *empty_uncore[] = { NULL, };
struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
struct intel_uncore_type **uncore_pci_uncores = empty_uncore;

static bool pcidrv_registered;
struct pci_driver *uncore_pci_driver;
/* pci bus to socket mapping */
int uncore_pcibus_to_physid[256] = { [0 ... 255] = -1, };
struct pci_dev *uncore_extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];

static DEFINE_RAW_SPINLOCK(uncore_box_lock);
/* mask of cpus that collect uncore events */
static cpumask_t uncore_cpu_mask;

/* constraint for the fixed counter */
static struct event_constraint uncore_constraint_fixed =
        EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
struct event_constraint uncore_constraint_empty =
        EVENT_CONSTRAINT(0, 0, 0);

ssize_t uncore_event_show(struct kobject *kobj,
                          struct kobj_attribute *attr, char *buf)
{
        struct uncore_event_desc *event =
                container_of(attr, struct uncore_event_desc, attr);
        return sprintf(buf, "%s", event->config);
}

struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
{
        return container_of(event->pmu, struct intel_uncore_pmu, pmu);
}

struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
{
        struct intel_uncore_box *box;

        box = *per_cpu_ptr(pmu->box, cpu);
        if (box)
                return box;

        raw_spin_lock(&uncore_box_lock);
        /* Recheck in lock to handle races. */
        if (*per_cpu_ptr(pmu->box, cpu))
                goto out;
        list_for_each_entry(box, &pmu->box_list, list) {
                if (box->phys_id == topology_physical_package_id(cpu)) {
                        atomic_inc(&box->refcnt);
                        *per_cpu_ptr(pmu->box, cpu) = box;
                        break;
                }
        }
out:
        raw_spin_unlock(&uncore_box_lock);

        return *per_cpu_ptr(pmu->box, cpu);
}

struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
        /*
         * perf core schedules events on the basis of cpu; uncore events are
         * collected by one of the cpus inside a physical package.
         */
        return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id());
}

u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
        u64 count;

        rdmsrl(event->hw.event_base, count);

        return count;
}

/*
 * generic get constraint function for shared match/mask registers.
 */
struct event_constraint *
uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
        struct intel_uncore_extra_reg *er;
        struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
        struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
        unsigned long flags;
        bool ok = false;

        /*
         * reg->alloc can be set due to existing state, so for a fake box we
         * need to ignore it; otherwise we might fail to allocate proper
         * fake state for this extra reg constraint.
         */
        if (reg1->idx == EXTRA_REG_NONE ||
            (!uncore_box_is_fake(box) && reg1->alloc))
                return NULL;

        er = &box->shared_regs[reg1->idx];
        raw_spin_lock_irqsave(&er->lock, flags);
        if (!atomic_read(&er->ref) ||
            (er->config1 == reg1->config && er->config2 == reg2->config)) {
                atomic_inc(&er->ref);
                er->config1 = reg1->config;
                er->config2 = reg2->config;
                ok = true;
        }
        raw_spin_unlock_irqrestore(&er->lock, flags);

        if (ok) {
                if (!uncore_box_is_fake(box))
                        reg1->alloc = 1;
                return NULL;
        }

        return &uncore_constraint_empty;
}

void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
        struct intel_uncore_extra_reg *er;
        struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;

        /*
         * Only put the constraint if the extra reg was actually allocated.
         * This also takes care of events which do not use an extra shared
         * reg.
         *
         * Also, if this is a fake box we shouldn't touch any event state
         * (reg->alloc) and we don't care about leaving inconsistent box
         * state either since it will be thrown out.
         */
        if (uncore_box_is_fake(box) || !reg1->alloc)
                return;

        er = &box->shared_regs[reg1->idx];
        atomic_dec(&er->ref);
        reg1->alloc = 0;
}

u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
{
        struct intel_uncore_extra_reg *er;
        unsigned long flags;
        u64 config;

        er = &box->shared_regs[idx];

        raw_spin_lock_irqsave(&er->lock, flags);
        config = er->config;
        raw_spin_unlock_irqrestore(&er->lock, flags);

        return config;
}

static void uncore_assign_hw_event(struct intel_uncore_box *box, struct perf_event *event, int idx)
{
        struct hw_perf_event *hwc = &event->hw;

        hwc->idx = idx;
        hwc->last_tag = ++box->tags[idx];

        if (hwc->idx == UNCORE_PMC_IDX_FIXED) {
                hwc->event_base = uncore_fixed_ctr(box);
                hwc->config_base = uncore_fixed_ctl(box);
                return;
        }

        hwc->config_base = uncore_event_ctl(box, hwc->idx);
        hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
}

void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
{
        u64 prev_count, new_count, delta;
        int shift;

        if (event->hw.idx >= UNCORE_PMC_IDX_FIXED)
                shift = 64 - uncore_fixed_ctr_bits(box);
        else
                shift = 64 - uncore_perf_ctr_bits(box);

        /* the hrtimer might modify the previous event value */
again:
        prev_count = local64_read(&event->hw.prev_count);
        new_count = uncore_read_counter(box, event);
        if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
                goto again;

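        /*
         * The hardware counters are narrower than 64 bit, so compute the
         * delta in the counter's native width: shifting both snapshots up
         * by 'shift' bits lets the subtraction wrap at the counter width,
         * and shifting the result back down yields the correct delta even
         * if the raw counter wrapped between reads.
         */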
        delta = (new_count << shift) - (prev_count << shift);
        delta >>= shift;

        local64_add(delta, &event->count);
}

/*
 * The overflow interrupt is unavailable for SandyBridge-EP and broken for
 * SandyBridge, so we use an hrtimer to periodically poll the counters and
 * avoid overflow.
 */
static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
{
        struct intel_uncore_box *box;
        struct perf_event *event;
        unsigned long flags;
        int bit;

        box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
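        /*
         * The box may have been disabled or handed to another cpu since
         * the timer was armed; in that case just let the timer die.
         */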
        if (!box->n_active || box->cpu != smp_processor_id())
                return HRTIMER_NORESTART;
        /*
         * disable local interrupts to prevent uncore_pmu_event_start/stop
         * from interrupting the update process
         */
        local_irq_save(flags);

        /*
         * handle boxes with an active event list as opposed to active
         * counters
         */
        list_for_each_entry(event, &box->active_list, active_entry) {
                uncore_perf_event_update(box, event);
        }

        for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
                uncore_perf_event_update(box, box->events[bit]);

        local_irq_restore(flags);

        hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
        return HRTIMER_RESTART;
}

void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
{
        hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
                      HRTIMER_MODE_REL_PINNED);
}

void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
{
        hrtimer_cancel(&box->hrtimer);
}

static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
{
        hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        box->hrtimer.function = uncore_pmu_hrtimer;
}

static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
{
        struct intel_uncore_box *box;
        int i, size;

        size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);

        box = kzalloc_node(size, GFP_KERNEL, node);
        if (!box)
                return NULL;

        for (i = 0; i < type->num_shared_regs; i++)
                raw_spin_lock_init(&box->shared_regs[i].lock);

        uncore_pmu_init_hrtimer(box);
        atomic_set(&box->refcnt, 1);
        box->cpu = -1;
        box->phys_id = -1;

        /* set default hrtimer timeout */
        box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;

        INIT_LIST_HEAD(&box->active_list);

        return box;
}

/*
 * Use the uncore_pmu_event_init() pmu event_init callback as a detection
 * point for uncore events.
 */
static int uncore_pmu_event_init(struct perf_event *event);

static bool is_uncore_event(struct perf_event *event)
{
        return event->pmu->event_init == uncore_pmu_event_init;
}

static int
uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
{
        struct perf_event *event;
        int n, max_count;

        max_count = box->pmu->type->num_counters;
        if (box->pmu->type->fixed_ctl)
                max_count++;

        if (box->n_events >= max_count)
                return -EINVAL;

        n = box->n_events;

        if (is_uncore_event(leader)) {
                box->event_list[n] = leader;
                n++;
        }

        if (!dogrp)
                return n;

        list_for_each_entry(event, &leader->sibling_list, group_entry) {
                if (!is_uncore_event(event) ||
                    event->state <= PERF_EVENT_STATE_OFF)
                        continue;

                if (n >= max_count)
                        return -EINVAL;

                box->event_list[n] = event;
                n++;
        }
        return n;
}

static struct event_constraint *
uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
        struct intel_uncore_type *type = box->pmu->type;
        struct event_constraint *c;

        if (type->ops->get_constraint) {
                c = type->ops->get_constraint(box, event);
                if (c)
                        return c;
        }

        if (event->attr.config == UNCORE_FIXED_EVENT)
                return &uncore_constraint_fixed;

        if (type->constraints) {
                for_each_event_constraint(c, type->constraints) {
                        if ((event->hw.config & c->cmask) == c->code)
                                return c;
                }
        }

        return &type->unconstrainted;
}

static void uncore_put_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
{
        if (box->pmu->type->ops->put_constraint)
                box->pmu->type->ops->put_constraint(box, event);
}

static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
        unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
        struct event_constraint *c;
        int i, wmin, wmax, ret = 0;
        struct hw_perf_event *hwc;

        bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);

        for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
                c = uncore_get_event_constraint(box, box->event_list[i]);
                box->event_constraint[i] = c;
                wmin = min(wmin, c->weight);
                wmax = max(wmax, c->weight);
        }

        /* fastpath, try to reuse previous register */
        for (i = 0; i < n; i++) {
                hwc = &box->event_list[i]->hw;
                c = box->event_constraint[i];

                /* never assigned */
                if (hwc->idx == -1)
                        break;

                /* constraint still honored */
                if (!test_bit(hwc->idx, c->idxmsk))
                        break;

                /* not already used */
                if (test_bit(hwc->idx, used_mask))
                        break;

                __set_bit(hwc->idx, used_mask);
                if (assign)
                        assign[i] = hwc->idx;
        }
        /* slow path */
        if (i != n)
                ret = perf_assign_events(box->event_constraint, n,
                                         wmin, wmax, n, assign);

        if (!assign || ret) {
                for (i = 0; i < n; i++)
                        uncore_put_event_constraint(box, box->event_list[i]);
        }
        return ret ? -EINVAL : 0;
}

static void uncore_pmu_event_start(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        int idx = event->hw.idx;

        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
                return;

        if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
                return;

        event->hw.state = 0;
        box->events[idx] = event;
        box->n_active++;
        __set_bit(idx, box->active_mask);

        local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
        uncore_enable_event(box, event);

        if (box->n_active == 1) {
                uncore_enable_box(box);
                uncore_pmu_start_hrtimer(box);
        }
}

static void uncore_pmu_event_stop(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        struct hw_perf_event *hwc = &event->hw;

        if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
                uncore_disable_event(box, event);
                box->n_active--;
                box->events[hwc->idx] = NULL;
                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
                hwc->state |= PERF_HES_STOPPED;

                if (box->n_active == 0) {
                        uncore_disable_box(box);
                        uncore_pmu_cancel_hrtimer(box);
                }
        }

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                /*
                 * Drain the remaining delta count out of an event
                 * that we are disabling:
                 */
                uncore_perf_event_update(box, event);
                hwc->state |= PERF_HES_UPTODATE;
        }
}

static int uncore_pmu_event_add(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        struct hw_perf_event *hwc = &event->hw;
        int assign[UNCORE_PMC_IDX_MAX];
        int i, n, ret;

        if (!box)
                return -ENODEV;

        ret = n = uncore_collect_events(box, event, false);
        if (ret < 0)
                return ret;

        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
        if (!(flags & PERF_EF_START))
                hwc->state |= PERF_HES_ARCH;

        ret = uncore_assign_events(box, assign, n);
        if (ret)
                return ret;

        /* save events moving to new counters */
        for (i = 0; i < box->n_events; i++) {
                event = box->event_list[i];
                hwc = &event->hw;

                if (hwc->idx == assign[i] &&
                        hwc->last_tag == box->tags[assign[i]])
                        continue;
                /*
                 * Ensure we don't accidentally enable a stopped
                 * counter simply because we rescheduled.
                 */
                if (hwc->state & PERF_HES_STOPPED)
                        hwc->state |= PERF_HES_ARCH;

                uncore_pmu_event_stop(event, PERF_EF_UPDATE);
        }

        /* reprogram moved events into new counters */
        for (i = 0; i < n; i++) {
                event = box->event_list[i];
                hwc = &event->hw;

                if (hwc->idx != assign[i] ||
                        hwc->last_tag != box->tags[assign[i]])
                        uncore_assign_hw_event(box, event, assign[i]);
                else if (i < box->n_events)
                        continue;

                if (hwc->state & PERF_HES_ARCH)
                        continue;

                uncore_pmu_event_start(event, 0);
        }
        box->n_events = n;

        return 0;
}

static void uncore_pmu_event_del(struct perf_event *event, int flags)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        int i;

        uncore_pmu_event_stop(event, PERF_EF_UPDATE);

        for (i = 0; i < box->n_events; i++) {
                if (event == box->event_list[i]) {
                        uncore_put_event_constraint(box, event);

                        while (++i < box->n_events)
                                box->event_list[i - 1] = box->event_list[i];

                        --box->n_events;
                        break;
                }
        }

        event->hw.idx = -1;
        event->hw.last_tag = ~0ULL;
}

void uncore_pmu_event_read(struct perf_event *event)
{
        struct intel_uncore_box *box = uncore_event_to_box(event);
        uncore_perf_event_update(box, event);
}

/*
 * validation ensures the group can be loaded onto the
 * PMU if it was the only group available.
 */
static int uncore_validate_group(struct intel_uncore_pmu *pmu,
                                struct perf_event *event)
{
        struct perf_event *leader = event->group_leader;
        struct intel_uncore_box *fake_box;
        int ret = -EINVAL, n;

        fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
        if (!fake_box)
                return -ENOMEM;

        fake_box->pmu = pmu;
        /*
         * the event is not yet connected with its siblings, therefore we
         * must first collect the existing siblings and then add the new
         * event before we can simulate the scheduling
         */
        n = uncore_collect_events(fake_box, leader, true);
        if (n < 0)
                goto out;

        fake_box->n_events = n;
        n = uncore_collect_events(fake_box, event, false);
        if (n < 0)
                goto out;

        fake_box->n_events = n;

        ret = uncore_assign_events(fake_box, NULL, n);
out:
        kfree(fake_box);
        return ret;
}

static int uncore_pmu_event_init(struct perf_event *event)
{
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        struct hw_perf_event *hwc = &event->hw;
        int ret;

        if (event->attr.type != event->pmu->type)
                return -ENOENT;

        pmu = uncore_event_to_pmu(event);
        /* no device found for this pmu */
        if (pmu->func_id < 0)
                return -ENOENT;

        /*
         * Uncore PMUs measure at all privilege levels all the time,
         * so it doesn't make sense to specify any exclude bits.
         */
        if (event->attr.exclude_user || event->attr.exclude_kernel ||
                        event->attr.exclude_hv || event->attr.exclude_idle)
                return -EINVAL;

        /* Sampling not supported yet */
        if (hwc->sample_period)
                return -EINVAL;

        /*
         * Place all uncore events for a particular physical package
         * onto a single cpu
         */
        if (event->cpu < 0)
                return -EINVAL;
        box = uncore_pmu_to_box(pmu, event->cpu);
        if (!box || box->cpu < 0)
                return -EINVAL;
        event->cpu = box->cpu;

        event->hw.idx = -1;
        event->hw.last_tag = ~0ULL;
        event->hw.extra_reg.idx = EXTRA_REG_NONE;
        event->hw.branch_reg.idx = EXTRA_REG_NONE;

        if (event->attr.config == UNCORE_FIXED_EVENT) {
                /* no fixed counter */
                if (!pmu->type->fixed_ctl)
                        return -EINVAL;
                /*
                 * if there is only one fixed counter, only the first pmu
                 * can access the fixed counter
                 */
                if (pmu->type->single_fixed && pmu->pmu_idx > 0)
                        return -EINVAL;

                /* fixed counters have event field hardcoded to zero */
                hwc->config = 0ULL;
        } else {
                hwc->config = event->attr.config & pmu->type->event_mask;
                if (pmu->type->ops->hw_config) {
                        ret = pmu->type->ops->hw_config(box, event);
                        if (ret)
                                return ret;
                }
        }

        if (event->group_leader != event)
                ret = uncore_validate_group(pmu, event);
        else
                ret = 0;

        return ret;
}

static ssize_t uncore_get_attr_cpumask(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);

static struct attribute *uncore_pmu_attrs[] = {
        &dev_attr_cpumask.attr,
        NULL,
};

static struct attribute_group uncore_pmu_attr_group = {
        .attrs = uncore_pmu_attrs,
};

static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
{
        int ret;

        if (!pmu->type->pmu) {
                pmu->pmu = (struct pmu) {
                        .attr_groups    = pmu->type->attr_groups,
                        .task_ctx_nr    = perf_invalid_context,
                        .event_init     = uncore_pmu_event_init,
                        .add            = uncore_pmu_event_add,
                        .del            = uncore_pmu_event_del,
                        .start          = uncore_pmu_event_start,
                        .stop           = uncore_pmu_event_stop,
                        .read           = uncore_pmu_event_read,
                };
        } else {
                pmu->pmu = *pmu->type->pmu;
                pmu->pmu.attr_groups = pmu->type->attr_groups;
        }

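        /*
         * Single-box types are exposed as "uncore_<type>" (or plain
         * "uncore" if the type name is empty); multi-box types get the
         * box index appended as "uncore_<type>_<idx>".
         */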
        if (pmu->type->num_boxes == 1) {
                if (strlen(pmu->type->name) > 0)
                        sprintf(pmu->name, "uncore_%s", pmu->type->name);
                else
                        sprintf(pmu->name, "uncore");
        } else {
                sprintf(pmu->name, "uncore_%s_%d", pmu->type->name,
                        pmu->pmu_idx);
        }

        ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
        return ret;
}

static void __init uncore_type_exit(struct intel_uncore_type *type)
{
        int i;

        for (i = 0; i < type->num_boxes; i++)
                free_percpu(type->pmus[i].box);
        kfree(type->pmus);
        type->pmus = NULL;
        kfree(type->events_group);
        type->events_group = NULL;
}

static void __init uncore_types_exit(struct intel_uncore_type **types)
{
        int i;

        for (i = 0; types[i]; i++)
                uncore_type_exit(types[i]);
}

static int __init uncore_type_init(struct intel_uncore_type *type)
{
        struct intel_uncore_pmu *pmus;
        struct attribute_group *attr_group;
        struct attribute **attrs;
        int i, j;

        pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
        if (!pmus)
                return -ENOMEM;

        type->pmus = pmus;

        type->unconstrainted = (struct event_constraint)
                __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
                                0, type->num_counters, 0, 0);

        for (i = 0; i < type->num_boxes; i++) {
                pmus[i].func_id = -1;
                pmus[i].pmu_idx = i;
                pmus[i].type = type;
                INIT_LIST_HEAD(&pmus[i].box_list);
                pmus[i].box = alloc_percpu(struct intel_uncore_box *);
                if (!pmus[i].box)
                        goto fail;
        }

        if (type->event_descs) {
                i = 0;
                while (type->event_descs[i].attr.attr.name)
                        i++;

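                /*
                 * Allocate the attribute group and its NULL-terminated
                 * attribute pointer array in one block; the array starts
                 * right behind the group structure.
                 */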
                attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
                                        sizeof(*attr_group), GFP_KERNEL);
                if (!attr_group)
                        goto fail;

                attrs = (struct attribute **)(attr_group + 1);
                attr_group->name = "events";
                attr_group->attrs = attrs;

                for (j = 0; j < i; j++)
                        attrs[j] = &type->event_descs[j].attr.attr;

                type->events_group = attr_group;
        }

        type->pmu_group = &uncore_pmu_attr_group;
        return 0;
fail:
        uncore_type_exit(type);
        return -ENOMEM;
}

static int __init uncore_types_init(struct intel_uncore_type **types)
{
        int i, ret;

        for (i = 0; types[i]; i++) {
                ret = uncore_type_init(types[i]);
                if (ret)
                        goto fail;
        }
        return 0;
fail:
        while (--i >= 0)
                uncore_type_exit(types[i]);
        return ret;
}

/*
 * add a pci uncore device
 */
static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        struct intel_uncore_type *type;
        int phys_id;
        bool first_box = false;

        phys_id = uncore_pcibus_to_physid[pdev->bus->number];
        if (phys_id < 0)
                return -ENODEV;

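        /*
         * Devices tagged UNCORE_EXTRA_PCI_DEV get no box or PMU of their
         * own; they are only stashed in uncore_extra_pci_dev[] so that
         * other boxes can reference them later.
         */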
        if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
                int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
                uncore_extra_pci_dev[phys_id][idx] = pdev;
                pci_set_drvdata(pdev, NULL);
                return 0;
        }

        type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
        box = uncore_alloc_box(type, NUMA_NO_NODE);
        if (!box)
                return -ENOMEM;

        /*
         * for performance monitoring units with multiple boxes,
         * each box has a different function id.
         */
        pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
        if (pmu->func_id < 0)
                pmu->func_id = pdev->devfn;
        else
                WARN_ON_ONCE(pmu->func_id != pdev->devfn);

        box->phys_id = phys_id;
        box->pci_dev = pdev;
        box->pmu = pmu;
        uncore_box_init(box);
        pci_set_drvdata(pdev, box);

        raw_spin_lock(&uncore_box_lock);
        if (list_empty(&pmu->box_list))
                first_box = true;
        list_add_tail(&box->list, &pmu->box_list);
        raw_spin_unlock(&uncore_box_lock);

        if (first_box)
                uncore_pmu_register(pmu);
        return 0;
}

static void uncore_pci_remove(struct pci_dev *pdev)
{
        struct intel_uncore_box *box = pci_get_drvdata(pdev);
        struct intel_uncore_pmu *pmu;
        int i, cpu, phys_id = uncore_pcibus_to_physid[pdev->bus->number];
        bool last_box = false;

        if (!box) {
                for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
                        if (uncore_extra_pci_dev[phys_id][i] == pdev) {
                                uncore_extra_pci_dev[phys_id][i] = NULL;
                                break;
                        }
                }
                WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
                return;
        }

        pmu = box->pmu;
        if (WARN_ON_ONCE(phys_id != box->phys_id))
                return;

        pci_set_drvdata(pdev, NULL);

        raw_spin_lock(&uncore_box_lock);
        list_del(&box->list);
        if (list_empty(&pmu->box_list))
                last_box = true;
        raw_spin_unlock(&uncore_box_lock);

        for_each_possible_cpu(cpu) {
                if (*per_cpu_ptr(pmu->box, cpu) == box) {
                        *per_cpu_ptr(pmu->box, cpu) = NULL;
                        atomic_dec(&box->refcnt);
                }
        }

        WARN_ON_ONCE(atomic_read(&box->refcnt) != 1);
        kfree(box);

        if (last_box)
                perf_pmu_unregister(&pmu->pmu);
}

static int __init uncore_pci_init(void)
{
        int ret;

        switch (boot_cpu_data.x86_model) {
        case 45: /* Sandy Bridge-EP */
                ret = snbep_uncore_pci_init();
                break;
        case 62: /* Ivy Bridge-EP */
                ret = ivbep_uncore_pci_init();
                break;
        case 63: /* Haswell-EP */
                ret = hswep_uncore_pci_init();
                break;
        case 42: /* Sandy Bridge */
                ret = snb_uncore_pci_init();
                break;
        case 58: /* Ivy Bridge */
                ret = ivb_uncore_pci_init();
                break;
        case 60: /* Haswell */
        case 69: /* Haswell Celeron */
                ret = hsw_uncore_pci_init();
                break;
        case 61: /* Broadwell */
                ret = bdw_uncore_pci_init();
                break;
        default:
                return 0;
        }

        if (ret)
                return ret;

        ret = uncore_types_init(uncore_pci_uncores);
        if (ret)
                return ret;

        uncore_pci_driver->probe = uncore_pci_probe;
        uncore_pci_driver->remove = uncore_pci_remove;

        ret = pci_register_driver(uncore_pci_driver);
        if (ret == 0)
                pcidrv_registered = true;
        else
                uncore_types_exit(uncore_pci_uncores);

        return ret;
}

static void __init uncore_pci_exit(void)
{
        if (pcidrv_registered) {
                pcidrv_registered = false;
                pci_unregister_driver(uncore_pci_driver);
                uncore_types_exit(uncore_pci_uncores);
        }
}

/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
static LIST_HEAD(boxes_to_free);

static void uncore_kfree_boxes(void)
{
        struct intel_uncore_box *box;

        while (!list_empty(&boxes_to_free)) {
                box = list_entry(boxes_to_free.next,
                                 struct intel_uncore_box, list);
                list_del(&box->list);
                kfree(box);
        }
}

static void uncore_cpu_dying(int cpu)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, j;

        for (i = 0; uncore_msr_uncores[i]; i++) {
                type = uncore_msr_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        box = *per_cpu_ptr(pmu->box, cpu);
                        *per_cpu_ptr(pmu->box, cpu) = NULL;
                        if (box && atomic_dec_and_test(&box->refcnt))
                                list_add(&box->list, &boxes_to_free);
                }
        }
}

static int uncore_cpu_starting(int cpu)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box, *exist;
        int i, j, k, phys_id;

        phys_id = topology_physical_package_id(cpu);

        for (i = 0; uncore_msr_uncores[i]; i++) {
                type = uncore_msr_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        box = *per_cpu_ptr(pmu->box, cpu);
                        /* called by uncore_cpu_init? */
                        if (box && box->phys_id >= 0) {
                                uncore_box_init(box);
                                continue;
                        }

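                        /*
                         * Another cpu in this package may already own a
                         * box for this pmu; if so share it and queue the
                         * box preallocated for this cpu for freeing.
                         */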
                        for_each_online_cpu(k) {
                                exist = *per_cpu_ptr(pmu->box, k);
                                if (exist && exist->phys_id == phys_id) {
                                        atomic_inc(&exist->refcnt);
                                        *per_cpu_ptr(pmu->box, cpu) = exist;
                                        if (box) {
                                                list_add(&box->list,
                                                         &boxes_to_free);
                                                box = NULL;
                                        }
                                        break;
                                }
                        }

                        if (box) {
                                box->phys_id = phys_id;
                                uncore_box_init(box);
                        }
                }
        }
        return 0;
}

static int uncore_cpu_prepare(int cpu, int phys_id)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, j;

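        /*
         * MSR uncores have no PCI function, so the box index is used as
         * the func_id; each box is allocated on the local node of the
         * cpu being prepared.
         */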
        for (i = 0; uncore_msr_uncores[i]; i++) {
                type = uncore_msr_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        if (pmu->func_id < 0)
                                pmu->func_id = j;

                        box = uncore_alloc_box(type, cpu_to_node(cpu));
                        if (!box)
                                return -ENOMEM;

                        box->pmu = pmu;
                        box->phys_id = phys_id;
                        *per_cpu_ptr(pmu->box, cpu) = box;
                }
        }
        return 0;
}

static void
uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
{
        struct intel_uncore_type *type;
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_box *box;
        int i, j;

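        /*
         * Hand every box currently owned by old_cpu over to new_cpu,
         * migrating its perf context along with it; a negative cpu means
         * the box gains or loses its owner entirely.
         */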
        for (i = 0; uncores[i]; i++) {
                type = uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        if (old_cpu < 0)
                                box = uncore_pmu_to_box(pmu, new_cpu);
                        else
                                box = uncore_pmu_to_box(pmu, old_cpu);
                        if (!box)
                                continue;

                        if (old_cpu < 0) {
                                WARN_ON_ONCE(box->cpu != -1);
                                box->cpu = new_cpu;
                                continue;
                        }

                        WARN_ON_ONCE(box->cpu != old_cpu);
                        if (new_cpu >= 0) {
                                uncore_pmu_cancel_hrtimer(box);
                                perf_pmu_migrate_context(&pmu->pmu,
                                                old_cpu, new_cpu);
                                box->cpu = new_cpu;
                        } else {
                                box->cpu = -1;
                        }
                }
        }
}

static void uncore_event_exit_cpu(int cpu)
{
        int i, phys_id, target;

        /* if the exiting cpu is used for collecting uncore events */
        if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
                return;

        /* find a new cpu to collect uncore events */
        phys_id = topology_physical_package_id(cpu);
        target = -1;
        for_each_online_cpu(i) {
                if (i == cpu)
                        continue;
                if (phys_id == topology_physical_package_id(i)) {
                        target = i;
                        break;
                }
        }

        /* migrate uncore events to the new cpu */
        if (target >= 0)
                cpumask_set_cpu(target, &uncore_cpu_mask);

        uncore_change_context(uncore_msr_uncores, cpu, target);
        uncore_change_context(uncore_pci_uncores, cpu, target);
}

static void uncore_event_init_cpu(int cpu)
{
        int i, phys_id;

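        /* Nothing to do if this package already has a collecting cpu. */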
        phys_id = topology_physical_package_id(cpu);
        for_each_cpu(i, &uncore_cpu_mask) {
                if (phys_id == topology_physical_package_id(i))
                        return;
        }

        cpumask_set_cpu(cpu, &uncore_cpu_mask);

        uncore_change_context(uncore_msr_uncores, -1, cpu);
        uncore_change_context(uncore_pci_uncores, -1, cpu);
}

static int uncore_cpu_notifier(struct notifier_block *self,
                               unsigned long action, void *hcpu)
{
        unsigned int cpu = (long)hcpu;

        /* allocate/free data structure for uncore box */
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
                uncore_cpu_prepare(cpu, -1);
                break;
        case CPU_STARTING:
                uncore_cpu_starting(cpu);
                break;
        case CPU_UP_CANCELED:
        case CPU_DYING:
                uncore_cpu_dying(cpu);
                break;
        case CPU_ONLINE:
        case CPU_DEAD:
                uncore_kfree_boxes();
                break;
        default:
                break;
        }

        /* select the cpu that collects uncore events */
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_FAILED:
        case CPU_STARTING:
                uncore_event_init_cpu(cpu);
                break;
        case CPU_DOWN_PREPARE:
                uncore_event_exit_cpu(cpu);
                break;
        default:
                break;
        }

        return NOTIFY_OK;
}

static struct notifier_block uncore_cpu_nb = {
        .notifier_call  = uncore_cpu_notifier,
        /*
         * to migrate uncore events, our notifier should be executed
         * before perf core's notifier.
         */
        .priority       = CPU_PRI_PERF + 1,
};

static void __init uncore_cpu_setup(void *dummy)
{
        uncore_cpu_starting(smp_processor_id());
}

static int __init uncore_cpu_init(void)
{
        int ret;

        switch (boot_cpu_data.x86_model) {
        case 26: /* Nehalem */
        case 30:
        case 37: /* Westmere */
        case 44:
                nhm_uncore_cpu_init();
                break;
        case 42: /* Sandy Bridge */
        case 58: /* Ivy Bridge */
                snb_uncore_cpu_init();
                break;
        case 45: /* Sandy Bridge-EP */
                snbep_uncore_cpu_init();
                break;
        case 46: /* Nehalem-EX */
        case 47: /* Westmere-EX aka. Xeon E7 */
                nhmex_uncore_cpu_init();
                break;
        case 62: /* Ivy Bridge-EP */
                ivbep_uncore_cpu_init();
                break;
        case 63: /* Haswell-EP */
                hswep_uncore_cpu_init();
                break;
        default:
                return 0;
        }

        ret = uncore_types_init(uncore_msr_uncores);
        if (ret)
                return ret;

        return 0;
}

static int __init uncore_pmus_register(void)
{
        struct intel_uncore_pmu *pmu;
        struct intel_uncore_type *type;
        int i, j;

        for (i = 0; uncore_msr_uncores[i]; i++) {
                type = uncore_msr_uncores[i];
                for (j = 0; j < type->num_boxes; j++) {
                        pmu = &type->pmus[j];
                        uncore_pmu_register(pmu);
                }
        }

        return 0;
}

static void __init uncore_cpumask_init(void)
{
        int cpu;

        /*
         * only invoked once, from either the msr or the pci init code
         */
        if (!cpumask_empty(&uncore_cpu_mask))
                return;

        cpu_notifier_register_begin();

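        /*
         * Pick the first online cpu of each physical package as the cpu
         * that collects uncore events for that package.
         */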
        for_each_online_cpu(cpu) {
                int i, phys_id = topology_physical_package_id(cpu);

                for_each_cpu(i, &uncore_cpu_mask) {
                        if (phys_id == topology_physical_package_id(i)) {
                                phys_id = -1;
                                break;
                        }
                }
                if (phys_id < 0)
                        continue;

                uncore_cpu_prepare(cpu, phys_id);
                uncore_event_init_cpu(cpu);
        }
        on_each_cpu(uncore_cpu_setup, NULL, 1);

        __register_cpu_notifier(&uncore_cpu_nb);

        cpu_notifier_register_done();
}

static int __init intel_uncore_init(void)
{
        int ret;

        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return -ENODEV;

        if (cpu_has_hypervisor)
                return -ENODEV;

        ret = uncore_pci_init();
        if (ret)
                goto fail;
        ret = uncore_cpu_init();
        if (ret) {
                uncore_pci_exit();
                goto fail;
        }
        uncore_cpumask_init();

        uncore_pmus_register();
        return 0;
fail:
        return ret;
}
device_initcall(intel_uncore_init);