linux/arch/x86/events/intel/uncore.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2#include <linux/module.h>
   3
   4#include <asm/cpu_device_id.h>
   5#include <asm/intel-family.h>
   6#include "uncore.h"
   7#include "uncore_discovery.h"
   8
   9static bool uncore_no_discover;
  10module_param(uncore_no_discover, bool, 0);
  11MODULE_PARM_DESC(uncore_no_discover, "Don't enable the Intel uncore PerfMon discovery mechanism "
  12                                     "(default: enable the discovery mechanism).");
  13struct intel_uncore_type *empty_uncore[] = { NULL, };
  14struct intel_uncore_type **uncore_msr_uncores = empty_uncore;
  15struct intel_uncore_type **uncore_pci_uncores = empty_uncore;
  16struct intel_uncore_type **uncore_mmio_uncores = empty_uncore;
  17
  18static bool pcidrv_registered;
  19struct pci_driver *uncore_pci_driver;
  20/* The PCI driver for the device which the uncore doesn't own. */
  21struct pci_driver *uncore_pci_sub_driver;
  22/* pci bus to socket mapping */
  23DEFINE_RAW_SPINLOCK(pci2phy_map_lock);
  24struct list_head pci2phy_map_head = LIST_HEAD_INIT(pci2phy_map_head);
  25struct pci_extra_dev *uncore_extra_pci_dev;
  26int __uncore_max_dies;
  27
  28/* mask of cpus that collect uncore events */
  29static cpumask_t uncore_cpu_mask;
  30
  31/* constraint for the fixed counter */
  32static struct event_constraint uncore_constraint_fixed =
  33        EVENT_CONSTRAINT(~0ULL, 1 << UNCORE_PMC_IDX_FIXED, ~0ULL);
  34struct event_constraint uncore_constraint_empty =
  35        EVENT_CONSTRAINT(0, 0, 0);
  36
  37MODULE_LICENSE("GPL");
  38
  39int uncore_pcibus_to_dieid(struct pci_bus *bus)
  40{
  41        struct pci2phy_map *map;
  42        int die_id = -1;
  43
  44        raw_spin_lock(&pci2phy_map_lock);
  45        list_for_each_entry(map, &pci2phy_map_head, list) {
  46                if (map->segment == pci_domain_nr(bus)) {
  47                        die_id = map->pbus_to_dieid[bus->number];
  48                        break;
  49                }
  50        }
  51        raw_spin_unlock(&pci2phy_map_lock);
  52
  53        return die_id;
  54}
  55
  56int uncore_die_to_segment(int die)
  57{
  58        struct pci_bus *bus = NULL;
  59
   60        /* Find the first pci bus which belongs to the specified die. */
  61        while ((bus = pci_find_next_bus(bus)) &&
  62               (die != uncore_pcibus_to_dieid(bus)))
  63                ;
  64
  65        return bus ? pci_domain_nr(bus) : -EINVAL;
  66}
  67
  68static void uncore_free_pcibus_map(void)
  69{
  70        struct pci2phy_map *map, *tmp;
  71
  72        list_for_each_entry_safe(map, tmp, &pci2phy_map_head, list) {
  73                list_del(&map->list);
  74                kfree(map);
  75        }
  76}
  77
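     /*
      * Look up, or allocate and insert, the pci2phy map for a PCI segment.
      * Called with pci2phy_map_lock held.  The raw spinlock is dropped
      * around the GFP_KERNEL allocation and the lookup is retried
      * afterwards, since another CPU may have inserted the map in the
      * meantime; a losing allocation is freed at "end".
      */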
  78struct pci2phy_map *__find_pci2phy_map(int segment)
  79{
  80        struct pci2phy_map *map, *alloc = NULL;
  81        int i;
  82
  83        lockdep_assert_held(&pci2phy_map_lock);
  84
  85lookup:
  86        list_for_each_entry(map, &pci2phy_map_head, list) {
  87                if (map->segment == segment)
  88                        goto end;
  89        }
  90
  91        if (!alloc) {
  92                raw_spin_unlock(&pci2phy_map_lock);
  93                alloc = kmalloc(sizeof(struct pci2phy_map), GFP_KERNEL);
  94                raw_spin_lock(&pci2phy_map_lock);
  95
  96                if (!alloc)
  97                        return NULL;
  98
  99                goto lookup;
 100        }
 101
 102        map = alloc;
 103        alloc = NULL;
 104        map->segment = segment;
 105        for (i = 0; i < 256; i++)
 106                map->pbus_to_dieid[i] = -1;
 107        list_add_tail(&map->list, &pci2phy_map_head);
 108
 109end:
 110        kfree(alloc);
 111        return map;
 112}
 113
 114ssize_t uncore_event_show(struct device *dev,
 115                          struct device_attribute *attr, char *buf)
 116{
 117        struct uncore_event_desc *event =
 118                container_of(attr, struct uncore_event_desc, attr);
 119        return sprintf(buf, "%s", event->config);
 120}
 121
 122struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 123{
 124        unsigned int dieid = topology_logical_die_id(cpu);
 125
 126        /*
  127         * The unsigned check also catches the '-1' return value for
  128         * non-existent mappings in the topology map.
 129         */
 130        return dieid < uncore_max_dies() ? pmu->boxes[dieid] : NULL;
 131}
 132
 133u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
 134{
 135        u64 count;
 136
 137        rdmsrl(event->hw.event_base, count);
 138
 139        return count;
 140}
 141
 142void uncore_mmio_exit_box(struct intel_uncore_box *box)
 143{
 144        if (box->io_addr)
 145                iounmap(box->io_addr);
 146}
 147
 148u64 uncore_mmio_read_counter(struct intel_uncore_box *box,
 149                             struct perf_event *event)
 150{
 151        if (!box->io_addr)
 152                return 0;
 153
 154        if (!uncore_mmio_is_valid_offset(box, event->hw.event_base))
 155                return 0;
 156
 157        return readq(box->io_addr + event->hw.event_base);
 158}
 159
 160/*
 161 * generic get constraint function for shared match/mask registers.
 162 */
 163struct event_constraint *
 164uncore_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
 165{
 166        struct intel_uncore_extra_reg *er;
 167        struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
 168        struct hw_perf_event_extra *reg2 = &event->hw.branch_reg;
 169        unsigned long flags;
 170        bool ok = false;
 171
 172        /*
 173         * reg->alloc can be set due to existing state, so for fake box we
 174         * need to ignore this, otherwise we might fail to allocate proper
 175         * fake state for this extra reg constraint.
 176         */
 177        if (reg1->idx == EXTRA_REG_NONE ||
 178            (!uncore_box_is_fake(box) && reg1->alloc))
 179                return NULL;
 180
 181        er = &box->shared_regs[reg1->idx];
 182        raw_spin_lock_irqsave(&er->lock, flags);
 183        if (!atomic_read(&er->ref) ||
 184            (er->config1 == reg1->config && er->config2 == reg2->config)) {
 185                atomic_inc(&er->ref);
 186                er->config1 = reg1->config;
 187                er->config2 = reg2->config;
 188                ok = true;
 189        }
 190        raw_spin_unlock_irqrestore(&er->lock, flags);
 191
 192        if (ok) {
 193                if (!uncore_box_is_fake(box))
 194                        reg1->alloc = 1;
 195                return NULL;
 196        }
 197
 198        return &uncore_constraint_empty;
 199}
 200
 201void uncore_put_constraint(struct intel_uncore_box *box, struct perf_event *event)
 202{
 203        struct intel_uncore_extra_reg *er;
 204        struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
 205
 206        /*
  207         * Only put the constraint if the extra reg was actually allocated.
  208         * This also takes care of events which do not use an extra shared reg.
 209         *
 210         * Also, if this is a fake box we shouldn't touch any event state
 211         * (reg->alloc) and we don't care about leaving inconsistent box
 212         * state either since it will be thrown out.
 213         */
 214        if (uncore_box_is_fake(box) || !reg1->alloc)
 215                return;
 216
 217        er = &box->shared_regs[reg1->idx];
 218        atomic_dec(&er->ref);
 219        reg1->alloc = 0;
 220}
 221
 222u64 uncore_shared_reg_config(struct intel_uncore_box *box, int idx)
 223{
 224        struct intel_uncore_extra_reg *er;
 225        unsigned long flags;
 226        u64 config;
 227
 228        er = &box->shared_regs[idx];
 229
 230        raw_spin_lock_irqsave(&er->lock, flags);
 231        config = er->config;
 232        raw_spin_unlock_irqrestore(&er->lock, flags);
 233
 234        return config;
 235}
 236
 237static void uncore_assign_hw_event(struct intel_uncore_box *box,
 238                                   struct perf_event *event, int idx)
 239{
 240        struct hw_perf_event *hwc = &event->hw;
 241
 242        hwc->idx = idx;
 243        hwc->last_tag = ++box->tags[idx];
 244
 245        if (uncore_pmc_fixed(hwc->idx)) {
 246                hwc->event_base = uncore_fixed_ctr(box);
 247                hwc->config_base = uncore_fixed_ctl(box);
 248                return;
 249        }
 250
 251        hwc->config_base = uncore_event_ctl(box, hwc->idx);
 252        hwc->event_base  = uncore_perf_ctr(box, hwc->idx);
 253}
 254
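     /*
      * Update the perf count of an uncore event.  The counters are
      * narrower than 64 bit, so both raw values are shifted up by
      * (64 - counter width) before subtracting and the delta is shifted
      * back down afterwards.  This truncates the delta to the counter
      * width, so a counter that wrapped around still yields the correct
      * small positive delta.
      */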
 255void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event)
 256{
 257        u64 prev_count, new_count, delta;
 258        int shift;
 259
 260        if (uncore_pmc_freerunning(event->hw.idx))
 261                shift = 64 - uncore_freerunning_bits(box, event);
 262        else if (uncore_pmc_fixed(event->hw.idx))
 263                shift = 64 - uncore_fixed_ctr_bits(box);
 264        else
 265                shift = 64 - uncore_perf_ctr_bits(box);
 266
 267        /* the hrtimer might modify the previous event value */
 268again:
 269        prev_count = local64_read(&event->hw.prev_count);
 270        new_count = uncore_read_counter(box, event);
 271        if (local64_xchg(&event->hw.prev_count, new_count) != prev_count)
 272                goto again;
 273
 274        delta = (new_count << shift) - (prev_count << shift);
 275        delta >>= shift;
 276
 277        local64_add(delta, &event->count);
 278}
 279
 280/*
  281 * The overflow interrupt is unavailable for SandyBridge-EP and broken
  282 * for SandyBridge, so we use an hrtimer to periodically poll the counter
  283 * to avoid overflow.
 284 */
 285static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
 286{
 287        struct intel_uncore_box *box;
 288        struct perf_event *event;
 289        unsigned long flags;
 290        int bit;
 291
 292        box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
 293        if (!box->n_active || box->cpu != smp_processor_id())
 294                return HRTIMER_NORESTART;
 295        /*
  296         * Disable local interrupts to prevent uncore_pmu_event_start/stop
  297         * from interrupting the update process.
 298         */
 299        local_irq_save(flags);
 300
 301        /*
 302         * handle boxes with an active event list as opposed to active
 303         * counters
 304         */
 305        list_for_each_entry(event, &box->active_list, active_entry) {
 306                uncore_perf_event_update(box, event);
 307        }
 308
 309        for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
 310                uncore_perf_event_update(box, box->events[bit]);
 311
 312        local_irq_restore(flags);
 313
 314        hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
 315        return HRTIMER_RESTART;
 316}
 317
 318void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
 319{
 320        hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
 321                      HRTIMER_MODE_REL_PINNED);
 322}
 323
 324void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
 325{
 326        hrtimer_cancel(&box->hrtimer);
 327}
 328
 329static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
 330{
 331        hrtimer_init(&box->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 332        box->hrtimer.function = uncore_pmu_hrtimer;
 333}
 334
 335static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
 336                                                 int node)
 337{
  338        int i, size, numshared = type->num_shared_regs;
 339        struct intel_uncore_box *box;
 340
 341        size = sizeof(*box) + numshared * sizeof(struct intel_uncore_extra_reg);
 342
 343        box = kzalloc_node(size, GFP_KERNEL, node);
 344        if (!box)
 345                return NULL;
 346
 347        for (i = 0; i < numshared; i++)
 348                raw_spin_lock_init(&box->shared_regs[i].lock);
 349
 350        uncore_pmu_init_hrtimer(box);
 351        box->cpu = -1;
 352        box->dieid = -1;
 353
 354        /* set default hrtimer timeout */
 355        box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL;
 356
 357        INIT_LIST_HEAD(&box->active_list);
 358
 359        return box;
 360}
 361
 362/*
  363 * Use the uncore_pmu_event_init() pmu event_init callback
  364 * as a detection point for uncore events.
 365 */
 366static int uncore_pmu_event_init(struct perf_event *event);
 367
 368static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
 369{
 370        return &box->pmu->pmu == event->pmu;
 371}
 372
 373static int
 374uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
 375                      bool dogrp)
 376{
 377        struct perf_event *event;
 378        int n, max_count;
 379
 380        max_count = box->pmu->type->num_counters;
 381        if (box->pmu->type->fixed_ctl)
 382                max_count++;
 383
 384        if (box->n_events >= max_count)
 385                return -EINVAL;
 386
 387        n = box->n_events;
 388
 389        if (is_box_event(box, leader)) {
 390                box->event_list[n] = leader;
 391                n++;
 392        }
 393
 394        if (!dogrp)
 395                return n;
 396
 397        for_each_sibling_event(event, leader) {
 398                if (!is_box_event(box, event) ||
 399                    event->state <= PERF_EVENT_STATE_OFF)
 400                        continue;
 401
 402                if (n >= max_count)
 403                        return -EINVAL;
 404
 405                box->event_list[n] = event;
 406                n++;
 407        }
 408        return n;
 409}
 410
 411static struct event_constraint *
 412uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *event)
 413{
 414        struct intel_uncore_type *type = box->pmu->type;
 415        struct event_constraint *c;
 416
 417        if (type->ops->get_constraint) {
 418                c = type->ops->get_constraint(box, event);
 419                if (c)
 420                        return c;
 421        }
 422
 423        if (event->attr.config == UNCORE_FIXED_EVENT)
 424                return &uncore_constraint_fixed;
 425
 426        if (type->constraints) {
 427                for_each_event_constraint(c, type->constraints) {
 428                        if ((event->hw.config & c->cmask) == c->code)
 429                                return c;
 430                }
 431        }
 432
 433        return &type->unconstrainted;
 434}
 435
 436static void uncore_put_event_constraint(struct intel_uncore_box *box,
 437                                        struct perf_event *event)
 438{
 439        if (box->pmu->type->ops->put_constraint)
 440                box->pmu->type->ops->put_constraint(box, event);
 441}
 442
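     /*
      * Assign the collected events to hardware counters.  Gather the
      * constraint for each event, try the fast path of keeping every
      * event on its previous counter, and fall back to the generic
      * perf_assign_events() solver if that fails.  The constraints are
      * released again on failure or when called for validation only
      * (assign == NULL).
      */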
 443static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
 444{
 445        unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
 446        struct event_constraint *c;
 447        int i, wmin, wmax, ret = 0;
 448        struct hw_perf_event *hwc;
 449
 450        bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
 451
 452        for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 453                c = uncore_get_event_constraint(box, box->event_list[i]);
 454                box->event_constraint[i] = c;
 455                wmin = min(wmin, c->weight);
 456                wmax = max(wmax, c->weight);
 457        }
 458
 459        /* fastpath, try to reuse previous register */
 460        for (i = 0; i < n; i++) {
 461                hwc = &box->event_list[i]->hw;
 462                c = box->event_constraint[i];
 463
 464                /* never assigned */
 465                if (hwc->idx == -1)
 466                        break;
 467
 468                /* constraint still honored */
 469                if (!test_bit(hwc->idx, c->idxmsk))
 470                        break;
 471
 472                /* not already used */
 473                if (test_bit(hwc->idx, used_mask))
 474                        break;
 475
 476                __set_bit(hwc->idx, used_mask);
 477                if (assign)
 478                        assign[i] = hwc->idx;
 479        }
 480        /* slow path */
 481        if (i != n)
 482                ret = perf_assign_events(box->event_constraint, n,
 483                                         wmin, wmax, n, assign);
 484
 485        if (!assign || ret) {
 486                for (i = 0; i < n; i++)
 487                        uncore_put_event_constraint(box, box->event_list[i]);
 488        }
 489        return ret ? -EINVAL : 0;
 490}
 491
 492void uncore_pmu_event_start(struct perf_event *event, int flags)
 493{
 494        struct intel_uncore_box *box = uncore_event_to_box(event);
 495        int idx = event->hw.idx;
 496
 497        if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX))
 498                return;
 499
 500        /*
 501         * Free running counter is read-only and always active.
 502         * Use the current counter value as start point.
 503         * There is no overflow interrupt for free running counter.
 504         * Use hrtimer to periodically poll the counter to avoid overflow.
 505         */
 506        if (uncore_pmc_freerunning(event->hw.idx)) {
 507                list_add_tail(&event->active_entry, &box->active_list);
 508                local64_set(&event->hw.prev_count,
 509                            uncore_read_counter(box, event));
 510                if (box->n_active++ == 0)
 511                        uncore_pmu_start_hrtimer(box);
 512                return;
 513        }
 514
 515        if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
 516                return;
 517
 518        event->hw.state = 0;
 519        box->events[idx] = event;
 520        box->n_active++;
 521        __set_bit(idx, box->active_mask);
 522
 523        local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
 524        uncore_enable_event(box, event);
 525
 526        if (box->n_active == 1)
 527                uncore_pmu_start_hrtimer(box);
 528}
 529
 530void uncore_pmu_event_stop(struct perf_event *event, int flags)
 531{
 532        struct intel_uncore_box *box = uncore_event_to_box(event);
 533        struct hw_perf_event *hwc = &event->hw;
 534
 535        /* Cannot disable free running counter which is read-only */
 536        if (uncore_pmc_freerunning(hwc->idx)) {
 537                list_del(&event->active_entry);
 538                if (--box->n_active == 0)
 539                        uncore_pmu_cancel_hrtimer(box);
 540                uncore_perf_event_update(box, event);
 541                return;
 542        }
 543
 544        if (__test_and_clear_bit(hwc->idx, box->active_mask)) {
 545                uncore_disable_event(box, event);
 546                box->n_active--;
 547                box->events[hwc->idx] = NULL;
 548                WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 549                hwc->state |= PERF_HES_STOPPED;
 550
 551                if (box->n_active == 0)
 552                        uncore_pmu_cancel_hrtimer(box);
 553        }
 554
 555        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
 556                /*
  557                 * Drain the remaining delta count out of an event
 558                 * that we are disabling:
 559                 */
 560                uncore_perf_event_update(box, event);
 561                hwc->state |= PERF_HES_UPTODATE;
 562        }
 563}
 564
 565int uncore_pmu_event_add(struct perf_event *event, int flags)
 566{
 567        struct intel_uncore_box *box = uncore_event_to_box(event);
 568        struct hw_perf_event *hwc = &event->hw;
 569        int assign[UNCORE_PMC_IDX_MAX];
 570        int i, n, ret;
 571
 572        if (!box)
 573                return -ENODEV;
 574
 575        /*
  576         * The free running counter is assigned in event_init().
 577         * The free running counter event and free running counter
 578         * are 1:1 mapped. It doesn't need to be tracked in event_list.
 579         */
 580        if (uncore_pmc_freerunning(hwc->idx)) {
 581                if (flags & PERF_EF_START)
 582                        uncore_pmu_event_start(event, 0);
 583                return 0;
 584        }
 585
 586        ret = n = uncore_collect_events(box, event, false);
 587        if (ret < 0)
 588                return ret;
 589
 590        hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
 591        if (!(flags & PERF_EF_START))
 592                hwc->state |= PERF_HES_ARCH;
 593
 594        ret = uncore_assign_events(box, assign, n);
 595        if (ret)
 596                return ret;
 597
 598        /* save events moving to new counters */
 599        for (i = 0; i < box->n_events; i++) {
 600                event = box->event_list[i];
 601                hwc = &event->hw;
 602
 603                if (hwc->idx == assign[i] &&
 604                        hwc->last_tag == box->tags[assign[i]])
 605                        continue;
 606                /*
 607                 * Ensure we don't accidentally enable a stopped
 608                 * counter simply because we rescheduled.
 609                 */
 610                if (hwc->state & PERF_HES_STOPPED)
 611                        hwc->state |= PERF_HES_ARCH;
 612
 613                uncore_pmu_event_stop(event, PERF_EF_UPDATE);
 614        }
 615
 616        /* reprogram moved events into new counters */
 617        for (i = 0; i < n; i++) {
 618                event = box->event_list[i];
 619                hwc = &event->hw;
 620
 621                if (hwc->idx != assign[i] ||
 622                        hwc->last_tag != box->tags[assign[i]])
 623                        uncore_assign_hw_event(box, event, assign[i]);
 624                else if (i < box->n_events)
 625                        continue;
 626
 627                if (hwc->state & PERF_HES_ARCH)
 628                        continue;
 629
 630                uncore_pmu_event_start(event, 0);
 631        }
 632        box->n_events = n;
 633
 634        return 0;
 635}
 636
 637void uncore_pmu_event_del(struct perf_event *event, int flags)
 638{
 639        struct intel_uncore_box *box = uncore_event_to_box(event);
 640        int i;
 641
 642        uncore_pmu_event_stop(event, PERF_EF_UPDATE);
 643
 644        /*
  645         * The event for a free running counter is not tracked by event_list.
  646         * There is no need to force event->hw.idx = -1 to reassign the counter,
  647         * because the event and the free running counter are 1:1 mapped.
 648         */
 649        if (uncore_pmc_freerunning(event->hw.idx))
 650                return;
 651
 652        for (i = 0; i < box->n_events; i++) {
 653                if (event == box->event_list[i]) {
 654                        uncore_put_event_constraint(box, event);
 655
 656                        for (++i; i < box->n_events; i++)
 657                                box->event_list[i - 1] = box->event_list[i];
 658
 659                        --box->n_events;
 660                        break;
 661                }
 662        }
 663
 664        event->hw.idx = -1;
 665        event->hw.last_tag = ~0ULL;
 666}
 667
 668void uncore_pmu_event_read(struct perf_event *event)
 669{
 670        struct intel_uncore_box *box = uncore_event_to_box(event);
 671        uncore_perf_event_update(box, event);
 672}
 673
 674/*
 675 * validation ensures the group can be loaded onto the
 676 * PMU if it was the only group available.
 677 */
 678static int uncore_validate_group(struct intel_uncore_pmu *pmu,
 679                                struct perf_event *event)
 680{
 681        struct perf_event *leader = event->group_leader;
 682        struct intel_uncore_box *fake_box;
 683        int ret = -EINVAL, n;
 684
 685        /* The free running counter is always active. */
 686        if (uncore_pmc_freerunning(event->hw.idx))
 687                return 0;
 688
 689        fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
 690        if (!fake_box)
 691                return -ENOMEM;
 692
 693        fake_box->pmu = pmu;
 694        /*
  695         * The event is not yet connected with its
  696         * siblings, therefore we must first collect the
  697         * existing siblings, then add the new event
  698         * before we can simulate the scheduling.
 699         */
 700        n = uncore_collect_events(fake_box, leader, true);
 701        if (n < 0)
 702                goto out;
 703
 704        fake_box->n_events = n;
 705        n = uncore_collect_events(fake_box, event, false);
 706        if (n < 0)
 707                goto out;
 708
 709        fake_box->n_events = n;
 710
 711        ret = uncore_assign_events(fake_box, NULL, n);
 712out:
 713        kfree(fake_box);
 714        return ret;
 715}
 716
 717static int uncore_pmu_event_init(struct perf_event *event)
 718{
 719        struct intel_uncore_pmu *pmu;
 720        struct intel_uncore_box *box;
 721        struct hw_perf_event *hwc = &event->hw;
 722        int ret;
 723
 724        if (event->attr.type != event->pmu->type)
 725                return -ENOENT;
 726
 727        pmu = uncore_event_to_pmu(event);
 728        /* no device found for this pmu */
 729        if (pmu->func_id < 0)
 730                return -ENOENT;
 731
 732        /* Sampling not supported yet */
 733        if (hwc->sample_period)
 734                return -EINVAL;
 735
 736        /*
 737         * Place all uncore events for a particular physical package
 738         * onto a single cpu
 739         */
 740        if (event->cpu < 0)
 741                return -EINVAL;
 742        box = uncore_pmu_to_box(pmu, event->cpu);
 743        if (!box || box->cpu < 0)
 744                return -EINVAL;
 745        event->cpu = box->cpu;
 746        event->pmu_private = box;
 747
 748        event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
 749
 750        event->hw.idx = -1;
 751        event->hw.last_tag = ~0ULL;
 752        event->hw.extra_reg.idx = EXTRA_REG_NONE;
 753        event->hw.branch_reg.idx = EXTRA_REG_NONE;
 754
 755        if (event->attr.config == UNCORE_FIXED_EVENT) {
 756                /* no fixed counter */
 757                if (!pmu->type->fixed_ctl)
 758                        return -EINVAL;
 759                /*
 760                 * if there is only one fixed counter, only the first pmu
 761                 * can access the fixed counter
 762                 */
 763                if (pmu->type->single_fixed && pmu->pmu_idx > 0)
 764                        return -EINVAL;
 765
 766                /* fixed counters have event field hardcoded to zero */
 767                hwc->config = 0ULL;
 768        } else if (is_freerunning_event(event)) {
 769                hwc->config = event->attr.config;
 770                if (!check_valid_freerunning_event(box, event))
 771                        return -EINVAL;
 772                event->hw.idx = UNCORE_PMC_IDX_FREERUNNING;
 773                /*
 774                 * The free running counter event and free running counter
 775                 * are always 1:1 mapped.
 776                 * The free running counter is always active.
 777                 * Assign the free running counter here.
 778                 */
 779                event->hw.event_base = uncore_freerunning_counter(box, event);
 780        } else {
 781                hwc->config = event->attr.config &
 782                              (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
 783                if (pmu->type->ops->hw_config) {
 784                        ret = pmu->type->ops->hw_config(box, event);
 785                        if (ret)
 786                                return ret;
 787                }
 788        }
 789
 790        if (event->group_leader != event)
 791                ret = uncore_validate_group(pmu, event);
 792        else
 793                ret = 0;
 794
 795        return ret;
 796}
 797
 798static void uncore_pmu_enable(struct pmu *pmu)
 799{
 800        struct intel_uncore_pmu *uncore_pmu;
 801        struct intel_uncore_box *box;
 802
 803        uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
 804
 805        box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
 806        if (!box)
 807                return;
 808
 809        if (uncore_pmu->type->ops->enable_box)
 810                uncore_pmu->type->ops->enable_box(box);
 811}
 812
 813static void uncore_pmu_disable(struct pmu *pmu)
 814{
 815        struct intel_uncore_pmu *uncore_pmu;
 816        struct intel_uncore_box *box;
 817
 818        uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
 819
 820        box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
 821        if (!box)
 822                return;
 823
 824        if (uncore_pmu->type->ops->disable_box)
 825                uncore_pmu->type->ops->disable_box(box);
 826}
 827
 828static ssize_t uncore_get_attr_cpumask(struct device *dev,
 829                                struct device_attribute *attr, char *buf)
 830{
 831        return cpumap_print_to_pagebuf(true, buf, &uncore_cpu_mask);
 832}
 833
 834static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
 835
 836static struct attribute *uncore_pmu_attrs[] = {
 837        &dev_attr_cpumask.attr,
 838        NULL,
 839};
 840
 841static const struct attribute_group uncore_pmu_attr_group = {
 842        .attrs = uncore_pmu_attrs,
 843};
 844
 845void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
 846{
 847        struct intel_uncore_type *type = pmu->type;
 848
 849        if (type->num_boxes == 1)
 850                sprintf(pmu_name, "uncore_type_%u", type->type_id);
 851        else {
 852                sprintf(pmu_name, "uncore_type_%u_%d",
 853                        type->type_id, type->box_ids[pmu->pmu_idx]);
 854        }
 855}
 856
 857static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
 858{
 859        struct intel_uncore_type *type = pmu->type;
 860
 861        /*
  862         * No uncore block name in the discovery table.
  863         * Use uncore_type_<typeid>_<boxid> as the name.
 864         */
 865        if (!type->name) {
 866                uncore_get_alias_name(pmu->name, pmu);
 867                return;
 868        }
 869
 870        if (type->num_boxes == 1) {
 871                if (strlen(type->name) > 0)
 872                        sprintf(pmu->name, "uncore_%s", type->name);
 873                else
 874                        sprintf(pmu->name, "uncore");
 875        } else {
 876                /*
 877                 * Use the box ID from the discovery table if applicable.
 878                 */
 879                sprintf(pmu->name, "uncore_%s_%d", type->name,
 880                        type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
 881        }
 882}
 883
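     /*
      * The name built above is what perf_pmu_register() exposes under
      * /sys/bus/event_source/devices/.  As a hypothetical example, a
      * multi-box type named "cbox" shows up as uncore_cbox_0,
      * uncore_cbox_1, ... and could be counted from userspace with
      * something like:  perf stat -a -e 'uncore_cbox_0/event=0x0/'
      */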
 884static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
 885{
 886        int ret;
 887
 888        if (!pmu->type->pmu) {
 889                pmu->pmu = (struct pmu) {
 890                        .attr_groups    = pmu->type->attr_groups,
 891                        .task_ctx_nr    = perf_invalid_context,
 892                        .pmu_enable     = uncore_pmu_enable,
 893                        .pmu_disable    = uncore_pmu_disable,
 894                        .event_init     = uncore_pmu_event_init,
 895                        .add            = uncore_pmu_event_add,
 896                        .del            = uncore_pmu_event_del,
 897                        .start          = uncore_pmu_event_start,
 898                        .stop           = uncore_pmu_event_stop,
 899                        .read           = uncore_pmu_event_read,
 900                        .module         = THIS_MODULE,
 901                        .capabilities   = PERF_PMU_CAP_NO_EXCLUDE,
 902                        .attr_update    = pmu->type->attr_update,
 903                };
 904        } else {
 905                pmu->pmu = *pmu->type->pmu;
 906                pmu->pmu.attr_groups = pmu->type->attr_groups;
 907                pmu->pmu.attr_update = pmu->type->attr_update;
 908        }
 909
 910        uncore_get_pmu_name(pmu);
 911
 912        ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
 913        if (!ret)
 914                pmu->registered = true;
 915        return ret;
 916}
 917
 918static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu)
 919{
 920        if (!pmu->registered)
 921                return;
 922        perf_pmu_unregister(&pmu->pmu);
 923        pmu->registered = false;
 924}
 925
 926static void uncore_free_boxes(struct intel_uncore_pmu *pmu)
 927{
 928        int die;
 929
 930        for (die = 0; die < uncore_max_dies(); die++)
 931                kfree(pmu->boxes[die]);
 932        kfree(pmu->boxes);
 933}
 934
 935static void uncore_type_exit(struct intel_uncore_type *type)
 936{
 937        struct intel_uncore_pmu *pmu = type->pmus;
 938        int i;
 939
 940        if (type->cleanup_mapping)
 941                type->cleanup_mapping(type);
 942
 943        if (pmu) {
 944                for (i = 0; i < type->num_boxes; i++, pmu++) {
 945                        uncore_pmu_unregister(pmu);
 946                        uncore_free_boxes(pmu);
 947                }
 948                kfree(type->pmus);
 949                type->pmus = NULL;
 950        }
 951        if (type->box_ids) {
 952                kfree(type->box_ids);
 953                type->box_ids = NULL;
 954        }
 955        kfree(type->events_group);
 956        type->events_group = NULL;
 957}
 958
 959static void uncore_types_exit(struct intel_uncore_type **types)
 960{
 961        for (; *types; types++)
 962                uncore_type_exit(*types);
 963}
 964
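     /*
      * One-time setup for an uncore type: allocate one intel_uncore_pmu
      * per box plus a per-die box pointer array for each, build the
      * "events" attribute group from the event descriptions, and set up
      * the unconstrained constraint covering all generic counters.
      */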
 965static int __init uncore_type_init(struct intel_uncore_type *type, bool setid)
 966{
 967        struct intel_uncore_pmu *pmus;
 968        size_t size;
 969        int i, j;
 970
 971        pmus = kcalloc(type->num_boxes, sizeof(*pmus), GFP_KERNEL);
 972        if (!pmus)
 973                return -ENOMEM;
 974
 975        size = uncore_max_dies() * sizeof(struct intel_uncore_box *);
 976
 977        for (i = 0; i < type->num_boxes; i++) {
 978                pmus[i].func_id = setid ? i : -1;
 979                pmus[i].pmu_idx = i;
 980                pmus[i].type    = type;
 981                pmus[i].boxes   = kzalloc(size, GFP_KERNEL);
 982                if (!pmus[i].boxes)
 983                        goto err;
 984        }
 985
 986        type->pmus = pmus;
 987        type->unconstrainted = (struct event_constraint)
 988                __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
 989                                0, type->num_counters, 0, 0);
 990
 991        if (type->event_descs) {
 992                struct {
 993                        struct attribute_group group;
 994                        struct attribute *attrs[];
 995                } *attr_group;
 996                for (i = 0; type->event_descs[i].attr.attr.name; i++);
 997
 998                attr_group = kzalloc(struct_size(attr_group, attrs, i + 1),
 999                                                                GFP_KERNEL);
1000                if (!attr_group)
1001                        goto err;
1002
1003                attr_group->group.name = "events";
1004                attr_group->group.attrs = attr_group->attrs;
1005
1006                for (j = 0; j < i; j++)
1007                        attr_group->attrs[j] = &type->event_descs[j].attr.attr;
1008
1009                type->events_group = &attr_group->group;
1010        }
1011
1012        type->pmu_group = &uncore_pmu_attr_group;
1013
1014        if (type->set_mapping)
1015                type->set_mapping(type);
1016
1017        return 0;
1018
1019err:
1020        for (i = 0; i < type->num_boxes; i++)
1021                kfree(pmus[i].boxes);
1022        kfree(pmus);
1023
1024        return -ENOMEM;
1025}
1026
1027static int __init
1028uncore_types_init(struct intel_uncore_type **types, bool setid)
1029{
1030        int ret;
1031
1032        for (; *types; types++) {
1033                ret = uncore_type_init(*types, setid);
1034                if (ret)
1035                        return ret;
1036        }
1037        return 0;
1038}
1039
1040/*
1041 * Get the die information of a PCI device.
1042 * @pdev: The PCI device.
1043 * @die: The die id which the device maps to.
1044 */
1045static int uncore_pci_get_dev_die_info(struct pci_dev *pdev, int *die)
1046{
1047        *die = uncore_pcibus_to_dieid(pdev->bus);
1048        if (*die < 0)
1049                return -EINVAL;
1050
1051        return 0;
1052}
1053
1054static struct intel_uncore_pmu *
1055uncore_pci_find_dev_pmu_from_types(struct pci_dev *pdev)
1056{
1057        struct intel_uncore_type **types = uncore_pci_uncores;
1058        struct intel_uncore_type *type;
1059        u64 box_ctl;
1060        int i, die;
1061
1062        for (; *types; types++) {
1063                type = *types;
1064                for (die = 0; die < __uncore_max_dies; die++) {
1065                        for (i = 0; i < type->num_boxes; i++) {
1066                                if (!type->box_ctls[die])
1067                                        continue;
1068                                box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1069                                if (pdev->devfn == UNCORE_DISCOVERY_PCI_DEVFN(box_ctl) &&
1070                                    pdev->bus->number == UNCORE_DISCOVERY_PCI_BUS(box_ctl) &&
1071                                    pci_domain_nr(pdev->bus) == UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl))
1072                                        return &type->pmus[i];
1073                        }
1074                }
1075        }
1076
1077        return NULL;
1078}
1079
1080/*
1081 * Find the PMU of a PCI device.
1082 * @pdev: The PCI device.
1083 * @ids: The ID table of the available PCI devices with a PMU.
1084 *       If NULL, search the whole uncore_pci_uncores.
1085 */
1086static struct intel_uncore_pmu *
1087uncore_pci_find_dev_pmu(struct pci_dev *pdev, const struct pci_device_id *ids)
1088{
1089        struct intel_uncore_pmu *pmu = NULL;
1090        struct intel_uncore_type *type;
1091        kernel_ulong_t data;
1092        unsigned int devfn;
1093
1094        if (!ids)
1095                return uncore_pci_find_dev_pmu_from_types(pdev);
1096
1097        while (ids && ids->vendor) {
1098                if ((ids->vendor == pdev->vendor) &&
1099                    (ids->device == pdev->device)) {
1100                        data = ids->driver_data;
1101                        devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
1102                                          UNCORE_PCI_DEV_FUNC(data));
1103                        if (devfn == pdev->devfn) {
1104                                type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(data)];
1105                                pmu = &type->pmus[UNCORE_PCI_DEV_IDX(data)];
1106                                break;
1107                        }
1108                }
1109                ids++;
1110        }
1111        return pmu;
1112}
1113
1114/*
1115 * Register the PMU for a PCI device
1116 * @pdev: The PCI device.
1117 * @type: The corresponding PMU type of the device.
1118 * @pmu: The corresponding PMU of the device.
1119 * @die: The die id which the device maps to.
1120 */
1121static int uncore_pci_pmu_register(struct pci_dev *pdev,
1122                                   struct intel_uncore_type *type,
1123                                   struct intel_uncore_pmu *pmu,
1124                                   int die)
1125{
1126        struct intel_uncore_box *box;
1127        int ret;
1128
1129        if (WARN_ON_ONCE(pmu->boxes[die] != NULL))
1130                return -EINVAL;
1131
1132        box = uncore_alloc_box(type, NUMA_NO_NODE);
1133        if (!box)
1134                return -ENOMEM;
1135
1136        if (pmu->func_id < 0)
1137                pmu->func_id = pdev->devfn;
1138        else
1139                WARN_ON_ONCE(pmu->func_id != pdev->devfn);
1140
1141        atomic_inc(&box->refcnt);
1142        box->dieid = die;
1143        box->pci_dev = pdev;
1144        box->pmu = pmu;
1145        uncore_box_init(box);
1146
1147        pmu->boxes[die] = box;
1148        if (atomic_inc_return(&pmu->activeboxes) > 1)
1149                return 0;
1150
1151        /* First active box registers the pmu */
1152        ret = uncore_pmu_register(pmu);
1153        if (ret) {
1154                pmu->boxes[die] = NULL;
1155                uncore_box_exit(box);
1156                kfree(box);
1157        }
1158        return ret;
1159}
1160
1161/*
1162 * add a pci uncore device
1163 */
1164static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
1165{
1166        struct intel_uncore_type *type;
1167        struct intel_uncore_pmu *pmu = NULL;
1168        int die, ret;
1169
1170        ret = uncore_pci_get_dev_die_info(pdev, &die);
1171        if (ret)
1172                return ret;
1173
1174        if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
1175                int idx = UNCORE_PCI_DEV_IDX(id->driver_data);
1176
1177                uncore_extra_pci_dev[die].dev[idx] = pdev;
1178                pci_set_drvdata(pdev, NULL);
1179                return 0;
1180        }
1181
1182        type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
1183
1184        /*
 1185         * Some platforms, e.g. Knights Landing, use a common PCI device ID
 1186         * for multiple instances of an uncore PMU device type. Check the
 1187         * PCI slot and function to identify the uncore box.
1188         */
1189        if (id->driver_data & ~0xffff) {
1190                struct pci_driver *pci_drv = pdev->driver;
1191
1192                pmu = uncore_pci_find_dev_pmu(pdev, pci_drv->id_table);
1193                if (pmu == NULL)
1194                        return -ENODEV;
1195        } else {
1196                /*
 1197                 * For a performance monitoring unit with multiple boxes,
 1198                 * each box has a different function id.
1199                 */
1200                pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
1201        }
1202
1203        ret = uncore_pci_pmu_register(pdev, type, pmu, die);
1204
1205        pci_set_drvdata(pdev, pmu->boxes[die]);
1206
1207        return ret;
1208}
1209
1210/*
1211 * Unregister the PMU of a PCI device
1212 * @pmu: The corresponding PMU is unregistered.
1213 * @die: The die id which the device maps to.
1214 */
1215static void uncore_pci_pmu_unregister(struct intel_uncore_pmu *pmu, int die)
1216{
1217        struct intel_uncore_box *box = pmu->boxes[die];
1218
1219        pmu->boxes[die] = NULL;
1220        if (atomic_dec_return(&pmu->activeboxes) == 0)
1221                uncore_pmu_unregister(pmu);
1222        uncore_box_exit(box);
1223        kfree(box);
1224}
1225
1226static void uncore_pci_remove(struct pci_dev *pdev)
1227{
1228        struct intel_uncore_box *box;
1229        struct intel_uncore_pmu *pmu;
1230        int i, die;
1231
1232        if (uncore_pci_get_dev_die_info(pdev, &die))
1233                return;
1234
1235        box = pci_get_drvdata(pdev);
1236        if (!box) {
1237                for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
1238                        if (uncore_extra_pci_dev[die].dev[i] == pdev) {
1239                                uncore_extra_pci_dev[die].dev[i] = NULL;
1240                                break;
1241                        }
1242                }
1243                WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
1244                return;
1245        }
1246
1247        pmu = box->pmu;
1248
1249        pci_set_drvdata(pdev, NULL);
1250
1251        uncore_pci_pmu_unregister(pmu, die);
1252}
1253
1254static int uncore_bus_notify(struct notifier_block *nb,
1255                             unsigned long action, void *data,
1256                             const struct pci_device_id *ids)
1257{
1258        struct device *dev = data;
1259        struct pci_dev *pdev = to_pci_dev(dev);
1260        struct intel_uncore_pmu *pmu;
1261        int die;
1262
1263        /* Unregister the PMU when the device is going to be deleted. */
1264        if (action != BUS_NOTIFY_DEL_DEVICE)
1265                return NOTIFY_DONE;
1266
1267        pmu = uncore_pci_find_dev_pmu(pdev, ids);
1268        if (!pmu)
1269                return NOTIFY_DONE;
1270
1271        if (uncore_pci_get_dev_die_info(pdev, &die))
1272                return NOTIFY_DONE;
1273
1274        uncore_pci_pmu_unregister(pmu, die);
1275
1276        return NOTIFY_OK;
1277}
1278
1279static int uncore_pci_sub_bus_notify(struct notifier_block *nb,
1280                                     unsigned long action, void *data)
1281{
1282        return uncore_bus_notify(nb, action, data,
1283                                 uncore_pci_sub_driver->id_table);
1284}
1285
1286static struct notifier_block uncore_pci_sub_notifier = {
1287        .notifier_call = uncore_pci_sub_bus_notify,
1288};
1289
1290static void uncore_pci_sub_driver_init(void)
1291{
1292        const struct pci_device_id *ids = uncore_pci_sub_driver->id_table;
1293        struct intel_uncore_type *type;
1294        struct intel_uncore_pmu *pmu;
1295        struct pci_dev *pci_sub_dev;
1296        bool notify = false;
1297        unsigned int devfn;
1298        int die;
1299
1300        while (ids && ids->vendor) {
1301                pci_sub_dev = NULL;
1302                type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(ids->driver_data)];
1303                /*
 1304                 * Search for the available devices, and register the
 1305                 * corresponding PMU for each.
1306                 */
1307                while ((pci_sub_dev = pci_get_device(PCI_VENDOR_ID_INTEL,
1308                                                     ids->device, pci_sub_dev))) {
1309                        devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data),
1310                                          UNCORE_PCI_DEV_FUNC(ids->driver_data));
1311                        if (devfn != pci_sub_dev->devfn)
1312                                continue;
1313
1314                        pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)];
1315                        if (!pmu)
1316                                continue;
1317
1318                        if (uncore_pci_get_dev_die_info(pci_sub_dev, &die))
1319                                continue;
1320
1321                        if (!uncore_pci_pmu_register(pci_sub_dev, type, pmu,
1322                                                     die))
1323                                notify = true;
1324                }
1325                ids++;
1326        }
1327
1328        if (notify && bus_register_notifier(&pci_bus_type, &uncore_pci_sub_notifier))
1329                notify = false;
1330
1331        if (!notify)
1332                uncore_pci_sub_driver = NULL;
1333}
1334
1335static int uncore_pci_bus_notify(struct notifier_block *nb,
1336                                     unsigned long action, void *data)
1337{
1338        return uncore_bus_notify(nb, action, data, NULL);
1339}
1340
1341static struct notifier_block uncore_pci_notifier = {
1342        .notifier_call = uncore_pci_bus_notify,
1343};
1344
1345
1346static void uncore_pci_pmus_register(void)
1347{
1348        struct intel_uncore_type **types = uncore_pci_uncores;
1349        struct intel_uncore_type *type;
1350        struct intel_uncore_pmu *pmu;
1351        struct pci_dev *pdev;
1352        u64 box_ctl;
1353        int i, die;
1354
1355        for (; *types; types++) {
1356                type = *types;
1357                for (die = 0; die < __uncore_max_dies; die++) {
1358                        for (i = 0; i < type->num_boxes; i++) {
1359                                if (!type->box_ctls[die])
1360                                        continue;
1361                                box_ctl = type->box_ctls[die] + type->pci_offsets[i];
1362                                pdev = pci_get_domain_bus_and_slot(UNCORE_DISCOVERY_PCI_DOMAIN(box_ctl),
1363                                                                   UNCORE_DISCOVERY_PCI_BUS(box_ctl),
1364                                                                   UNCORE_DISCOVERY_PCI_DEVFN(box_ctl));
1365                                if (!pdev)
1366                                        continue;
1367                                pmu = &type->pmus[i];
1368
1369                                uncore_pci_pmu_register(pdev, type, pmu, die);
1370                        }
1371                }
1372        }
1373
1374        bus_register_notifier(&pci_bus_type, &uncore_pci_notifier);
1375}
1376
1377static int __init uncore_pci_init(void)
1378{
1379        size_t size;
1380        int ret;
1381
1382        size = uncore_max_dies() * sizeof(struct pci_extra_dev);
1383        uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL);
1384        if (!uncore_extra_pci_dev) {
1385                ret = -ENOMEM;
1386                goto err;
1387        }
1388
1389        ret = uncore_types_init(uncore_pci_uncores, false);
1390        if (ret)
1391                goto errtype;
1392
1393        if (uncore_pci_driver) {
1394                uncore_pci_driver->probe = uncore_pci_probe;
1395                uncore_pci_driver->remove = uncore_pci_remove;
1396
1397                ret = pci_register_driver(uncore_pci_driver);
1398                if (ret)
1399                        goto errtype;
1400        } else
1401                uncore_pci_pmus_register();
1402
1403        if (uncore_pci_sub_driver)
1404                uncore_pci_sub_driver_init();
1405
1406        pcidrv_registered = true;
1407        return 0;
1408
1409errtype:
1410        uncore_types_exit(uncore_pci_uncores);
1411        kfree(uncore_extra_pci_dev);
1412        uncore_extra_pci_dev = NULL;
1413        uncore_free_pcibus_map();
1414err:
1415        uncore_pci_uncores = empty_uncore;
1416        return ret;
1417}
1418
1419static void uncore_pci_exit(void)
1420{
1421        if (pcidrv_registered) {
1422                pcidrv_registered = false;
1423                if (uncore_pci_sub_driver)
1424                        bus_unregister_notifier(&pci_bus_type, &uncore_pci_sub_notifier);
1425                if (uncore_pci_driver)
1426                        pci_unregister_driver(uncore_pci_driver);
1427                else
1428                        bus_unregister_notifier(&pci_bus_type, &uncore_pci_notifier);
1429                uncore_types_exit(uncore_pci_uncores);
1430                kfree(uncore_extra_pci_dev);
1431                uncore_free_pcibus_map();
1432        }
1433}
1434
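     /*
      * Move the boxes of one uncore type from old_cpu to new_cpu within
      * a die during CPU hotplug.  With old_cpu < 0 the boxes are simply
      * bound to new_cpu; otherwise the hrtimer is cancelled and the perf
      * context is migrated to the new CPU.
      */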
1435static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu,
1436                                   int new_cpu)
1437{
1438        struct intel_uncore_pmu *pmu = type->pmus;
1439        struct intel_uncore_box *box;
1440        int i, die;
1441
1442        die = topology_logical_die_id(old_cpu < 0 ? new_cpu : old_cpu);
1443        for (i = 0; i < type->num_boxes; i++, pmu++) {
1444                box = pmu->boxes[die];
1445                if (!box)
1446                        continue;
1447
1448                if (old_cpu < 0) {
1449                        WARN_ON_ONCE(box->cpu != -1);
1450                        box->cpu = new_cpu;
1451                        continue;
1452                }
1453
1454                WARN_ON_ONCE(box->cpu != old_cpu);
1455                box->cpu = -1;
1456                if (new_cpu < 0)
1457                        continue;
1458
1459                uncore_pmu_cancel_hrtimer(box);
1460                perf_pmu_migrate_context(&pmu->pmu, old_cpu, new_cpu);
1461                box->cpu = new_cpu;
1462        }
1463}
1464
1465static void uncore_change_context(struct intel_uncore_type **uncores,
1466                                  int old_cpu, int new_cpu)
1467{
1468        for (; *uncores; uncores++)
1469                uncore_change_type_ctx(*uncores, old_cpu, new_cpu);
1470}
1471
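     /*
      * Drop one reference on every box of the given die; the last
      * reference shuts the box down via uncore_box_exit().
      */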
1472static void uncore_box_unref(struct intel_uncore_type **types, int id)
1473{
1474        struct intel_uncore_type *type;
1475        struct intel_uncore_pmu *pmu;
1476        struct intel_uncore_box *box;
1477        int i;
1478
1479        for (; *types; types++) {
1480                type = *types;
1481                pmu = type->pmus;
1482                for (i = 0; i < type->num_boxes; i++, pmu++) {
1483                        box = pmu->boxes[id];
1484                        if (box && atomic_dec_return(&box->refcnt) == 0)
1485                                uncore_box_exit(box);
1486                }
1487        }
1488}
1489
1490static int uncore_event_cpu_offline(unsigned int cpu)
1491{
1492        int die, target;
1493
1494        /* Check if exiting cpu is used for collecting uncore events */
1495        if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask))
1496                goto unref;
1497        /* Find a new cpu to collect uncore events */
1498        target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
1499
1500        /* Migrate uncore events to the new target */
1501        if (target < nr_cpu_ids)
1502                cpumask_set_cpu(target, &uncore_cpu_mask);
1503        else
1504                target = -1;
1505
1506        uncore_change_context(uncore_msr_uncores, cpu, target);
1507        uncore_change_context(uncore_mmio_uncores, cpu, target);
1508        uncore_change_context(uncore_pci_uncores, cpu, target);
1509
1510unref:
1511        /* Clear the references */
1512        die = topology_logical_die_id(cpu);
1513        uncore_box_unref(uncore_msr_uncores, die);
1514        uncore_box_unref(uncore_mmio_uncores, die);
1515        return 0;
1516}
1517
1518static int allocate_boxes(struct intel_uncore_type **types,
1519                         unsigned int die, unsigned int cpu)
1520{
1521        struct intel_uncore_box *box, *tmp;
1522        struct intel_uncore_type *type;
1523        struct intel_uncore_pmu *pmu;
1524        LIST_HEAD(allocated);
1525        int i;
1526
1527        /* Try to allocate all required boxes */
1528        for (; *types; types++) {
1529                type = *types;
1530                pmu = type->pmus;
1531                for (i = 0; i < type->num_boxes; i++, pmu++) {
1532                        if (pmu->boxes[die])
1533                                continue;
1534                        box = uncore_alloc_box(type, cpu_to_node(cpu));
1535                        if (!box)
1536                                goto cleanup;
1537                        box->pmu = pmu;
1538                        box->dieid = die;
1539                        list_add(&box->active_list, &allocated);
1540                }
1541        }
1542        /* Install them in the pmus */
1543        list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1544                list_del_init(&box->active_list);
1545                box->pmu->boxes[die] = box;
1546        }
1547        return 0;
1548
1549cleanup:
1550        list_for_each_entry_safe(box, tmp, &allocated, active_list) {
1551                list_del_init(&box->active_list);
1552                kfree(box);
1553        }
1554        return -ENOMEM;
1555}
1556
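     /*
      * Take a reference on every box of the given die, allocating boxes
      * that do not exist yet.  The first reference initializes the box.
      */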
1557static int uncore_box_ref(struct intel_uncore_type **types,
1558                          int id, unsigned int cpu)
1559{
1560        struct intel_uncore_type *type;
1561        struct intel_uncore_pmu *pmu;
1562        struct intel_uncore_box *box;
1563        int i, ret;
1564
1565        ret = allocate_boxes(types, id, cpu);
1566        if (ret)
1567                return ret;
1568
1569        for (; *types; types++) {
1570                type = *types;
1571                pmu = type->pmus;
1572                for (i = 0; i < type->num_boxes; i++, pmu++) {
1573                        box = pmu->boxes[id];
1574                        if (box && atomic_inc_return(&box->refcnt) == 1)
1575                                uncore_box_init(box);
1576                }
1577        }
1578        return 0;
1579}
1580
1581static int uncore_event_cpu_online(unsigned int cpu)
1582{
1583        int die, target, msr_ret, mmio_ret;
1584
1585        die = topology_logical_die_id(cpu);
1586        msr_ret = uncore_box_ref(uncore_msr_uncores, die, cpu);
1587        mmio_ret = uncore_box_ref(uncore_mmio_uncores, die, cpu);
1588        if (msr_ret && mmio_ret)
1589                return -ENOMEM;
1590
1591        /*
1592         * Check if there is already an online CPU in the die
1593         * which collects uncore events.
1594         */
1595        target = cpumask_any_and(&uncore_cpu_mask, topology_die_cpumask(cpu));
1596        if (target < nr_cpu_ids)
1597                return 0;
1598
1599        cpumask_set_cpu(cpu, &uncore_cpu_mask);
1600
1601        if (!msr_ret)
1602                uncore_change_context(uncore_msr_uncores, -1, cpu);
1603        if (!mmio_ret)
1604                uncore_change_context(uncore_mmio_uncores, -1, cpu);
1605        uncore_change_context(uncore_pci_uncores, -1, cpu);
1606        return 0;
1607}
1608
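    /* Register one perf PMU for each box of the given uncore type. */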
1609static int __init type_pmu_register(struct intel_uncore_type *type)
1610{
1611        int i, ret;
1612
1613        for (i = 0; i < type->num_boxes; i++) {
1614                ret = uncore_pmu_register(&type->pmus[i]);
1615                if (ret)
1616                        return ret;
1617        }
1618        return 0;
1619}
1620
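    /* Register the PMUs of all MSR-based uncore types. */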
1621static int __init uncore_msr_pmus_register(void)
1622{
1623        struct intel_uncore_type **types = uncore_msr_uncores;
1624        int ret;
1625
1626        for (; *types; types++) {
1627                ret = type_pmu_register(*types);
1628                if (ret)
1629                        return ret;
1630        }
1631        return 0;
1632}
1633
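    /*
     * Initialize and register the MSR-based uncore PMUs.  On failure the
     * types are torn down again and uncore_msr_uncores is reset to the
     * empty list, so later code sees no MSR uncores at all.
     */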
1634static int __init uncore_cpu_init(void)
1635{
1636        int ret;
1637
1638        ret = uncore_types_init(uncore_msr_uncores, true);
1639        if (ret)
1640                goto err;
1641
1642        ret = uncore_msr_pmus_register();
1643        if (ret)
1644                goto err;
1645        return 0;
1646err:
1647        uncore_types_exit(uncore_msr_uncores);
1648        uncore_msr_uncores = empty_uncore;
1649        return ret;
1650}
1651
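    /* Same as uncore_cpu_init(), but for the MMIO-based uncore types. */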
1652static int __init uncore_mmio_init(void)
1653{
1654        struct intel_uncore_type **types = uncore_mmio_uncores;
1655        int ret;
1656
1657        ret = uncore_types_init(types, true);
1658        if (ret)
1659                goto err;
1660
1661        for (; *types; types++) {
1662                ret = type_pmu_register(*types);
1663                if (ret)
1664                        goto err;
1665        }
1666        return 0;
1667err:
1668        uncore_types_exit(uncore_mmio_uncores);
1669        uncore_mmio_uncores = empty_uncore;
1670        return ret;
1671}
1672
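    /*
     * Per-model init callbacks.  Any of the three hooks may be NULL if the
     * model has no uncore PMUs of that kind.  use_discovery marks models
     * whose setup additionally requires the PerfMon discovery tables (and
     * therefore honours the uncore_no_discover module parameter).
     */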
1673struct intel_uncore_init_fun {
1674        void    (*cpu_init)(void);
1675        int     (*pci_init)(void);
1676        void    (*mmio_init)(void);
1677        bool    use_discovery;
1678};
1679
1680static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
1681        .cpu_init = nhm_uncore_cpu_init,
1682};
1683
1684static const struct intel_uncore_init_fun snb_uncore_init __initconst = {
1685        .cpu_init = snb_uncore_cpu_init,
1686        .pci_init = snb_uncore_pci_init,
1687};
1688
1689static const struct intel_uncore_init_fun ivb_uncore_init __initconst = {
1690        .cpu_init = snb_uncore_cpu_init,
1691        .pci_init = ivb_uncore_pci_init,
1692};
1693
1694static const struct intel_uncore_init_fun hsw_uncore_init __initconst = {
1695        .cpu_init = snb_uncore_cpu_init,
1696        .pci_init = hsw_uncore_pci_init,
1697};
1698
1699static const struct intel_uncore_init_fun bdw_uncore_init __initconst = {
1700        .cpu_init = snb_uncore_cpu_init,
1701        .pci_init = bdw_uncore_pci_init,
1702};
1703
1704static const struct intel_uncore_init_fun snbep_uncore_init __initconst = {
1705        .cpu_init = snbep_uncore_cpu_init,
1706        .pci_init = snbep_uncore_pci_init,
1707};
1708
1709static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = {
1710        .cpu_init = nhmex_uncore_cpu_init,
1711};
1712
1713static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = {
1714        .cpu_init = ivbep_uncore_cpu_init,
1715        .pci_init = ivbep_uncore_pci_init,
1716};
1717
1718static const struct intel_uncore_init_fun hswep_uncore_init __initconst = {
1719        .cpu_init = hswep_uncore_cpu_init,
1720        .pci_init = hswep_uncore_pci_init,
1721};
1722
1723static const struct intel_uncore_init_fun bdx_uncore_init __initconst = {
1724        .cpu_init = bdx_uncore_cpu_init,
1725        .pci_init = bdx_uncore_pci_init,
1726};
1727
1728static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
1729        .cpu_init = knl_uncore_cpu_init,
1730        .pci_init = knl_uncore_pci_init,
1731};
1732
1733static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
1734        .cpu_init = skl_uncore_cpu_init,
1735        .pci_init = skl_uncore_pci_init,
1736};
1737
1738static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
1739        .cpu_init = skx_uncore_cpu_init,
1740        .pci_init = skx_uncore_pci_init,
1741};
1742
1743static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
1744        .cpu_init = icl_uncore_cpu_init,
1745        .pci_init = skl_uncore_pci_init,
1746};
1747
1748static const struct intel_uncore_init_fun tgl_uncore_init __initconst = {
1749        .cpu_init = tgl_uncore_cpu_init,
1750        .mmio_init = tgl_uncore_mmio_init,
1751};
1752
1753static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
1754        .cpu_init = tgl_uncore_cpu_init,
1755        .mmio_init = tgl_l_uncore_mmio_init,
1756};
1757
1758static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {
1759        .cpu_init = tgl_uncore_cpu_init,
1760        .pci_init = skl_uncore_pci_init,
1761};
1762
1763static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
1764        .cpu_init = adl_uncore_cpu_init,
1765        .mmio_init = tgl_uncore_mmio_init,
1766};
1767
1768static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
1769        .cpu_init = icx_uncore_cpu_init,
1770        .pci_init = icx_uncore_pci_init,
1771        .mmio_init = icx_uncore_mmio_init,
1772};
1773
1774static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
1775        .cpu_init = snr_uncore_cpu_init,
1776        .pci_init = snr_uncore_pci_init,
1777        .mmio_init = snr_uncore_mmio_init,
1778};
1779
1780static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
1781        .cpu_init = spr_uncore_cpu_init,
1782        .pci_init = spr_uncore_pci_init,
1783        .mmio_init = spr_uncore_mmio_init,
1784        .use_discovery = true,
1785};
1786
1787static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
1788        .cpu_init = intel_uncore_generic_uncore_cpu_init,
1789        .pci_init = intel_uncore_generic_uncore_pci_init,
1790        .mmio_init = intel_uncore_generic_uncore_mmio_init,
1791};
1792
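    /*
     * Map CPU models to their init callbacks.  Models not listed here can
     * still be handled by generic_uncore_init if usable discovery tables
     * are found (see intel_uncore_init() below).
     */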
1793static const struct x86_cpu_id intel_uncore_match[] __initconst = {
1794        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP,          &nhm_uncore_init),
1795        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,             &nhm_uncore_init),
1796        X86_MATCH_INTEL_FAM6_MODEL(WESTMERE,            &nhm_uncore_init),
1797        X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP,         &nhm_uncore_init),
1798        X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,         &snb_uncore_init),
1799        X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,           &ivb_uncore_init),
1800        X86_MATCH_INTEL_FAM6_MODEL(HASWELL,             &hsw_uncore_init),
1801        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,           &hsw_uncore_init),
1802        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,           &hsw_uncore_init),
1803        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,           &bdw_uncore_init),
1804        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,         &bdw_uncore_init),
1805        X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,       &snbep_uncore_init),
1806        X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX,          &nhmex_uncore_init),
1807        X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX,         &nhmex_uncore_init),
1808        X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,         &ivbep_uncore_init),
1809        X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,           &hswep_uncore_init),
1810        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,         &bdx_uncore_init),
1811        X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,         &bdx_uncore_init),
1812        X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &knl_uncore_init),
1813        X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,        &knl_uncore_init),
1814        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,             &skl_uncore_init),
1815        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,           &skl_uncore_init),
1816        X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,           &skx_uncore_init),
1817        X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,          &skl_uncore_init),
1818        X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,            &skl_uncore_init),
1819        X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,         &skl_uncore_init),
1820        X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,           &skl_uncore_init),
1821        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           &icl_uncore_init),
1822        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI,        &icl_uncore_init),
1823        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,             &icl_uncore_init),
1824        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &icx_uncore_init),
1825        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           &icx_uncore_init),
1826        X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         &tgl_l_uncore_init),
1827        X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           &tgl_uncore_init),
1828        X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &rkl_uncore_init),
1829        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &adl_uncore_init),
1830        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &adl_uncore_init),
1831        X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &spr_uncore_init),
1832        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &snr_uncore_init),
1833        {},
1834};
1835MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
1836
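    /*
     * Module init: refuse to load under a hypervisor, pick the init
     * callbacks for the running CPU model (or fall back to the generic,
     * discovery-based ones), bring up the PCI, MSR and MMIO uncore types
     * (loading succeeds as long as at least one of them works), and finally
     * install the CPU hotplug callbacks that keep an event-collecting CPU
     * nominated for every die.
     */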
1837static int __init intel_uncore_init(void)
1838{
1839        const struct x86_cpu_id *id;
1840        struct intel_uncore_init_fun *uncore_init;
1841        int pret = 0, cret = 0, mret = 0, ret;
1842
1843        if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
1844                return -ENODEV;
1845
1846        __uncore_max_dies =
1847                topology_max_packages() * topology_max_die_per_package();
1848
1849        id = x86_match_cpu(intel_uncore_match);
1850        if (!id) {
1851                if (!uncore_no_discover && intel_uncore_has_discovery_tables())
1852                        uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
1853                else
1854                        return -ENODEV;
1855        } else {
1856                uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
1857                if (uncore_no_discover && uncore_init->use_discovery)
1858                        return -ENODEV;
1859                if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
1860                        return -ENODEV;
1861        }
1862
1863        if (uncore_init->pci_init) {
1864                pret = uncore_init->pci_init();
1865                if (!pret)
1866                        pret = uncore_pci_init();
1867        }
1868
1869        if (uncore_init->cpu_init) {
1870                uncore_init->cpu_init();
1871                cret = uncore_cpu_init();
1872        }
1873
1874        if (uncore_init->mmio_init) {
1875                uncore_init->mmio_init();
1876                mret = uncore_mmio_init();
1877        }
1878
1879        if (cret && pret && mret) {
1880                ret = -ENODEV;
1881                goto free_discovery;
1882        }
1883
1884        /* Install hotplug callbacks to set up the targets for each die */
1885        ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE,
1886                                "perf/x86/intel/uncore:online",
1887                                uncore_event_cpu_online,
1888                                uncore_event_cpu_offline);
1889        if (ret)
1890                goto err;
1891        return 0;
1892
1893err:
1894        uncore_types_exit(uncore_msr_uncores);
1895        uncore_types_exit(uncore_mmio_uncores);
1896        uncore_pci_exit();
1897free_discovery:
1898        intel_uncore_clear_discovery_tables();
1899        return ret;
1900}
1901module_init(intel_uncore_init);
1902
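    /*
     * Module exit: remove the hotplug state, then tear down all registered
     * uncore types, the PCI driver and the discovery tables.
     */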
1903static void __exit intel_uncore_exit(void)
1904{
1905        cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE);
1906        uncore_types_exit(uncore_msr_uncores);
1907        uncore_types_exit(uncore_mmio_uncores);
1908        uncore_pci_exit();
1909        intel_uncore_clear_discovery_tables();
1910}
1911module_exit(intel_uncore_exit);
1912