linux/arch/s390/kernel/perf_cpum_cf.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Performance event support for s390x - CPU-measurement Counter Facility
 *
 *  Copyright IBM Corp. 2012, 2019
 *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
 */
#define KMSG_COMPONENT  "cpum_cf"
#define pr_fmt(fmt)     KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/export.h>
#include <asm/cpu_mcf.h>

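/*
 * Map a counter number to the counter set containing it.  Counter numbers
 * follow the CPU-measurement facility layout: basic (0-31), problem-state
 * (32-63), crypto (64-127), extended (128-287) and MT-diagnostic (448-495).
 * Returns CPUMF_CTR_SET_MAX if the counter number does not belong to a
 * known counter set.
 */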
static enum cpumf_ctr_set get_counter_set(u64 event)
{
        int set = CPUMF_CTR_SET_MAX;

        if (event < 32)
                set = CPUMF_CTR_SET_BASIC;
        else if (event < 64)
                set = CPUMF_CTR_SET_USER;
        else if (event < 128)
                set = CPUMF_CTR_SET_CRYPTO;
        else if (event < 288)
                set = CPUMF_CTR_SET_EXT;
        else if (event >= 448 && event < 496)
                set = CPUMF_CTR_SET_MT_DIAG;

        return set;
}

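/*
 * Check that the requested counter is actually provided by the counter
 * facility version (cfvn) and counter set version (csvn) found on this
 * CPU.  Returns -EOPNOTSUPP if the counter is not available in this
 * version.
 */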
static int validate_ctr_version(const struct hw_perf_event *hwc)
{
        struct cpu_cf_events *cpuhw;
        int err = 0;
        u16 mtdiag_ctl;

        cpuhw = &get_cpu_var(cpu_cf_events);

        /* check required version for counter sets */
        switch (hwc->config_base) {
        case CPUMF_CTR_SET_BASIC:
        case CPUMF_CTR_SET_USER:
                if (cpuhw->info.cfvn < 1)
                        err = -EOPNOTSUPP;
                break;
        case CPUMF_CTR_SET_CRYPTO:
                if ((cpuhw->info.csvn >= 1 && cpuhw->info.csvn <= 5 &&
                     hwc->config > 79) ||
                    (cpuhw->info.csvn >= 6 && hwc->config > 83))
                        err = -EOPNOTSUPP;
                break;
        case CPUMF_CTR_SET_EXT:
                if (cpuhw->info.csvn < 1)
                        err = -EOPNOTSUPP;
                if ((cpuhw->info.csvn == 1 && hwc->config > 159) ||
                    (cpuhw->info.csvn == 2 && hwc->config > 175) ||
                    (cpuhw->info.csvn >= 3 && cpuhw->info.csvn <= 5
                     && hwc->config > 255) ||
                    (cpuhw->info.csvn >= 6 && hwc->config > 287))
                        err = -EOPNOTSUPP;
                break;
        case CPUMF_CTR_SET_MT_DIAG:
                if (cpuhw->info.csvn <= 3)
                        err = -EOPNOTSUPP;
                /*
                 * MT-diagnostic counters are read-only.  The counter set
                 * is automatically enabled and activated on all CPUs with
                 * multithreading (SMT).  Deactivation of multithreading
                 * also disables the counter set.  State changes are ignored
                 * by lcctl().  Because Linux controls SMT enablement through
                 * a kernel parameter only, the counter set is either disabled
                 * or enabled and active.
                 *
                 * Thus, the counters can only be used if SMT is on and the
                 * counter set is enabled and active.
                 */
                mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG];
                if (!((cpuhw->info.auth_ctl & mtdiag_ctl) &&
                      (cpuhw->info.enable_ctl & mtdiag_ctl) &&
                      (cpuhw->info.act_ctl & mtdiag_ctl)))
                        err = -EOPNOTSUPP;
                break;
        }

        put_cpu_var(cpu_cf_events);
        return err;
}

static int validate_ctr_auth(const struct hw_perf_event *hwc)
{
        struct cpu_cf_events *cpuhw;
        u64 ctrs_state;
        int err = 0;

        cpuhw = &get_cpu_var(cpu_cf_events);

        /* Check authorization for the CPU counter sets.
         * If the particular CPU counter set is not authorized,
         * return with -ENOENT in order to fall back to other
         * PMUs that might be able to serve the event request.
         */
        ctrs_state = cpumf_ctr_ctl[hwc->config_base];
        if (!(ctrs_state & cpuhw->info.auth_ctl))
                err = -ENOENT;

        put_cpu_var(cpu_cf_events);
        return err;
}

/*
 * Change the CPUMF state to active.
 * Enable and activate the CPU-counter sets according
 * to the per-cpu control state.
 */
static void cpumf_pmu_enable(struct pmu *pmu)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
        int err;

        if (cpuhw->flags & PMU_F_ENABLED)
                return;

        err = lcctl(cpuhw->state);
        if (err) {
                pr_err("Enabling the performance measuring unit "
                       "failed with rc=%x\n", err);
                return;
        }

        cpuhw->flags |= PMU_F_ENABLED;
}

/*
 * Change the CPUMF state to inactive.
 * Deactivate the CPU-counter sets (they stay enabled but stop counting)
 * according to the per-cpu control state.
 */
static void cpumf_pmu_disable(struct pmu *pmu)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
        int err;
        u64 inactive;

        if (!(cpuhw->flags & PMU_F_ENABLED))
                return;

        inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
        err = lcctl(inactive);
        if (err) {
                pr_err("Disabling the performance measuring unit "
                       "failed with rc=%x\n", err);
                return;
        }

        cpuhw->flags &= ~PMU_F_ENABLED;
}


/* Number of perf events counting hardware events */
static atomic_t num_events = ATOMIC_INIT(0);
/* Used to avoid races in calling __kernel_cpumcf_begin/end() */
static DEFINE_MUTEX(pmc_reserve_mutex);

/* Release the PMU if event is the last perf event */
static void hw_perf_event_destroy(struct perf_event *event)
{
        if (!atomic_add_unless(&num_events, -1, 1)) {
                mutex_lock(&pmc_reserve_mutex);
                if (atomic_dec_return(&num_events) == 0)
                        __kernel_cpumcf_end();
                mutex_unlock(&pmc_reserve_mutex);
        }
}

/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
static const int cpumf_generic_events_basic[] = {
        [PERF_COUNT_HW_CPU_CYCLES]          = 0,
        [PERF_COUNT_HW_INSTRUCTIONS]        = 1,
        [PERF_COUNT_HW_CACHE_REFERENCES]    = -1,
        [PERF_COUNT_HW_CACHE_MISSES]        = -1,
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
        [PERF_COUNT_HW_BRANCH_MISSES]       = -1,
        [PERF_COUNT_HW_BUS_CYCLES]          = -1,
};
/* CPUMF <-> perf event mappings for userspace (problem-state set) */
static const int cpumf_generic_events_user[] = {
        [PERF_COUNT_HW_CPU_CYCLES]          = 32,
        [PERF_COUNT_HW_INSTRUCTIONS]        = 33,
        [PERF_COUNT_HW_CACHE_REFERENCES]    = -1,
        [PERF_COUNT_HW_CACHE_MISSES]        = -1,
        [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
        [PERF_COUNT_HW_BRANCH_MISSES]       = -1,
        [PERF_COUNT_HW_BUS_CYCLES]          = -1,
};

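/*
 * Validate a PERF_TYPE_HARDWARE or PERF_TYPE_RAW event, map it to a CPUMF
 * counter number and counter set, reserve the counter facility for the
 * first event, and record the result in the hardware perf event structure
 * (counter number in 'config', counter set in 'config_base').
 */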
static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
{
        struct perf_event_attr *attr = &event->attr;
        struct hw_perf_event *hwc = &event->hw;
        enum cpumf_ctr_set set;
        int err = 0;
        u64 ev;

        switch (type) {
        case PERF_TYPE_RAW:
                /* Raw events are used to access counters directly,
                 * hence do not permit excludes */
                if (attr->exclude_kernel || attr->exclude_user ||
                    attr->exclude_hv)
                        return -EOPNOTSUPP;
                ev = attr->config;
                break;

        case PERF_TYPE_HARDWARE:
                if (is_sampling_event(event))   /* No sampling support */
                        return -ENOENT;
                ev = attr->config;
                /* Count user space (problem-state) only */
                if (!attr->exclude_user && attr->exclude_kernel) {
                        if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
                                return -EOPNOTSUPP;
                        ev = cpumf_generic_events_user[ev];

                /* No support for kernel space counters only */
                } else if (!attr->exclude_kernel && attr->exclude_user) {
                        return -EOPNOTSUPP;

                /* Count user and kernel space */
                } else {
                        if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
                                return -EOPNOTSUPP;
                        ev = cpumf_generic_events_basic[ev];
                }
                break;

        default:
                return -ENOENT;
        }

        if (ev == -1)
                return -ENOENT;

        if (ev > PERF_CPUM_CF_MAX_CTR)
                return -ENOENT;

        /* Obtain the counter set to which the specified counter belongs */
        set = get_counter_set(ev);
        switch (set) {
        case CPUMF_CTR_SET_BASIC:
        case CPUMF_CTR_SET_USER:
        case CPUMF_CTR_SET_CRYPTO:
        case CPUMF_CTR_SET_EXT:
        case CPUMF_CTR_SET_MT_DIAG:
                /*
                 * Use the hardware perf event structure to store the
                 * counter number in the 'config' member and the counter
                 * set number in the 'config_base'.  The counter set number
                 * is then later used to enable/disable the counter(s).
                 */
                hwc->config = ev;
                hwc->config_base = set;
                break;
        case CPUMF_CTR_SET_MAX:
                /* The counter could not be associated to a counter set */
                return -EINVAL;
        }

        /* Initialize for using the CPU-measurement counter facility */
        if (!atomic_inc_not_zero(&num_events)) {
                mutex_lock(&pmc_reserve_mutex);
                if (atomic_read(&num_events) == 0 && __kernel_cpumcf_begin())
                        err = -EBUSY;
                else
                        atomic_inc(&num_events);
                mutex_unlock(&pmc_reserve_mutex);
        }
        if (err)
                return err;
        event->destroy = hw_perf_event_destroy;

        /* Finally, validate version and authorization of the counter set */
        err = validate_ctr_auth(hwc);
        if (!err)
                err = validate_ctr_version(hwc);

        return err;
}

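/*
 * perf event_init callback: accept PERF_TYPE_HARDWARE and PERF_TYPE_RAW
 * events as well as events that use the dynamically assigned PMU type,
 * and clean up again if the detailed initialization fails.
 */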
static int cpumf_pmu_event_init(struct perf_event *event)
{
        unsigned int type = event->attr.type;
        int err;

        if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
                err = __hw_perf_event_init(event, type);
        else if (event->pmu->type == type)
                /* Registered as unknown PMU */
                err = __hw_perf_event_init(event, PERF_TYPE_RAW);
        else
                return -ENOENT;

        if (unlikely(err) && event->destroy)
                event->destroy(event);

        return err;
}

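/*
 * Load the current counter value into event->hw.prev_count to establish a
 * new baseline for delta calculation.  If the counter is not available
 * because its counter set is disabled, start from zero.
 */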
static int hw_perf_event_reset(struct perf_event *event)
{
        u64 prev, new;
        int err;

        do {
                prev = local64_read(&event->hw.prev_count);
                err = ecctr(event->hw.config, &new);
                if (err) {
                        if (err != 3)
                                break;
                        /* The counter is not (yet) available. This
                         * might happen if the counter set to which
                         * this counter belongs is in the disabled
                         * state.
                         */
                        new = 0;
                }
        } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);

        return err;
}

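/*
 * Read the current counter value, compute the delta to the previously
 * stored value (accounting for a possible counter wrap) and add it to
 * the event count.
 */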
static void hw_perf_event_update(struct perf_event *event)
{
        u64 prev, new, delta;
        int err;

        do {
                prev = local64_read(&event->hw.prev_count);
                err = ecctr(event->hw.config, &new);
                if (err)
                        return;
        } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev);

        delta = (prev <= new) ? new - prev
                              : (-1ULL - prev) + new + 1;        /* overflow */
        local64_add(delta, &event->count);
}

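/* Update the event count unless the event has been stopped. */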
static void cpumf_pmu_read(struct perf_event *event)
{
        if (event->hw.state & PERF_HES_STOPPED)
                return;

        hw_perf_event_update(event);
}

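/*
 * Start the event: enable and activate the counter set it belongs to,
 * establish a new baseline for the counter and account for the event in
 * the per-CPU counter set reference count.
 */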
static void cpumf_pmu_start(struct perf_event *event, int flags)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
        struct hw_perf_event *hwc = &event->hw;

        if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
                return;

        if (WARN_ON_ONCE(hwc->config == -1))
                return;

        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

        hwc->state = 0;

        /* (Re-)enable and activate the counter set */
        ctr_set_enable(&cpuhw->state, hwc->config_base);
        ctr_set_start(&cpuhw->state, hwc->config_base);

        /* The counter set to which this counter belongs may already be
         * active.  Because all counters in a set are active, the
         * event->hw.prev_count needs to be synchronized.  At this point,
         * the counter set can be in the inactive or disabled state.
         */
        hw_perf_event_reset(event);

        /* increment refcount for this counter set */
        atomic_inc(&cpuhw->ctr_set[hwc->config_base]);
}

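/*
 * Stop the event: drop the counter set reference and deactivate the set
 * when the last counter in it is stopped, then bring the event count up
 * to date if requested.
 */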
static void cpumf_pmu_stop(struct perf_event *event, int flags)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
        struct hw_perf_event *hwc = &event->hw;

        if (!(hwc->state & PERF_HES_STOPPED)) {
                /* Decrement reference count for this counter set and if this
                 * is the last used counter in the set, clear activation
                 * control and set the counter set state to inactive.
                 */
                if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base]))
                        ctr_set_stop(&cpuhw->state, hwc->config_base);
                event->hw.state |= PERF_HES_STOPPED;
        }

        if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
                hw_perf_event_update(event);
                event->hw.state |= PERF_HES_UPTODATE;
        }
}

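/*
 * Add the event to the PMU on this CPU: verify that the counter set is
 * authorized (unless a group transaction defers that check), enable the
 * counter set and optionally start the event.
 */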
static int cpumf_pmu_add(struct perf_event *event, int flags)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);

        /* Check authorization for the counter set to which this
         * counter belongs.
         * For group event transactions, the authorization check is
         * done in cpumf_pmu_commit_txn().
         */
        if (!(cpuhw->txn_flags & PERF_PMU_TXN_ADD))
                if (validate_ctr_auth(&event->hw))
                        return -ENOENT;

        ctr_set_enable(&cpuhw->state, event->hw.config_base);
        event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

        if (flags & PERF_EF_START)
                cpumf_pmu_start(event, PERF_EF_RELOAD);

        perf_event_update_userpage(event);

        return 0;
}

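/*
 * Remove the event from the PMU on this CPU: stop it and, if no other
 * event uses the counter set, disable the set, which also clears its
 * counter contents.
 */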
static void cpumf_pmu_del(struct perf_event *event, int flags)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);

        cpumf_pmu_stop(event, PERF_EF_UPDATE);

        /* Check if any counter in the counter set is still used.  If not,
         * change the counter set to the disabled state.  This also clears
         * the content of all counters in the set.
         *
         * When a new perf event has been added but not yet started, this
         * can clear the enable control and reset all counters in a set.
         * Therefore, cpumf_pmu_start() always has to re-enable a counter
         * set.
         */
        if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base]))
                ctr_set_disable(&cpuhw->state, event->hw.config_base);

        perf_event_update_userpage(event);
}

/*
 * Start group events scheduling transaction.
 * Set flags to perform a single test at commit time.
 *
 * We only support PERF_PMU_TXN_ADD transactions. Save the
 * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
 * transactions.
 */
static void cpumf_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);

        WARN_ON_ONCE(cpuhw->txn_flags);         /* txn already in flight */

        cpuhw->txn_flags = txn_flags;
        if (txn_flags & ~PERF_PMU_TXN_ADD)
                return;

        perf_pmu_disable(pmu);
        cpuhw->tx_state = cpuhw->state;
}

/*
 * Stop and cancel a group events scheduling transaction.
 * Assumes cpumf_pmu_del() is called for each successfully added
 * cpumf_pmu_add() during the transaction.
 */
static void cpumf_pmu_cancel_txn(struct pmu *pmu)
{
        unsigned int txn_flags;
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);

        WARN_ON_ONCE(!cpuhw->txn_flags);        /* no txn in flight */

        txn_flags = cpuhw->txn_flags;
        cpuhw->txn_flags = 0;
        if (txn_flags & ~PERF_PMU_TXN_ADD)
                return;

        WARN_ON(cpuhw->tx_state != cpuhw->state);

        perf_pmu_enable(pmu);
}

/*
 * Commit the group events scheduling transaction.  On success, the
 * transaction is closed.  On error, the transaction is kept open
 * until cpumf_pmu_cancel_txn() is called.
 */
static int cpumf_pmu_commit_txn(struct pmu *pmu)
{
        struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
        u64 state;

        WARN_ON_ONCE(!cpuhw->txn_flags);        /* no txn in flight */

        if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) {
                cpuhw->txn_flags = 0;
                return 0;
        }

        /* check if the updated state can be scheduled */
        state = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1);
        state >>= CPUMF_LCCTL_ENABLE_SHIFT;
        if ((state & cpuhw->info.auth_ctl) != state)
                return -ENOENT;

        cpuhw->txn_flags = 0;
        perf_pmu_enable(pmu);
        return 0;
}

/* Performance monitoring unit for s390x */
static struct pmu cpumf_pmu = {
        .task_ctx_nr  = perf_sw_context,
        .capabilities = PERF_PMU_CAP_NO_INTERRUPT,
        .pmu_enable   = cpumf_pmu_enable,
        .pmu_disable  = cpumf_pmu_disable,
        .event_init   = cpumf_pmu_event_init,
        .add          = cpumf_pmu_add,
        .del          = cpumf_pmu_del,
        .start        = cpumf_pmu_start,
        .stop         = cpumf_pmu_stop,
        .read         = cpumf_pmu_read,
        .start_txn    = cpumf_pmu_start_txn,
        .commit_txn   = cpumf_pmu_commit_txn,
        .cancel_txn   = cpumf_pmu_cancel_txn,
};

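/*
 * Register the PMU with the perf subsystem if the CPU-measurement counter
 * facility is available on this machine.
 */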
static int __init cpumf_pmu_init(void)
{
        int rc;

        if (!kernel_cpumcf_avail())
                return -ENODEV;

        cpumf_pmu.attr_groups = cpumf_cf_event_group();
        rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
        if (rc)
                pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
        return rc;
}
subsys_initcall(cpumf_pmu_init);