linux/tools/perf/arch/x86/util/intel-pt.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 */

#include <errno.h>
#include <stdbool.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <cpuid.h>

#include "../../util/session.h"
#include "../../util/event.h"
#include "../../util/evlist.h"
#include "../../util/evsel.h"
#include "../../util/cpumap.h"
#include "../../util/mmap.h"
#include <subcmd/parse-options.h>
#include "../../util/parse-events.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
#include "../../util/auxtrace.h"
#include "../../util/record.h"
#include "../../util/target.h"
#include "../../util/tsc.h"
#include <internal/lib.h> // page_size
#include "../../util/intel-pt.h"

#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
#define KiB_MASK(x) (KiB(x) - 1)
#define MiB_MASK(x) (MiB(x) - 1)

#define INTEL_PT_PSB_PERIOD_NEAR        256

struct intel_pt_snapshot_ref {
        void *ref_buf;
        size_t ref_offset;
        bool wrapped;
};

struct intel_pt_recording {
        struct auxtrace_record          itr;
        struct perf_pmu                 *intel_pt_pmu;
        int                             have_sched_switch;
        struct evlist           *evlist;
        bool                            snapshot_mode;
        bool                            snapshot_init_done;
        size_t                          snapshot_size;
        size_t                          snapshot_ref_buf_size;
        int                             snapshot_ref_cnt;
        struct intel_pt_snapshot_ref    *snapshot_refs;
        size_t                          priv_size;
};

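/*
 * Parse a comma-separated list of config terms (e.g. "tsc,mtc_period=3")
 * against the PMU format definitions, applying them on top of the value
 * already in *config.
 */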
static int intel_pt_parse_terms_with_default(struct list_head *formats,
                                             const char *str,
                                             u64 *config)
{
        struct list_head *terms;
        struct perf_event_attr attr = { .size = 0, };
        int err;

        terms = malloc(sizeof(struct list_head));
        if (!terms)
                return -ENOMEM;

        INIT_LIST_HEAD(terms);

        err = parse_events_terms(terms, str);
        if (err)
                goto out_free;

        attr.config = *config;
        err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
        if (err)
                goto out_free;

        *config = attr.config;
out_free:
        parse_events_terms__delete(terms);
        return err;
}

static int intel_pt_parse_terms(struct list_head *formats, const char *str,
                                u64 *config)
{
        *config = 0;
        return intel_pt_parse_terms_with_default(formats, str, config);
}

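/*
 * Gather the bits of @bits selected by @mask and pack them contiguously
 * into the low-order bits of the result (the same operation as the x86
 * PEXT instruction).
 */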
static u64 intel_pt_masked_bits(u64 mask, u64 bits)
{
        const u64 top_bit = 1ULL << 63;
        u64 res = 0;
        int i;

        for (i = 0; i < 64; i++) {
                if (mask & top_bit) {
                        res <<= 1;
                        if (bits & top_bit)
                                res |= 1;
                }
                mask <<= 1;
                bits <<= 1;
        }

        return res;
}

static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
                                struct evlist *evlist, u64 *res)
{
        struct evsel *evsel;
        u64 mask;

        *res = 0;

        mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
        if (!mask)
                return -EINVAL;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel->core.attr.type == intel_pt_pmu->type) {
                        *res = intel_pt_masked_bits(mask, evsel->core.attr.config);
                        return 0;
                }
        }

        return -EINVAL;
}

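/*
 * Estimate the PSB (Packet Stream Boundary) period in bytes of trace data.
 * It is 2KiB << psb_period, except on early hardware (no multiple ToPA
 * entries) where PSBs were much more frequent.
 */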
static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu,
                                  struct evlist *evlist)
{
        u64 val;
        int err, topa_multiple_entries;
        size_t psb_period;

        if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries",
                                "%d", &topa_multiple_entries) != 1)
                topa_multiple_entries = 0;

        /*
         * Use caps/topa_multiple_entries to indicate early hardware that had
         * extra frequent PSBs.
         */
        if (!topa_multiple_entries) {
                psb_period = 256;
                goto out;
        }

        err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val);
        if (err)
                val = 0;

        psb_period = 1 << (val + 11);
out:
        pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period);
        return psb_period;
}

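/*
 * From the set bits in @bits, pick @target if it is set, otherwise the
 * highest set bit below it, otherwise the lowest set bit above it.
 */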
static int intel_pt_pick_bit(int bits, int target)
{
        int pos, pick = -1;

        for (pos = 0; bits; bits >>= 1, pos++) {
                if (bits & 1) {
                        if (pos <= target || pick < 0)
                                pick = pos;
                        if (pos >= target)
                                break;
                }
        }

        return pick;
}

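/*
 * Build a default config term string such as
 * "tsc,mtc,mtc_period=3,psb_period=3,pt,branch", including only terms that
 * the hardware capabilities under sysfs (caps/ and format/) say are
 * supported, and convert it to a config value.
 */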
static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
{
        char buf[256];
        int mtc, mtc_periods = 0, mtc_period;
        int psb_cyc, psb_periods, psb_period;
        int pos = 0;
        u64 config;
        char c;

        pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");

        if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d",
                                &mtc) != 1)
                mtc = 1;

        if (mtc) {
                if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x",
                                        &mtc_periods) != 1)
                        mtc_periods = 0;
                if (mtc_periods) {
                        mtc_period = intel_pt_pick_bit(mtc_periods, 3);
                        pos += scnprintf(buf + pos, sizeof(buf) - pos,
                                         ",mtc,mtc_period=%d", mtc_period);
                }
        }

        if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d",
                                &psb_cyc) != 1)
                psb_cyc = 1;

        if (psb_cyc && mtc_periods) {
                if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x",
                                        &psb_periods) != 1)
                        psb_periods = 0;
                if (psb_periods) {
                        psb_period = intel_pt_pick_bit(psb_periods, 3);
                        pos += scnprintf(buf + pos, sizeof(buf) - pos,
                                         ",psb_period=%d", psb_period);
                }
        }

        if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
            perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
                pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");

        pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);

        intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);

        return config;
}

static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
                                           struct record_opts *opts,
                                           const char *str)
{
        struct intel_pt_recording *ptr =
                        container_of(itr, struct intel_pt_recording, itr);
        unsigned long long snapshot_size = 0;
        char *endptr;

        if (str) {
                snapshot_size = strtoull(str, &endptr, 0);
                if (*endptr || snapshot_size > SIZE_MAX)
                        return -1;
        }

        opts->auxtrace_snapshot_mode = true;
        opts->auxtrace_snapshot_size = snapshot_size;

        ptr->snapshot_size = snapshot_size;

        return 0;
}

struct perf_event_attr *
intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
{
        struct perf_event_attr *attr;

        attr = zalloc(sizeof(struct perf_event_attr));
        if (!attr)
                return NULL;

        attr->config = intel_pt_default_config(intel_pt_pmu);

        intel_pt_pmu->selectable = true;

        return attr;
}

static const char *intel_pt_find_filter(struct evlist *evlist,
                                        struct perf_pmu *intel_pt_pmu)
{
        struct evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel->core.attr.type == intel_pt_pmu->type)
                        return evsel->filter;
        }

        return NULL;
}

static size_t intel_pt_filter_bytes(const char *filter)
{
        size_t len = filter ? strlen(filter) : 0;

        return len ? roundup(len + 1, 8) : 0;
}

static size_t
intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist)
{
        struct intel_pt_recording *ptr =
                        container_of(itr, struct intel_pt_recording, itr);
        const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu);

        ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
                         intel_pt_filter_bytes(filter);

        return ptr->priv_size;
}

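/*
 * Read the TSC to Core Crystal Clock (CTC) ratio from CPUID leaf 0x15:
 * EBX holds the numerator and EAX the denominator.
 */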
static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
{
        unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

        __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
        *n = ebx;
        *d = eax;
}

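/*
 * Fill in the PERF_RECORD_AUXTRACE_INFO event that carries the parameters
 * needed to decode the trace: TSC conversion values, selected config bits,
 * and the address filter string, laid out as u64 slots followed by the
 * padded filter.
 */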
static int intel_pt_info_fill(struct auxtrace_record *itr,
                              struct perf_session *session,
                              struct perf_record_auxtrace_info *auxtrace_info,
                              size_t priv_size)
{
        struct intel_pt_recording *ptr =
                        container_of(itr, struct intel_pt_recording, itr);
        struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
        struct perf_event_mmap_page *pc;
        struct perf_tsc_conversion tc = { .time_mult = 0, };
        bool cap_user_time_zero = false, per_cpu_mmaps;
        u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
        u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
        unsigned long max_non_turbo_ratio;
        size_t filter_str_len;
        const char *filter;
        __u64 *info;
        int err;

        if (priv_size != ptr->priv_size)
                return -EINVAL;

        intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
        intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
                             &noretcomp_bit);
        intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
        mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
                                              "mtc_period");
        intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);

        intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);

        if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
                                "%lu", &max_non_turbo_ratio) != 1)
                max_non_turbo_ratio = 0;

        filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
        filter_str_len = filter ? strlen(filter) : 0;

        if (!session->evlist->core.nr_mmaps)
                return -EINVAL;

        pc = session->evlist->mmap[0].core.base;
        if (pc) {
                err = perf_read_tsc_conversion(pc, &tc);
                if (err) {
                        if (err != -EOPNOTSUPP)
                                return err;
                } else {
                        cap_user_time_zero = tc.time_mult != 0;
                }
                if (!cap_user_time_zero)
                        ui__warning("Intel Processor Trace: TSC not available\n");
        }

        per_cpu_mmaps = !perf_cpu_map__empty(session->evlist->core.cpus);

        auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
        auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
        auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
        auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
        auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
        auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
        auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
        auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
        auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
        auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
        auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
        auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit;
        auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits;
        auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
        auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
        auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
        auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio;
        auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len;

        info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;

        if (filter_str_len) {
                size_t len = intel_pt_filter_bytes(filter);

                strncpy((char *)info, filter, len);
                info += len >> 3;
        }

        return 0;
}

static int intel_pt_track_switches(struct evlist *evlist)
{
        const char *sched_switch = "sched:sched_switch";
        struct evsel *evsel;
        int err;

        if (!perf_evlist__can_select_event(evlist, sched_switch))
                return -EPERM;

        err = parse_events(evlist, sched_switch, NULL);
        if (err) {
                pr_debug2("%s: failed to parse %s, error %d\n",
                          __func__, sched_switch, err);
                return err;
        }

        evsel = evlist__last(evlist);

        perf_evsel__set_sample_bit(evsel, CPU);
        perf_evsel__set_sample_bit(evsel, TIME);

        evsel->core.system_wide = true;
        evsel->no_aux_samples = true;
        evsel->immediate = true;

        return 0;
}

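/*
 * Format the set bits of @valid as a human-readable list of values and
 * ranges, e.g. 0x7d becomes "0,2-6".  The state machine tracks how many
 * consecutive values have been seen, so that runs of three or more
 * collapse into a range.
 */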
static void intel_pt_valid_str(char *str, size_t len, u64 valid)
{
        unsigned int val, last = 0, state = 1;
        int p = 0;

        str[0] = '\0';

        for (val = 0; val <= 64; val++, valid >>= 1) {
                if (valid & 1) {
                        last = val;
                        switch (state) {
                        case 0:
                                p += scnprintf(str + p, len - p, ",");
                                /* Fall through */
                        case 1:
                                p += scnprintf(str + p, len - p, "%u", val);
                                state = 2;
                                break;
                        case 2:
                                state = 3;
                                break;
                        case 3:
                                state = 4;
                                break;
                        default:
                                break;
                        }
                } else {
                        switch (state) {
                        case 3:
                                p += scnprintf(str + p, len - p, ",%u", last);
                                state = 0;
                                break;
                        case 4:
                                p += scnprintf(str + p, len - p, "-%u", last);
                                state = 0;
                                break;
                        default:
                                break;
                        }
                        if (state != 1)
                                state = 0;
                }
        }
}

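/*
 * Validate one config term value against the bitmask of supported values
 * advertised in the given sysfs caps file.  Zero is always accepted since
 * it simply means the feature is not used.
 */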
static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
                                    const char *caps, const char *name,
                                    const char *supported, u64 config)
{
        char valid_str[256];
        unsigned int shift;
        unsigned long long valid;
        u64 bits;
        int ok;

        if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1)
                valid = 0;

        if (supported &&
            perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok)
                valid = 0;

        valid |= 1;

        bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);

        config &= bits;

        for (shift = 0; bits && !(bits & 1); shift++)
                bits >>= 1;

        config >>= shift;

        if (config > 63)
                goto out_err;

        if (valid & (1ULL << config))
                return 0;
out_err:
        intel_pt_valid_str(valid_str, sizeof(valid_str), valid);
        pr_err("Invalid %s for %s. Valid values are: %s\n",
               name, INTEL_PT_PMU_NAME, valid_str);
        return -EINVAL;
}

static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
                                    struct evsel *evsel)
{
        int err;
        char c;

        if (!evsel)
                return 0;

        /*
         * If supported, force pass-through config term (pt=1) even if user
         * sets pt=0, which avoids senseless kernel errors.
         */
        if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
            !(evsel->core.attr.config & 1)) {
                pr_warning("pt=0 doesn't make sense, forcing pt=1\n");
                evsel->core.attr.config |= 1;
        }

        err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
                                       "cyc_thresh", "caps/psb_cyc",
                                       evsel->core.attr.config);
        if (err)
                return err;

        err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods",
                                       "mtc_period", "caps/mtc",
                                       evsel->core.attr.config);
        if (err)
                return err;

        return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods",
                                        "psb_period", "caps/psb_cyc",
                                        evsel->core.attr.config);
}

/*
 * Currently, there is not enough information to disambiguate different PEBS
 * events, so only allow one.
 */
static bool intel_pt_too_many_aux_output(struct evlist *evlist)
{
        struct evsel *evsel;
        int aux_output_cnt = 0;

        evlist__for_each_entry(evlist, evsel)
                aux_output_cnt += !!evsel->core.attr.aux_output;

        if (aux_output_cnt > 1) {
                pr_err(INTEL_PT_PMU_NAME " supports at most one event with aux-output\n");
                return true;
        }

        return false;
}

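/*
 * Prepare the event list for recording: validate the Intel PT event
 * config, pick default AUX area buffer sizes for snapshot or full-trace
 * mode, and add the context switch or dummy tracking events needed to
 * decode the trace afterwards.
 */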
static int intel_pt_recording_options(struct auxtrace_record *itr,
                                      struct evlist *evlist,
                                      struct record_opts *opts)
{
        struct intel_pt_recording *ptr =
                        container_of(itr, struct intel_pt_recording, itr);
        struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
        bool have_timing_info, need_immediate = false;
        struct evsel *evsel, *intel_pt_evsel = NULL;
        const struct perf_cpu_map *cpus = evlist->core.cpus;
        bool privileged = perf_event_paranoid_check(-1);
        u64 tsc_bit;
        int err;

        ptr->evlist = evlist;
        ptr->snapshot_mode = opts->auxtrace_snapshot_mode;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel->core.attr.type == intel_pt_pmu->type) {
                        if (intel_pt_evsel) {
                                pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
                                return -EINVAL;
                        }
                        evsel->core.attr.freq = 0;
                        evsel->core.attr.sample_period = 1;
                        intel_pt_evsel = evsel;
                        opts->full_auxtrace = true;
                }
        }

        if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
                pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
                return -EINVAL;
        }

        if (opts->use_clockid) {
                pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
                return -EINVAL;
        }

        if (intel_pt_too_many_aux_output(evlist))
                return -EINVAL;

        if (!opts->full_auxtrace)
                return 0;

        err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
        if (err)
                return err;

        /* Set default sizes for snapshot mode */
        if (opts->auxtrace_snapshot_mode) {
                size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);

                if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
                        if (privileged) {
                                opts->auxtrace_mmap_pages = MiB(4) / page_size;
                        } else {
                                opts->auxtrace_mmap_pages = KiB(128) / page_size;
                                if (opts->mmap_pages == UINT_MAX)
                                        opts->mmap_pages = KiB(256) / page_size;
                        }
                } else if (!opts->auxtrace_mmap_pages && !privileged &&
                           opts->mmap_pages == UINT_MAX) {
                        opts->mmap_pages = KiB(256) / page_size;
                }
                if (!opts->auxtrace_snapshot_size)
                        opts->auxtrace_snapshot_size =
                                opts->auxtrace_mmap_pages * (size_t)page_size;
                if (!opts->auxtrace_mmap_pages) {
                        size_t sz = opts->auxtrace_snapshot_size;

                        sz = round_up(sz, page_size) / page_size;
                        opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
                }
                if (opts->auxtrace_snapshot_size >
                                opts->auxtrace_mmap_pages * (size_t)page_size) {
                        pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
                               opts->auxtrace_snapshot_size,
                               opts->auxtrace_mmap_pages * (size_t)page_size);
                        return -EINVAL;
                }
                if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
                        pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
                        return -EINVAL;
                }
                pr_debug2("Intel PT snapshot size: %zu\n",
                          opts->auxtrace_snapshot_size);
                if (psb_period &&
                    opts->auxtrace_snapshot_size <= psb_period +
                                                  INTEL_PT_PSB_PERIOD_NEAR)
                        ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
                                    opts->auxtrace_snapshot_size, psb_period);
        }

        /* Set default sizes for full trace mode */
        if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
                if (privileged) {
                        opts->auxtrace_mmap_pages = MiB(4) / page_size;
                } else {
                        opts->auxtrace_mmap_pages = KiB(128) / page_size;
                        if (opts->mmap_pages == UINT_MAX)
                                opts->mmap_pages = KiB(256) / page_size;
                }
        }

        /* Validate auxtrace_mmap_pages */
        if (opts->auxtrace_mmap_pages) {
                size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
                size_t min_sz;

                if (opts->auxtrace_snapshot_mode)
                        min_sz = KiB(4);
                else
                        min_sz = KiB(8);

                if (sz < min_sz || !is_power_of_2(sz)) {
                        pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
                               min_sz / 1024);
                        return -EINVAL;
                }
        }

        intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);

        if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit))
                have_timing_info = true;
        else
                have_timing_info = false;

        /*
         * Per-cpu recording needs sched_switch events to distinguish different
         * threads.
         */
        if (have_timing_info && !perf_cpu_map__empty(cpus)) {
                if (perf_can_record_switch_events()) {
                        bool cpu_wide = !target__none(&opts->target) &&
                                        !target__has_task(&opts->target);

                        if (!cpu_wide && perf_can_record_cpu_wide()) {
                                struct evsel *switch_evsel;

                                err = parse_events(evlist, "dummy:u", NULL);
                                if (err)
                                        return err;

                                switch_evsel = evlist__last(evlist);

                                switch_evsel->core.attr.freq = 0;
                                switch_evsel->core.attr.sample_period = 1;
                                switch_evsel->core.attr.context_switch = 1;

                                switch_evsel->core.system_wide = true;
                                switch_evsel->no_aux_samples = true;
                                switch_evsel->immediate = true;

                                perf_evsel__set_sample_bit(switch_evsel, TID);
                                perf_evsel__set_sample_bit(switch_evsel, TIME);
                                perf_evsel__set_sample_bit(switch_evsel, CPU);
                                perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);

                                opts->record_switch_events = false;
                                ptr->have_sched_switch = 3;
                        } else {
                                opts->record_switch_events = true;
                                need_immediate = true;
                                if (cpu_wide)
                                        ptr->have_sched_switch = 3;
                                else
                                        ptr->have_sched_switch = 2;
                        }
                } else {
                        err = intel_pt_track_switches(evlist);
                        if (err == -EPERM)
                                pr_debug2("Unable to select sched:sched_switch\n");
                        else if (err)
                                return err;
                        else
                                ptr->have_sched_switch = 1;
                }
        }

        if (intel_pt_evsel) {
                /*
                 * To obtain the auxtrace buffer file descriptor, the auxtrace
                 * event must come first.
                 */
                perf_evlist__to_front(evlist, intel_pt_evsel);
                /*
                 * In the case of per-cpu mmaps, we need the CPU on the
                 * AUX event.
                 */
                if (!perf_cpu_map__empty(cpus))
                        perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
        }

        /* Add dummy event to keep tracking */
        if (opts->full_auxtrace) {
                struct evsel *tracking_evsel;

                err = parse_events(evlist, "dummy:u", NULL);
                if (err)
                        return err;

                tracking_evsel = evlist__last(evlist);

                perf_evlist__set_tracking_event(evlist, tracking_evsel);

                tracking_evsel->core.attr.freq = 0;
                tracking_evsel->core.attr.sample_period = 1;

                tracking_evsel->no_aux_samples = true;
                if (need_immediate)
                        tracking_evsel->immediate = true;

                /* In per-cpu case, always need the time of mmap events etc */
                if (!perf_cpu_map__empty(cpus)) {
                        perf_evsel__set_sample_bit(tracking_evsel, TIME);
                        /* And the CPU for switch events */
                        perf_evsel__set_sample_bit(tracking_evsel, CPU);
                }
                perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
        }

        /*
         * Warn the user when we do not have enough information to decode i.e.
         * per-cpu with no sched_switch (except workload-only).
         */
        if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
            !target__none(&opts->target))
                ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");

        return 0;
}

static int intel_pt_snapshot_start(struct auxtrace_record *itr)
{
        struct intel_pt_recording *ptr =
                        container_of(itr, struct intel_pt_recording, itr);
        struct evsel *evsel;

        evlist__for_each_entry(ptr->evlist, evsel) {
                if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
                        return evsel__disable(evsel);
        }
        return -EINVAL;
}

static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
{
        struct intel_pt_recording *ptr =
                        container_of(itr, struct intel_pt_recording, itr);
        struct evsel *evsel;

        evlist__for_each_entry(ptr->evlist, evsel) {
                if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
                        return evsel__enable(evsel);
        }
        return -EINVAL;
}

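/*
 * Grow the per-mmap snapshot reference array, doubling from a minimum of
 * 16 entries until it covers index @idx.
 */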
static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
{
        const size_t sz = sizeof(struct intel_pt_snapshot_ref);
        int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
        struct intel_pt_snapshot_ref *refs;

        if (!new_cnt)
                new_cnt = 16;

        while (new_cnt <= idx)
                new_cnt *= 2;

        refs = calloc(new_cnt, sz);
        if (!refs)
                return -ENOMEM;

        memcpy(refs, ptr->snapshot_refs, cnt * sz);
        /* The old array contents were copied, so release the old array */
        free(ptr->snapshot_refs);

        ptr->snapshot_refs = refs;
        ptr->snapshot_ref_cnt = new_cnt;

        return 0;
}

static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
{
        int i;

        for (i = 0; i < ptr->snapshot_ref_cnt; i++)
                zfree(&ptr->snapshot_refs[i].ref_buf);
        zfree(&ptr->snapshot_refs);
}

static void intel_pt_recording_free(struct auxtrace_record *itr)
{
        struct intel_pt_recording *ptr =
                        container_of(itr, struct intel_pt_recording, itr);

        intel_pt_free_snapshot_refs(ptr);
        free(ptr);
}

static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
                                       size_t snapshot_buf_size)
{
        size_t ref_buf_size = ptr->snapshot_ref_buf_size;
        void *ref_buf;

        ref_buf = zalloc(ref_buf_size);
        if (!ref_buf)
                return -ENOMEM;

        ptr->snapshot_refs[idx].ref_buf = ref_buf;
        ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;

        return 0;
}

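/*
 * Size the reference area used to detect wrap-around: roughly two PSB
 * periods' worth of data, capped at 256KiB.  Zero means small snapshots
 * fall back to the simpler zero-data check instead.
 */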
static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
                                             size_t snapshot_buf_size)
{
        const size_t max_size = 256 * 1024;
        size_t buf_size = 0, psb_period;

        if (ptr->snapshot_size <= 64 * 1024)
                return 0;

        psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
        if (psb_period)
                buf_size = psb_period * 2;

        if (!buf_size || buf_size > max_size)
                buf_size = max_size;

        if (buf_size >= snapshot_buf_size)
                return 0;

        if (buf_size >= ptr->snapshot_size / 2)
                return 0;

        return buf_size;
}

static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
                                  size_t snapshot_buf_size)
{
        if (ptr->snapshot_init_done)
                return 0;

        ptr->snapshot_init_done = true;

        ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
                                                        snapshot_buf_size);

        return 0;
}

/**
 * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
 * @buf1: first buffer
 * @compare_size: number of bytes to compare
 * @buf2: second buffer (a circular buffer)
 * @offs2: offset in second buffer
 * @buf2_size: size of second buffer
 *
 * The comparison allows for the possibility that the bytes to compare in the
 * circular buffer are not contiguous.  It is assumed that @compare_size <=
 * @buf2_size.  This function returns %false if the bytes are identical, %true
 * otherwise.
 */
static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
                                     void *buf2, size_t offs2, size_t buf2_size)
{
        size_t end2 = offs2 + compare_size, part_size;

        if (end2 <= buf2_size)
                return memcmp(buf1, buf2 + offs2, compare_size);

        part_size = end2 - buf2_size;
        if (memcmp(buf1, buf2 + offs2, part_size))
                return true;

        compare_size -= part_size;

        return memcmp(buf1 + part_size, buf2, compare_size);
}

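/*
 * Compare the reference area with the data now in the same region of the
 * AUX buffer; any difference means that region has been overwritten, i.e.
 * the buffer wrapped.  If 'head' lies within the reference area the
 * comparison would be unreliable, so a wrap is assumed.
 */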
static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
                                 size_t ref_size, size_t buf_size,
                                 void *data, size_t head)
{
        size_t ref_end = ref_offset + ref_size;

        if (ref_end > buf_size) {
                if (head > ref_offset || head < ref_end - buf_size)
                        return true;
        } else if (head > ref_offset && head < ref_end) {
                return true;
        }

        return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
                                        buf_size);
}

static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
                              void *data, size_t head)
{
        if (head >= ref_size) {
                memcpy(ref_buf, data + head - ref_size, ref_size);
        } else {
                memcpy(ref_buf, data, head);
                ref_size -= head;
                memcpy(ref_buf + head, data + buf_size - ref_size, ref_size);
        }
}

static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
                             struct auxtrace_mmap *mm, unsigned char *data,
                             u64 head)
{
        struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
        bool wrapped;

        wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
                                       ptr->snapshot_ref_buf_size, mm->len,
                                       data, head);

        intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
                          data, head);

        return wrapped;
}

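/*
 * The AUX buffer starts out zeroed, so non-zero data in the last 4KiB
 * (512 u64s) means the buffer has filled up and wrapped at least once.
 */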
static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
{
        int i, a, b;

        b = buf_size >> 3;
        a = b - 512;
        if (a < 0)
                a = 0;

        for (i = a; i < b; i++) {
                if (data[i])
                        return true;
        }

        return false;
}

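/*
 * Called when a snapshot is taken: determine whether the AUX buffer has
 * wrapped, then convert the hardware 'head' offset into the monotonically
 * increasing 'old'/'head' pair that the trace copying code expects.
 */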
static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
                                  struct auxtrace_mmap *mm, unsigned char *data,
                                  u64 *head, u64 *old)
{
        struct intel_pt_recording *ptr =
                        container_of(itr, struct intel_pt_recording, itr);
        bool wrapped;
        int err;

        pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
                  __func__, idx, (size_t)*old, (size_t)*head);

        err = intel_pt_snapshot_init(ptr, mm->len);
        if (err)
                goto out_err;

        if (idx >= ptr->snapshot_ref_cnt) {
                err = intel_pt_alloc_snapshot_refs(ptr, idx);
                if (err)
                        goto out_err;
        }

        if (ptr->snapshot_ref_buf_size) {
                if (!ptr->snapshot_refs[idx].ref_buf) {
                        err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
                        if (err)
                                goto out_err;
                }
                wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
        } else {
                wrapped = ptr->snapshot_refs[idx].wrapped;
                if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) {
                        ptr->snapshot_refs[idx].wrapped = true;
                        wrapped = true;
                }
        }

        /*
         * In full trace mode 'head' continually increases.  However in snapshot
         * mode 'head' is an offset within the buffer.  Here 'old' and 'head'
         * are adjusted to match the full trace case which expects that 'old' is
         * always less than 'head'.
         */
        if (wrapped) {
                *old = *head;
                *head += mm->len;
        } else {
                if (mm->mask)
                        *old &= mm->mask;
                else
                        *old %= mm->len;
                if (*old > *head)
                        *head += mm->len;
        }

        pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
                  __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);

        return 0;

out_err:
        pr_err("%s: failed, error %d\n", __func__, err);
        return err;
}

static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
{
        return rdtsc();
}

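/*
 * Called after the AUX area data for mmap @idx has been read out:
 * re-enable the Intel PT event so that tracing resumes if it was stopped,
 * e.g. to take a snapshot.
 */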
static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
{
        struct intel_pt_recording *ptr =
                        container_of(itr, struct intel_pt_recording, itr);
        struct evsel *evsel;

        evlist__for_each_entry(ptr->evlist, evsel) {
                if (evsel->core.attr.type == ptr->intel_pt_pmu->type)
                        return perf_evlist__enable_event_idx(ptr->evlist, evsel,
                                                             idx);
        }
        return -EINVAL;
}

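/*
 * Entry point for AUX area recording: if the intel_pt PMU is present,
 * allocate the recording state and wire up the auxtrace_record callbacks
 * used by perf record.
 */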
struct auxtrace_record *intel_pt_recording_init(int *err)
{
        struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
        struct intel_pt_recording *ptr;

        if (!intel_pt_pmu)
                return NULL;

        if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
                *err = -errno;
                return NULL;
        }

        ptr = zalloc(sizeof(struct intel_pt_recording));
        if (!ptr) {
                *err = -ENOMEM;
                return NULL;
        }

        ptr->intel_pt_pmu = intel_pt_pmu;
        ptr->itr.recording_options = intel_pt_recording_options;
        ptr->itr.info_priv_size = intel_pt_info_priv_size;
        ptr->itr.info_fill = intel_pt_info_fill;
        ptr->itr.free = intel_pt_recording_free;
        ptr->itr.snapshot_start = intel_pt_snapshot_start;
        ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
        ptr->itr.find_snapshot = intel_pt_find_snapshot;
        ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
        ptr->itr.reference = intel_pt_reference;
        ptr->itr.read_finish = intel_pt_read_finish;
        return &ptr->itr;
}