linux/tools/perf/arch/x86/util/intel-pt.c
/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <errno.h>
#include <stdbool.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/log2.h>
#include <cpuid.h>

#include "../../perf.h"
#include "../../util/session.h"
#include "../../util/event.h"
#include "../../util/evlist.h"
#include "../../util/evsel.h"
#include "../../util/cpumap.h"
#include <subcmd/parse-options.h>
#include "../../util/parse-events.h"
#include "../../util/pmu.h"
#include "../../util/debug.h"
#include "../../util/auxtrace.h"
#include "../../util/tsc.h"
#include "../../util/intel-pt.h"

#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
#define KiB_MASK(x) (KiB(x) - 1)
#define MiB_MASK(x) (MiB(x) - 1)

#define INTEL_PT_DEFAULT_SAMPLE_SIZE	KiB(4)

#define INTEL_PT_MAX_SAMPLE_SIZE	KiB(60)

#define INTEL_PT_PSB_PERIOD_NEAR	256

struct intel_pt_snapshot_ref {
	void *ref_buf;
	size_t ref_offset;
	bool wrapped;
};

struct intel_pt_recording {
	struct auxtrace_record		itr;
	struct perf_pmu			*intel_pt_pmu;
	int				have_sched_switch;
	struct perf_evlist		*evlist;
	bool				snapshot_mode;
	bool				snapshot_init_done;
	size_t				snapshot_size;
	size_t				snapshot_ref_buf_size;
	int				snapshot_ref_cnt;
	struct intel_pt_snapshot_ref	*snapshot_refs;
	size_t				priv_size;
};

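/*
 * Parse a comma-separated list of Intel PT config terms (e.g. "tsc,mtc")
 * against the PMU's format definitions, applying them on top of the value
 * already in *config, which thus provides the defaults.
 */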
static int intel_pt_parse_terms_with_default(struct list_head *formats,
					     const char *str,
					     u64 *config)
{
	struct list_head *terms;
	struct perf_event_attr attr = { .size = 0, };
	int err;

	terms = malloc(sizeof(struct list_head));
	if (!terms)
		return -ENOMEM;

	INIT_LIST_HEAD(terms);

	err = parse_events_terms(terms, str);
	if (err)
		goto out_free;

	attr.config = *config;
	err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
	if (err)
		goto out_free;

	*config = attr.config;
out_free:
	parse_events_terms__delete(terms);
	return err;
}

static int intel_pt_parse_terms(struct list_head *formats, const char *str,
				u64 *config)
{
	*config = 0;
	return intel_pt_parse_terms_with_default(formats, str, config);
}

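/*
 * Extract from 'bits' the bit values selected by 'mask' and pack them,
 * order preserved, into the low-order bits of the result, e.g. mask 0xf0
 * and bits 0xa5 give 0xa.
 */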
static u64 intel_pt_masked_bits(u64 mask, u64 bits)
{
	const u64 top_bit = 1ULL << 63;
	u64 res = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (mask & top_bit) {
			res <<= 1;
			if (bits & top_bit)
				res |= 1;
		}
		mask <<= 1;
		bits <<= 1;
	}

	return res;
}

static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
				struct perf_evlist *evlist, u64 *res)
{
	struct perf_evsel *evsel;
	u64 mask;

	*res = 0;

	mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
	if (!mask)
		return -EINVAL;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == intel_pt_pmu->type) {
			*res = intel_pt_masked_bits(mask, evsel->attr.config);
			return 0;
		}
	}

	return -EINVAL;
}

static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu,
				  struct perf_evlist *evlist)
{
	u64 val;
	int err, topa_multiple_entries;
	size_t psb_period;

	if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries",
				"%d", &topa_multiple_entries) != 1)
		topa_multiple_entries = 0;

	/*
	 * Use caps/topa_multiple_entries to indicate early hardware that had
	 * extra frequent PSBs.
	 */
	if (!topa_multiple_entries) {
		psb_period = 256;
		goto out;
	}

	err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val);
	if (err)
		val = 0;

	psb_period = 1 << (val + 11);
out:
	pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period);
	return psb_period;
}

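/*
 * Return the position of the set bit in 'bits' closest to 'target':
 * the highest set bit at or below 'target' if there is one, otherwise the
 * lowest set bit above it.  Returns -1 if no bits are set.
 */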
static int intel_pt_pick_bit(int bits, int target)
{
	int pos, pick = -1;

	for (pos = 0; bits; bits >>= 1, pos++) {
		if (bits & 1) {
			if (pos <= target || pick < 0)
				pick = pos;
			if (pos >= target)
				break;
		}
	}

	return pick;
}

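/*
 * Build the default config term string: always "tsc", plus "mtc" with an
 * mtc_period and a psb_period chosen from the values the hardware
 * advertises in its caps files, then parse it into config bits.
 */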
static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
{
	char buf[256];
	int mtc, mtc_periods = 0, mtc_period;
	int psb_cyc, psb_periods, psb_period;
	int pos = 0;
	u64 config;

	pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");

	if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d",
				&mtc) != 1)
		mtc = 1;

	if (mtc) {
		if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x",
					&mtc_periods) != 1)
			mtc_periods = 0;
		if (mtc_periods) {
			mtc_period = intel_pt_pick_bit(mtc_periods, 3);
			pos += scnprintf(buf + pos, sizeof(buf) - pos,
					 ",mtc,mtc_period=%d", mtc_period);
		}
	}

	if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d",
				&psb_cyc) != 1)
		psb_cyc = 1;

	if (psb_cyc && mtc_periods) {
		if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x",
					&psb_periods) != 1)
			psb_periods = 0;
		if (psb_periods) {
			psb_period = intel_pt_pick_bit(psb_periods, 3);
			pos += scnprintf(buf + pos, sizeof(buf) - pos,
					 ",psb_period=%d", psb_period);
		}
	}

	pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);

	intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);

	return config;
}

static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
					   struct record_opts *opts,
					   const char *str)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	unsigned long long snapshot_size = 0;
	char *endptr;

	if (str) {
		snapshot_size = strtoull(str, &endptr, 0);
		if (*endptr || snapshot_size > SIZE_MAX)
			return -1;
	}

	opts->auxtrace_snapshot_mode = true;
	opts->auxtrace_snapshot_size = snapshot_size;

	ptr->snapshot_size = snapshot_size;

	return 0;
}

struct perf_event_attr *
intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
{
	struct perf_event_attr *attr;

	attr = zalloc(sizeof(struct perf_event_attr));
	if (!attr)
		return NULL;

	attr->config = intel_pt_default_config(intel_pt_pmu);

	intel_pt_pmu->selectable = true;

	return attr;
}

static const char *intel_pt_find_filter(struct perf_evlist *evlist,
					struct perf_pmu *intel_pt_pmu)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == intel_pt_pmu->type)
			return evsel->filter;
	}

	return NULL;
}

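/*
 * Space needed to store the address filter string, including the NUL
 * terminator, rounded up to a whole number of u64 slots.
 */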
static size_t intel_pt_filter_bytes(const char *filter)
{
	size_t len = filter ? strlen(filter) : 0;

	return len ? roundup(len + 1, 8) : 0;
}

static size_t
intel_pt_info_priv_size(struct auxtrace_record *itr, struct perf_evlist *evlist)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu);

	ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
			 intel_pt_filter_bytes(filter);

	return ptr->priv_size;
}

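/*
 * CPUID leaf 0x15 reports the TSC to core crystal clock ratio as EBX/EAX,
 * which the decoder needs to relate Intel PT MTC packets to TSC.
 */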
static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
{
	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

	__get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
	*n = ebx;
	*d = eax;
}

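/*
 * Fill the PERF_RECORD_AUXTRACE_INFO private data with everything the
 * decoder will need: the PMU config bit positions, TSC conversion
 * parameters, the TSC/CTC ratio, and the address filter string, if any.
 */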
static int intel_pt_info_fill(struct auxtrace_record *itr,
			      struct perf_session *session,
			      struct auxtrace_info_event *auxtrace_info,
			      size_t priv_size)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
	struct perf_event_mmap_page *pc;
	struct perf_tsc_conversion tc = { .time_mult = 0, };
	bool cap_user_time_zero = false, per_cpu_mmaps;
	u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
	u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
	unsigned long max_non_turbo_ratio;
	size_t filter_str_len;
	const char *filter;
	u64 *info;
	int err;

	if (priv_size != ptr->priv_size)
		return -EINVAL;

	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
	intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
			     &noretcomp_bit);
	intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
	mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
					      "mtc_period");
	intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);

	intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);

	if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
				"%lu", &max_non_turbo_ratio) != 1)
		max_non_turbo_ratio = 0;

	filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
	filter_str_len = filter ? strlen(filter) : 0;

	if (!session->evlist->nr_mmaps)
		return -EINVAL;

	pc = session->evlist->mmap[0].base;
	if (pc) {
		err = perf_read_tsc_conversion(pc, &tc);
		if (err) {
			if (err != -EOPNOTSUPP)
				return err;
		} else {
			cap_user_time_zero = tc.time_mult != 0;
		}
		if (!cap_user_time_zero)
			ui__warning("Intel Processor Trace: TSC not available\n");
	}

	per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus);

	auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
	auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
	auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
	auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
	auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
	auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
	auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
	auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
	auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
	auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
	auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
	auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit;
	auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits;
	auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
	auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
	auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
	auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio;
	auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len;

	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;

	if (filter_str_len) {
		size_t len = intel_pt_filter_bytes(filter);

		strncpy((char *)info, filter, len);
		info += len >> 3;
	}

	return 0;
}

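/*
 * Fall back to the sched:sched_switch tracepoint to follow context
 * switches when the kernel does not support PERF_RECORD_SWITCH events.
 */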
static int intel_pt_track_switches(struct perf_evlist *evlist)
{
	const char *sched_switch = "sched:sched_switch";
	struct perf_evsel *evsel;
	int err;

	if (!perf_evlist__can_select_event(evlist, sched_switch))
		return -EPERM;

	err = parse_events(evlist, sched_switch, NULL);
	if (err) {
		pr_debug2("%s: failed to parse %s, error %d\n",
			  __func__, sched_switch, err);
		return err;
	}

	evsel = perf_evlist__last(evlist);

	perf_evsel__set_sample_bit(evsel, CPU);
	perf_evsel__set_sample_bit(evsel, TIME);

	evsel->system_wide = true;
	evsel->no_aux_samples = true;
	evsel->immediate = true;

	return 0;
}

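/*
 * Render a bitmask of valid values as a human-readable string of values
 * and ranges, e.g. bits 0, 1, 2 and 4 set gives "0-2,4".  A run of two is
 * printed as "a,b" and a run of three or more as "a-b".
 */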
static void intel_pt_valid_str(char *str, size_t len, u64 valid)
{
	unsigned int val, last = 0, state = 1;
	int p = 0;

	str[0] = '\0';

	for (val = 0; val <= 64; val++, valid >>= 1) {
		if (valid & 1) {
			last = val;
			switch (state) {
			case 0:
				p += scnprintf(str + p, len - p, ",");
				/* Fall through */
			case 1:
				p += scnprintf(str + p, len - p, "%u", val);
				state = 2;
				break;
			case 2:
				state = 3;
				break;
			case 3:
				state = 4;
				break;
			default:
				break;
			}
		} else {
			switch (state) {
			case 3:
				p += scnprintf(str + p, len - p, ",%u", last);
				state = 0;
				break;
			case 4:
				p += scnprintf(str + p, len - p, "-%u", last);
				state = 0;
				break;
			default:
				break;
			}
			if (state != 1)
				state = 0;
		}
	}
}

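/*
 * Validate one config term value (e.g. mtc_period) against the bitmask of
 * supported values the hardware advertises in the given caps file.  Zero
 * is always accepted, and if the 'supported' caps file reports the feature
 * absent, zero is the only accepted value.
 */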
static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
				    const char *caps, const char *name,
				    const char *supported, u64 config)
{
	char valid_str[256];
	unsigned int shift;
	unsigned long long valid;
	u64 bits;
	int ok;

	if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1)
		valid = 0;

	if (supported &&
	    perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok)
		valid = 0;

	valid |= 1;

	bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);

	config &= bits;

	for (shift = 0; bits && !(bits & 1); shift++)
		bits >>= 1;

	config >>= shift;

	if (config > 63)
		goto out_err;

	/* 'config' can be up to 63, so the shift must be done in 64 bits */
	if (valid & (1ULL << config))
		return 0;
out_err:
	intel_pt_valid_str(valid_str, sizeof(valid_str), valid);
	pr_err("Invalid %s for %s. Valid values are: %s\n",
	       name, INTEL_PT_PMU_NAME, valid_str);
	return -EINVAL;
}

static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
				    struct perf_evsel *evsel)
{
	int err;

	if (!evsel)
		return 0;

	err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
				       "cyc_thresh", "caps/psb_cyc",
				       evsel->attr.config);
	if (err)
		return err;

	err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods",
				       "mtc_period", "caps/mtc",
				       evsel->attr.config);
	if (err)
		return err;

	return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods",
					"psb_period", "caps/psb_cyc",
					evsel->attr.config);
}

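/*
 * Prepare the event list for recording: validate the Intel PT event's
 * config, size the AUX area and perf mmap buffers, arrange context switch
 * tracking, and add a dummy tracking event for sideband records.
 */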
static int intel_pt_recording_options(struct auxtrace_record *itr,
				      struct perf_evlist *evlist,
				      struct record_opts *opts)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
	bool have_timing_info, need_immediate = false;
	struct perf_evsel *evsel, *intel_pt_evsel = NULL;
	const struct cpu_map *cpus = evlist->cpus;
	bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
	u64 tsc_bit;
	int err;

	ptr->evlist = evlist;
	ptr->snapshot_mode = opts->auxtrace_snapshot_mode;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == intel_pt_pmu->type) {
			if (intel_pt_evsel) {
				pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
				return -EINVAL;
			}
			evsel->attr.freq = 0;
			evsel->attr.sample_period = 1;
			intel_pt_evsel = evsel;
			opts->full_auxtrace = true;
		}
	}

	if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
		pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
		return -EINVAL;
	}

	/*
	 * RHEL7: we don't support use_clockid yet, so the following
	 * upstream hunk is omitted:

	if (opts->use_clockid) {
		pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
		return -EINVAL;
	}

	 */

	if (!opts->full_auxtrace)
		return 0;

	err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
	if (err)
		return err;

	/* Set default sizes for snapshot mode */
	if (opts->auxtrace_snapshot_mode) {
		size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);

		if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
			if (privileged) {
				opts->auxtrace_mmap_pages = MiB(4) / page_size;
			} else {
				opts->auxtrace_mmap_pages = KiB(128) / page_size;
				if (opts->mmap_pages == UINT_MAX)
					opts->mmap_pages = KiB(256) / page_size;
			}
		} else if (!opts->auxtrace_mmap_pages && !privileged &&
			   opts->mmap_pages == UINT_MAX) {
			opts->mmap_pages = KiB(256) / page_size;
		}
		if (!opts->auxtrace_snapshot_size)
			opts->auxtrace_snapshot_size =
				opts->auxtrace_mmap_pages * (size_t)page_size;
		if (!opts->auxtrace_mmap_pages) {
			size_t sz = opts->auxtrace_snapshot_size;

			sz = round_up(sz, page_size) / page_size;
			opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
		}
		if (opts->auxtrace_snapshot_size >
				opts->auxtrace_mmap_pages * (size_t)page_size) {
			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
			       opts->auxtrace_snapshot_size,
			       opts->auxtrace_mmap_pages * (size_t)page_size);
			return -EINVAL;
		}
		if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
			return -EINVAL;
		}
		pr_debug2("Intel PT snapshot size: %zu\n",
			  opts->auxtrace_snapshot_size);
		if (psb_period &&
		    opts->auxtrace_snapshot_size <= psb_period +
						  INTEL_PT_PSB_PERIOD_NEAR)
			ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
				    opts->auxtrace_snapshot_size, psb_period);
	}

	/* Set default sizes for full trace mode */
	if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
		if (privileged) {
			opts->auxtrace_mmap_pages = MiB(4) / page_size;
		} else {
			opts->auxtrace_mmap_pages = KiB(128) / page_size;
			if (opts->mmap_pages == UINT_MAX)
				opts->mmap_pages = KiB(256) / page_size;
		}
	}

	/* Validate auxtrace_mmap_pages */
	if (opts->auxtrace_mmap_pages) {
		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
		size_t min_sz;

		if (opts->auxtrace_snapshot_mode)
			min_sz = KiB(4);
		else
			min_sz = KiB(8);

		if (sz < min_sz || !is_power_of_2(sz)) {
			pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
			       min_sz / 1024);
			return -EINVAL;
		}
	}

	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);

	if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit))
		have_timing_info = true;
	else
		have_timing_info = false;

	/*
	 * Per-cpu recording needs sched_switch events to distinguish different
	 * threads.
	 */
	if (have_timing_info && !cpu_map__empty(cpus)) {
		if (perf_can_record_switch_events()) {
			bool cpu_wide = !target__none(&opts->target) &&
					!target__has_task(&opts->target);

			if (!cpu_wide && perf_can_record_cpu_wide()) {
				struct perf_evsel *switch_evsel;

				err = parse_events(evlist, "dummy:u", NULL);
				if (err)
					return err;

				switch_evsel = perf_evlist__last(evlist);

				switch_evsel->attr.freq = 0;
				switch_evsel->attr.sample_period = 1;
				switch_evsel->attr.context_switch = 1;

				switch_evsel->system_wide = true;
				switch_evsel->no_aux_samples = true;
				switch_evsel->immediate = true;

				perf_evsel__set_sample_bit(switch_evsel, TID);
				perf_evsel__set_sample_bit(switch_evsel, TIME);
				perf_evsel__set_sample_bit(switch_evsel, CPU);

				opts->record_switch_events = false;
				ptr->have_sched_switch = 3;
			} else {
				opts->record_switch_events = true;
				need_immediate = true;
				if (cpu_wide)
					ptr->have_sched_switch = 3;
				else
					ptr->have_sched_switch = 2;
			}
		} else {
			err = intel_pt_track_switches(evlist);
			if (err == -EPERM)
				pr_debug2("Unable to select sched:sched_switch\n");
			else if (err)
				return err;
			else
				ptr->have_sched_switch = 1;
		}
	}

	if (intel_pt_evsel) {
		/*
		 * To obtain the auxtrace buffer file descriptor, the auxtrace
		 * event must come first.
		 */
		perf_evlist__to_front(evlist, intel_pt_evsel);
		/*
		 * In the case of per-cpu mmaps, we need the CPU on the
		 * AUX event.
		 */
		if (!cpu_map__empty(cpus))
			perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
	}

	/* Add dummy event to keep tracking */
	if (opts->full_auxtrace) {
		struct perf_evsel *tracking_evsel;

		err = parse_events(evlist, "dummy:u", NULL);
		if (err)
			return err;

		tracking_evsel = perf_evlist__last(evlist);

		perf_evlist__set_tracking_event(evlist, tracking_evsel);

		tracking_evsel->attr.freq = 0;
		tracking_evsel->attr.sample_period = 1;

		if (need_immediate)
			tracking_evsel->immediate = true;

		/* In per-cpu case, always need the time of mmap events etc */
		if (!cpu_map__empty(cpus)) {
			perf_evsel__set_sample_bit(tracking_evsel, TIME);
			/* And the CPU for switch events */
			perf_evsel__set_sample_bit(tracking_evsel, CPU);
		}
	}

	/*
	 * Warn the user when we do not have enough information to decode i.e.
	 * per-cpu with no sched_switch (except workload-only).
	 */
	if (!ptr->have_sched_switch && !cpu_map__empty(cpus) &&
	    !target__none(&opts->target))
		ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");

	return 0;
}

static int intel_pt_snapshot_start(struct auxtrace_record *itr)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_evsel *evsel;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel->attr.type == ptr->intel_pt_pmu->type)
			return perf_evsel__disable(evsel);
	}
	return -EINVAL;
}

static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_evsel *evsel;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel->attr.type == ptr->intel_pt_pmu->type)
			return perf_evsel__enable(evsel);
	}
	return -EINVAL;
}

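/*
 * Grow the snapshot reference array, doubling from a minimum of 16
 * entries until it is big enough to hold index 'idx'.
 */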
static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
{
	const size_t sz = sizeof(struct intel_pt_snapshot_ref);
	int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
	struct intel_pt_snapshot_ref *refs;

	if (!new_cnt)
		new_cnt = 16;

	while (new_cnt <= idx)
		new_cnt *= 2;

	refs = calloc(new_cnt, sz);
	if (!refs)
		return -ENOMEM;

	memcpy(refs, ptr->snapshot_refs, cnt * sz);
	/* Free the old array now that its entries have been copied over */
	free(ptr->snapshot_refs);

	ptr->snapshot_refs = refs;
	ptr->snapshot_ref_cnt = new_cnt;

	return 0;
}

static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
{
	int i;

	for (i = 0; i < ptr->snapshot_ref_cnt; i++)
		zfree(&ptr->snapshot_refs[i].ref_buf);
	zfree(&ptr->snapshot_refs);
}

static void intel_pt_recording_free(struct auxtrace_record *itr)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);

	intel_pt_free_snapshot_refs(ptr);
	free(ptr);
}

static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
				       size_t snapshot_buf_size)
{
	size_t ref_buf_size = ptr->snapshot_ref_buf_size;
	void *ref_buf;

	ref_buf = zalloc(ref_buf_size);
	if (!ref_buf)
		return -ENOMEM;

	ptr->snapshot_refs[idx].ref_buf = ref_buf;
	ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;

	return 0;
}

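/*
 * Size of the reference buffer used to detect wrap-around of an AUX
 * buffer: about two PSB periods, capped at 256KiB.  Returns zero if the
 * snapshot is too small to need one, in which case wrap-around is instead
 * detected by checking whether the tail of the buffer is still zero-filled.
 */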
static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
					     size_t snapshot_buf_size)
{
	const size_t max_size = 256 * 1024;
	size_t buf_size = 0, psb_period;

	if (ptr->snapshot_size <= 64 * 1024)
		return 0;

	psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
	if (psb_period)
		buf_size = psb_period * 2;

	if (!buf_size || buf_size > max_size)
		buf_size = max_size;

	if (buf_size >= snapshot_buf_size)
		return 0;

	if (buf_size >= ptr->snapshot_size / 2)
		return 0;

	return buf_size;
}

static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
				  size_t snapshot_buf_size)
{
	if (ptr->snapshot_init_done)
		return 0;

	ptr->snapshot_init_done = true;

	ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
							snapshot_buf_size);

	return 0;
}

/**
 * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
 * @buf1: first buffer
 * @compare_size: number of bytes to compare
 * @buf2: second buffer (a circular buffer)
 * @offs2: offset in second buffer
 * @buf2_size: size of second buffer
 *
 * The comparison allows for the possibility that the bytes to compare in the
 * circular buffer are not contiguous.  It is assumed that @compare_size <=
 * @buf2_size.  This function returns %false if the bytes are identical, %true
 * otherwise.
 */
static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
				     void *buf2, size_t offs2, size_t buf2_size)
{
	size_t end2 = offs2 + compare_size, part_size;

	if (end2 <= buf2_size)
		return memcmp(buf1, buf2 + offs2, compare_size);

	part_size = end2 - buf2_size;
	if (memcmp(buf1, buf2 + offs2, part_size))
		return true;

	compare_size -= part_size;

	return memcmp(buf1 + part_size, buf2, compare_size);
}

static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
				 size_t ref_size, size_t buf_size,
				 void *data, size_t head)
{
	size_t ref_end = ref_offset + ref_size;

	if (ref_end > buf_size) {
		if (head > ref_offset || head < ref_end - buf_size)
			return true;
	} else if (head > ref_offset && head < ref_end) {
		return true;
	}

	return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
					buf_size);
}

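/*
 * Save the 'ref_size' bytes of the circular buffer that surround offset
 * 'head' (the bytes most recently written), wrapping to the end of the
 * buffer when 'head' is near the start.
 */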
static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
			      void *data, size_t head)
{
	if (head >= ref_size) {
		memcpy(ref_buf, data + head - ref_size, ref_size);
	} else {
		memcpy(ref_buf, data, head);
		ref_size -= head;
		memcpy(ref_buf + head, data + buf_size - ref_size, ref_size);
	}
}

static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
			     struct auxtrace_mmap *mm, unsigned char *data,
			     u64 head)
{
	struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
	bool wrapped;

	wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
				       ptr->snapshot_ref_buf_size, mm->len,
				       data, head);

	intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
			  data, head);

	return wrapped;
}

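/*
 * Detect a first wrap-around without a reference buffer: the kernel
 * zero-fills the AUX buffer initially, so non-zero data among the last
 * 512 u64 words means the writer has been past the end at least once.
 */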
static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
{
	int i, a, b;

	b = buf_size >> 3;
	a = b - 512;
	if (a < 0)
		a = 0;

	for (i = a; i < b; i++) {
		if (data[i])
			return true;
	}

	return false;
}

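/*
 * Called when a snapshot is taken: work out whether the AUX buffer has
 * wrapped, then adjust 'old' and 'head' so that the generic code can copy
 * out the trace data as if it came from a linear, ever-increasing buffer.
 */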
static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
				  struct auxtrace_mmap *mm, unsigned char *data,
				  u64 *head, u64 *old)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	bool wrapped;
	int err;

	pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
		  __func__, idx, (size_t)*old, (size_t)*head);

	err = intel_pt_snapshot_init(ptr, mm->len);
	if (err)
		goto out_err;

	if (idx >= ptr->snapshot_ref_cnt) {
		err = intel_pt_alloc_snapshot_refs(ptr, idx);
		if (err)
			goto out_err;
	}

	if (ptr->snapshot_ref_buf_size) {
		if (!ptr->snapshot_refs[idx].ref_buf) {
			err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
			if (err)
				goto out_err;
		}
		wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
	} else {
		wrapped = ptr->snapshot_refs[idx].wrapped;
		if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) {
			ptr->snapshot_refs[idx].wrapped = true;
			wrapped = true;
		}
	}

	/*
	 * In full trace mode 'head' continually increases.  However in snapshot
	 * mode 'head' is an offset within the buffer.  Here 'old' and 'head'
	 * are adjusted to match the full trace case which expects that 'old' is
	 * always less than 'head'.
	 */
	if (wrapped) {
		*old = *head;
		*head += mm->len;
	} else {
		if (mm->mask)
			*old &= mm->mask;
		else
			*old %= mm->len;
		if (*old > *head)
			*head += mm->len;
	}

	pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
		  __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);

	return 0;

out_err:
	pr_err("%s: failed, error %d\n", __func__, err);
	return err;
}

static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
{
	return rdtsc();
}

static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
{
	struct intel_pt_recording *ptr =
			container_of(itr, struct intel_pt_recording, itr);
	struct perf_evsel *evsel;

	evlist__for_each_entry(ptr->evlist, evsel) {
		if (evsel->attr.type == ptr->intel_pt_pmu->type)
			return perf_evlist__enable_event_idx(ptr->evlist, evsel,
							     idx);
	}
	return -EINVAL;
}

struct auxtrace_record *intel_pt_recording_init(int *err)
{
	struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
	struct intel_pt_recording *ptr;

	if (!intel_pt_pmu)
		return NULL;

	if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
		*err = -errno;
		return NULL;
	}

	ptr = zalloc(sizeof(struct intel_pt_recording));
	if (!ptr) {
		*err = -ENOMEM;
		return NULL;
	}

	ptr->intel_pt_pmu = intel_pt_pmu;
	ptr->itr.recording_options = intel_pt_recording_options;
	ptr->itr.info_priv_size = intel_pt_info_priv_size;
	ptr->itr.info_fill = intel_pt_info_fill;
	ptr->itr.free = intel_pt_recording_free;
	ptr->itr.snapshot_start = intel_pt_snapshot_start;
	ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
	ptr->itr.find_snapshot = intel_pt_find_snapshot;
	ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
	ptr->itr.reference = intel_pt_reference;
	ptr->itr.read_finish = intel_pt_read_finish;
	return &ptr->itr;
}