linux/tools/perf/util/intel-pt.c
   1/*
   2 * intel_pt.c: Intel Processor Trace support
   3 * Copyright (c) 2013-2015, Intel Corporation.
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms and conditions of the GNU General Public License,
   7 * version 2, as published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope it will be useful, but WITHOUT
  10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  12 * more details.
  13 *
  14 */
  15
  16#include <stdio.h>
  17#include <stdbool.h>
  18#include <errno.h>
  19#include <linux/kernel.h>
  20#include <linux/types.h>
  21
  22#include "../perf.h"
  23#include "session.h"
  24#include "machine.h"
  25#include "tool.h"
  26#include "event.h"
  27#include "evlist.h"
  28#include "evsel.h"
  29#include "map.h"
  30#include "color.h"
  31#include "util.h"
  32#include "thread.h"
  33#include "thread-stack.h"
  34#include "symbol.h"
  35#include "callchain.h"
  36#include "dso.h"
  37#include "debug.h"
  38#include "auxtrace.h"
  39#include "tsc.h"
  40#include "intel-pt.h"
  41
  42#include "intel-pt-decoder/intel-pt-log.h"
  43#include "intel-pt-decoder/intel-pt-decoder.h"
  44#include "intel-pt-decoder/intel-pt-insn-decoder.h"
  45#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
  46
  47#define MAX_TIMESTAMP (~0ULL)
  48
  49struct intel_pt {
  50        struct auxtrace auxtrace;
  51        struct auxtrace_queues queues;
  52        struct auxtrace_heap heap;
  53        u32 auxtrace_type;
  54        struct perf_session *session;
  55        struct machine *machine;
  56        struct perf_evsel *switch_evsel;
  57        struct thread *unknown_thread;
  58        bool timeless_decoding;
  59        bool sampling_mode;
  60        bool snapshot_mode;
  61        bool per_cpu_mmaps;
  62        bool have_tsc;
  63        bool data_queued;
  64        bool est_tsc;
  65        bool sync_switch;
  66        int have_sched_switch;
  67        u32 pmu_type;
  68        u64 kernel_start;
  69        u64 switch_ip;
  70        u64 ptss_ip;
  71
  72        struct perf_tsc_conversion tc;
  73        bool cap_user_time_zero;
  74
  75        struct itrace_synth_opts synth_opts;
  76
  77        bool sample_instructions;
  78        u64 instructions_sample_type;
  79        u64 instructions_sample_period;
  80        u64 instructions_id;
  81
  82        bool sample_branches;
  83        u32 branches_filter;
  84        u64 branches_sample_type;
  85        u64 branches_id;
  86
  87        bool sample_transactions;
  88        u64 transactions_sample_type;
  89        u64 transactions_id;
  90
  91        bool synth_needs_swap;
  92
  93        u64 tsc_bit;
  94        u64 mtc_bit;
  95        u64 mtc_freq_bits;
  96        u32 tsc_ctc_ratio_n;
  97        u32 tsc_ctc_ratio_d;
  98        u64 cyc_bit;
  99        u64 noretcomp_bit;
 100        unsigned max_non_turbo_ratio;
 101};
 102
 103enum switch_state {
 104        INTEL_PT_SS_NOT_TRACING,
 105        INTEL_PT_SS_UNKNOWN,
 106        INTEL_PT_SS_TRACING,
 107        INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
 108        INTEL_PT_SS_EXPECTING_SWITCH_IP,
 109};
 110
 111struct intel_pt_queue {
 112        struct intel_pt *pt;
 113        unsigned int queue_nr;
 114        struct auxtrace_buffer *buffer;
 115        void *decoder;
 116        const struct intel_pt_state *state;
 117        struct ip_callchain *chain;
 118        union perf_event *event_buf;
 119        bool on_heap;
 120        bool stop;
 121        bool step_through_buffers;
 122        bool use_buffer_pid_tid;
 123        pid_t pid, tid;
 124        int cpu;
 125        int switch_state;
 126        pid_t next_tid;
 127        struct thread *thread;
 128        bool exclude_kernel;
 129        bool have_sample;
 130        u64 time;
 131        u64 timestamp;
 132        u32 flags;
 133        u16 insn_len;
 134        u64 last_insn_cnt;
 135};
 136
 137static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
 138                          unsigned char *buf, size_t len)
 139{
 140        struct intel_pt_pkt packet;
 141        size_t pos = 0;
 142        int ret, pkt_len, i;
 143        char desc[INTEL_PT_PKT_DESC_MAX];
 144        const char *color = PERF_COLOR_BLUE;
 145
 146        color_fprintf(stdout, color,
 147                      ". ... Intel Processor Trace data: size %zu bytes\n",
 148                      len);
 149
 150        while (len) {
 151                ret = intel_pt_get_packet(buf, len, &packet);
 152                if (ret > 0)
 153                        pkt_len = ret;
 154                else
 155                        pkt_len = 1;
 156                printf(".");
 157                color_fprintf(stdout, color, "  %08x: ", pos);
 158                for (i = 0; i < pkt_len; i++)
 159                        color_fprintf(stdout, color, " %02x", buf[i]);
 160                for (; i < 16; i++)
 161                        color_fprintf(stdout, color, "   ");
 162                if (ret > 0) {
 163                        ret = intel_pt_pkt_desc(&packet, desc,
 164                                                INTEL_PT_PKT_DESC_MAX);
 165                        if (ret > 0)
 166                                color_fprintf(stdout, color, " %s\n", desc);
 167                } else {
 168                        color_fprintf(stdout, color, " Bad packet!\n");
 169                }
 170                pos += pkt_len;
 171                buf += pkt_len;
 172                len -= pkt_len;
 173        }
 174}
 175
 176static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
 177                                size_t len)
 178{
 179        printf(".\n");
 180        intel_pt_dump(pt, buf, len);
 181}
 182
 183static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
 184                                   struct auxtrace_buffer *b)
 185{
 186        void *start;
 187
 188        start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
 189                                      pt->have_tsc);
 190        if (!start)
 191                return -EINVAL;
 192        b->use_size = b->data + b->size - start;
 193        b->use_data = start;
 194        return 0;
 195}
 196
 197static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
 198                                        struct auxtrace_queue *queue,
 199                                        struct auxtrace_buffer *buffer)
 200{
 201        if (queue->cpu == -1 && buffer->cpu != -1)
 202                ptq->cpu = buffer->cpu;
 203
 204        ptq->pid = buffer->pid;
 205        ptq->tid = buffer->tid;
 206
 207        intel_pt_log("queue %u cpu %d pid %d tid %d\n",
 208                     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
 209
 210        thread__zput(ptq->thread);
 211
 212        if (ptq->tid != -1) {
 213                if (ptq->pid != -1)
 214                        ptq->thread = machine__findnew_thread(ptq->pt->machine,
 215                                                              ptq->pid,
 216                                                              ptq->tid);
 217                else
 218                        ptq->thread = machine__find_thread(ptq->pt->machine, -1,
 219                                                           ptq->tid);
 220        }
 221}
 222
 223/* This function assumes data is processed sequentially only */
 224static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
 225{
 226        struct intel_pt_queue *ptq = data;
 227        struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
 228        struct auxtrace_queue *queue;
 229
 230        if (ptq->stop) {
 231                b->len = 0;
 232                return 0;
 233        }
 234
 235        queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
 236
 237        buffer = auxtrace_buffer__next(queue, buffer);
 238        if (!buffer) {
 239                if (old_buffer)
 240                        auxtrace_buffer__drop_data(old_buffer);
 241                b->len = 0;
 242                return 0;
 243        }
 244
 245        ptq->buffer = buffer;
 246
 247        if (!buffer->data) {
 248                int fd = perf_data_file__fd(ptq->pt->session->file);
 249
 250                buffer->data = auxtrace_buffer__get_data(buffer, fd);
 251                if (!buffer->data)
 252                        return -ENOMEM;
 253        }
 254
 255        if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
 256            intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
 257                return -ENOMEM;
 258
 259        if (old_buffer)
 260                auxtrace_buffer__drop_data(old_buffer);
 261
 262        if (buffer->use_data) {
 263                b->len = buffer->use_size;
 264                b->buf = buffer->use_data;
 265        } else {
 266                b->len = buffer->size;
 267                b->buf = buffer->data;
 268        }
 269        b->ref_timestamp = buffer->reference;
 270
 271        if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
 272                                                      !buffer->consecutive)) {
 273                b->consecutive = false;
 274                b->trace_nr = buffer->buffer_nr + 1;
 275        } else {
 276                b->consecutive = true;
 277        }
 278
 279        if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
 280                                        ptq->tid != buffer->tid))
 281                intel_pt_use_buffer_pid_tid(ptq, queue, buffer);
 282
 283        if (ptq->step_through_buffers)
 284                ptq->stop = true;
 285
 286        if (!b->len)
 287                return intel_pt_get_trace(b, data);
 288
 289        return 0;
 290}
 291
 292struct intel_pt_cache_entry {
 293        struct auxtrace_cache_entry     entry;
 294        u64                             insn_cnt;
 295        u64                             byte_cnt;
 296        enum intel_pt_insn_op           op;
 297        enum intel_pt_insn_branch       branch;
 298        int                             length;
 299        int32_t                         rel;
 300};
 301
 302static int intel_pt_config_div(const char *var, const char *value, void *data)
 303{
 304        int *d = data;
 305        long val;
 306
 307        if (!strcmp(var, "intel-pt.cache-divisor")) {
 308                val = strtol(value, NULL, 0);
 309                if (val > 0 && val <= INT_MAX)
 310                        *d = val;
 311        }
 312
 313        return 0;
 314}
 315
 316static int intel_pt_cache_divisor(void)
 317{
 318        static int d;
 319
 320        if (d)
 321                return d;
 322
 323        perf_config(intel_pt_config_div, &d);
 324
 325        if (!d)
 326                d = 64;
 327
 328        return d;
 329}
 330
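/*
 * Pick a size (in bits) for the per-dso instruction cache, based on the
 * dso data size divided by the configured cache divisor.
 */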
 331static unsigned int intel_pt_cache_size(struct dso *dso,
 332                                        struct machine *machine)
 333{
 334        off_t size;
 335
 336        size = dso__data_size(dso, machine);
 337        size /= intel_pt_cache_divisor();
 338        if (size < 1000)
 339                return 10;
 340        if (size > (1 << 21))
 341                return 21;
 342        return 32 - __builtin_clz(size);
 343}
 344
 345static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
 346                                             struct machine *machine)
 347{
 348        struct auxtrace_cache *c;
 349        unsigned int bits;
 350
 351        if (dso->auxtrace_cache)
 352                return dso->auxtrace_cache;
 353
 354        bits = intel_pt_cache_size(dso, machine);
 355
 356        /* Ignoring cache creation failure */
 357        c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
 358
 359        dso->auxtrace_cache = c;
 360
 361        return c;
 362}
 363
 364static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
 365                              u64 offset, u64 insn_cnt, u64 byte_cnt,
 366                              struct intel_pt_insn *intel_pt_insn)
 367{
 368        struct auxtrace_cache *c = intel_pt_cache(dso, machine);
 369        struct intel_pt_cache_entry *e;
 370        int err;
 371
 372        if (!c)
 373                return -ENOMEM;
 374
 375        e = auxtrace_cache__alloc_entry(c);
 376        if (!e)
 377                return -ENOMEM;
 378
 379        e->insn_cnt = insn_cnt;
 380        e->byte_cnt = byte_cnt;
 381        e->op = intel_pt_insn->op;
 382        e->branch = intel_pt_insn->branch;
 383        e->length = intel_pt_insn->length;
 384        e->rel = intel_pt_insn->rel;
 385
 386        err = auxtrace_cache__add(c, offset, &e->entry);
 387        if (err)
 388                auxtrace_cache__free_entry(c, e);
 389
 390        return err;
 391}
 392
 393static struct intel_pt_cache_entry *
 394intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
 395{
 396        struct auxtrace_cache *c = intel_pt_cache(dso, machine);
 397
 398        if (!c)
 399                return NULL;
 400
 401        return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
 402}
 403
 404static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 405                                   uint64_t *insn_cnt_ptr, uint64_t *ip,
 406                                   uint64_t to_ip, uint64_t max_insn_cnt,
 407                                   void *data)
 408{
 409        struct intel_pt_queue *ptq = data;
 410        struct machine *machine = ptq->pt->machine;
 411        struct thread *thread;
 412        struct addr_location al;
 413        unsigned char buf[1024];
 414        size_t bufsz;
 415        ssize_t len;
 416        int x86_64;
 417        u8 cpumode;
 418        u64 offset, start_offset, start_ip;
 419        u64 insn_cnt = 0;
 420        bool one_map = true;
 421
 422        if (to_ip && *ip == to_ip)
 423                goto out_no_cache;
 424
 425        bufsz = intel_pt_insn_max_size();
 426
 427        if (*ip >= ptq->pt->kernel_start)
 428                cpumode = PERF_RECORD_MISC_KERNEL;
 429        else
 430                cpumode = PERF_RECORD_MISC_USER;
 431
 432        thread = ptq->thread;
 433        if (!thread) {
 434                if (cpumode != PERF_RECORD_MISC_KERNEL)
 435                        return -EINVAL;
 436                thread = ptq->pt->unknown_thread;
 437        }
 438
 439        while (1) {
 440                thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
 441                if (!al.map || !al.map->dso)
 442                        return -EINVAL;
 443
 444                if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
 445                    dso__data_status_seen(al.map->dso,
 446                                          DSO_DATA_STATUS_SEEN_ITRACE))
 447                        return -ENOENT;
 448
 449                offset = al.map->map_ip(al.map, *ip);
 450
 451                if (!to_ip && one_map) {
 452                        struct intel_pt_cache_entry *e;
 453
 454                        e = intel_pt_cache_lookup(al.map->dso, machine, offset);
 455                        if (e &&
 456                            (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
 457                                *insn_cnt_ptr = e->insn_cnt;
 458                                *ip += e->byte_cnt;
 459                                intel_pt_insn->op = e->op;
 460                                intel_pt_insn->branch = e->branch;
 461                                intel_pt_insn->length = e->length;
 462                                intel_pt_insn->rel = e->rel;
 463                                intel_pt_log_insn_no_data(intel_pt_insn, *ip);
 464                                return 0;
 465                        }
 466                }
 467
 468                start_offset = offset;
 469                start_ip = *ip;
 470
 471                /* Load maps to ensure dso->is_64_bit has been updated */
 472                map__load(al.map, machine->symbol_filter);
 473
 474                x86_64 = al.map->dso->is_64_bit;
 475
 476                while (1) {
 477                        len = dso__data_read_offset(al.map->dso, machine,
 478                                                    offset, buf, bufsz);
 479                        if (len <= 0)
 480                                return -EINVAL;
 481
 482                        if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
 483                                return -EINVAL;
 484
 485                        intel_pt_log_insn(intel_pt_insn, *ip);
 486
 487                        insn_cnt += 1;
 488
 489                        if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
 490                                goto out;
 491
 492                        if (max_insn_cnt && insn_cnt >= max_insn_cnt)
 493                                goto out_no_cache;
 494
 495                        *ip += intel_pt_insn->length;
 496
 497                        if (to_ip && *ip == to_ip)
 498                                goto out_no_cache;
 499
 500                        if (*ip >= al.map->end)
 501                                break;
 502
 503                        offset += intel_pt_insn->length;
 504                }
 505                one_map = false;
 506        }
 507out:
 508        *insn_cnt_ptr = insn_cnt;
 509
 510        if (!one_map)
 511                goto out_no_cache;
 512
 513        /*
  514         * Didn't look up in the 'to_ip' case, so do it now to prevent duplicate
 515         * entries.
 516         */
 517        if (to_ip) {
 518                struct intel_pt_cache_entry *e;
 519
 520                e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
 521                if (e)
 522                        return 0;
 523        }
 524
 525        /* Ignore cache errors */
 526        intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
 527                           *ip - start_ip, intel_pt_insn);
 528
 529        return 0;
 530
 531out_no_cache:
 532        *insn_cnt_ptr = insn_cnt;
 533        return 0;
 534}
 535
 536static bool intel_pt_get_config(struct intel_pt *pt,
 537                                struct perf_event_attr *attr, u64 *config)
 538{
 539        if (attr->type == pt->pmu_type) {
 540                if (config)
 541                        *config = attr->config;
 542                return true;
 543        }
 544
 545        return false;
 546}
 547
 548static bool intel_pt_exclude_kernel(struct intel_pt *pt)
 549{
 550        struct perf_evsel *evsel;
 551
 552        evlist__for_each(pt->session->evlist, evsel) {
 553                if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
 554                    !evsel->attr.exclude_kernel)
 555                        return false;
 556        }
 557        return true;
 558}
 559
 560static bool intel_pt_return_compression(struct intel_pt *pt)
 561{
 562        struct perf_evsel *evsel;
 563        u64 config;
 564
 565        if (!pt->noretcomp_bit)
 566                return true;
 567
 568        evlist__for_each(pt->session->evlist, evsel) {
 569                if (intel_pt_get_config(pt, &evsel->attr, &config) &&
 570                    (config & pt->noretcomp_bit))
 571                        return false;
 572        }
 573        return true;
 574}
 575
 576static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
 577{
 578        struct perf_evsel *evsel;
 579        unsigned int shift;
 580        u64 config;
 581
 582        if (!pt->mtc_freq_bits)
 583                return 0;
 584
 585        for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
 586                config >>= 1;
 587
 588        evlist__for_each(pt->session->evlist, evsel) {
 589                if (intel_pt_get_config(pt, &evsel->attr, &config))
 590                        return (config & pt->mtc_freq_bits) >> shift;
 591        }
 592        return 0;
 593}
 594
 595static bool intel_pt_timeless_decoding(struct intel_pt *pt)
 596{
 597        struct perf_evsel *evsel;
 598        bool timeless_decoding = true;
 599        u64 config;
 600
 601        if (!pt->tsc_bit || !pt->cap_user_time_zero)
 602                return true;
 603
 604        evlist__for_each(pt->session->evlist, evsel) {
 605                if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
 606                        return true;
 607                if (intel_pt_get_config(pt, &evsel->attr, &config)) {
 608                        if (config & pt->tsc_bit)
 609                                timeless_decoding = false;
 610                        else
 611                                return true;
 612                }
 613        }
 614        return timeless_decoding;
 615}
 616
 617static bool intel_pt_tracing_kernel(struct intel_pt *pt)
 618{
 619        struct perf_evsel *evsel;
 620
 621        evlist__for_each(pt->session->evlist, evsel) {
 622                if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
 623                    !evsel->attr.exclude_kernel)
 624                        return true;
 625        }
 626        return false;
 627}
 628
 629static bool intel_pt_have_tsc(struct intel_pt *pt)
 630{
 631        struct perf_evsel *evsel;
 632        bool have_tsc = false;
 633        u64 config;
 634
 635        if (!pt->tsc_bit)
 636                return false;
 637
 638        evlist__for_each(pt->session->evlist, evsel) {
 639                if (intel_pt_get_config(pt, &evsel->attr, &config)) {
 640                        if (config & pt->tsc_bit)
 641                                have_tsc = true;
 642                        else
 643                                return false;
 644                }
 645        }
 646        return have_tsc;
 647}
 648
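/*
 * Convert nanoseconds to TSC ticks using the inverse of the perf
 * mult/shift conversion, splitting the calculation into quotient and
 * remainder to avoid overflowing 64 bits.
 */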
 649static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
 650{
 651        u64 quot, rem;
 652
 653        quot = ns / pt->tc.time_mult;
 654        rem  = ns % pt->tc.time_mult;
 655        return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
 656                pt->tc.time_mult;
 657}
 658
 659static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 660                                                   unsigned int queue_nr)
 661{
 662        struct intel_pt_params params = { .get_trace = 0, };
 663        struct intel_pt_queue *ptq;
 664
 665        ptq = zalloc(sizeof(struct intel_pt_queue));
 666        if (!ptq)
 667                return NULL;
 668
 669        if (pt->synth_opts.callchain) {
 670                size_t sz = sizeof(struct ip_callchain);
 671
 672                sz += pt->synth_opts.callchain_sz * sizeof(u64);
 673                ptq->chain = zalloc(sz);
 674                if (!ptq->chain)
 675                        goto out_free;
 676        }
 677
 678        ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
 679        if (!ptq->event_buf)
 680                goto out_free;
 681
 682        ptq->pt = pt;
 683        ptq->queue_nr = queue_nr;
 684        ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
 685        ptq->pid = -1;
 686        ptq->tid = -1;
 687        ptq->cpu = -1;
 688        ptq->next_tid = -1;
 689
 690        params.get_trace = intel_pt_get_trace;
 691        params.walk_insn = intel_pt_walk_next_insn;
 692        params.data = ptq;
 693        params.return_compression = intel_pt_return_compression(pt);
 694        params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
 695        params.mtc_period = intel_pt_mtc_period(pt);
 696        params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
 697        params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
 698
 699        if (pt->synth_opts.instructions) {
 700                if (pt->synth_opts.period) {
 701                        switch (pt->synth_opts.period_type) {
 702                        case PERF_ITRACE_PERIOD_INSTRUCTIONS:
 703                                params.period_type =
 704                                                INTEL_PT_PERIOD_INSTRUCTIONS;
 705                                params.period = pt->synth_opts.period;
 706                                break;
 707                        case PERF_ITRACE_PERIOD_TICKS:
 708                                params.period_type = INTEL_PT_PERIOD_TICKS;
 709                                params.period = pt->synth_opts.period;
 710                                break;
 711                        case PERF_ITRACE_PERIOD_NANOSECS:
 712                                params.period_type = INTEL_PT_PERIOD_TICKS;
 713                                params.period = intel_pt_ns_to_ticks(pt,
 714                                                        pt->synth_opts.period);
 715                                break;
 716                        default:
 717                                break;
 718                        }
 719                }
 720
 721                if (!params.period) {
 722                        params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
 723                        params.period = 1000;
 724                }
 725        }
 726
 727        ptq->decoder = intel_pt_decoder_new(&params);
 728        if (!ptq->decoder)
 729                goto out_free;
 730
 731        return ptq;
 732
 733out_free:
 734        zfree(&ptq->event_buf);
 735        zfree(&ptq->chain);
 736        free(ptq);
 737        return NULL;
 738}
 739
 740static void intel_pt_free_queue(void *priv)
 741{
 742        struct intel_pt_queue *ptq = priv;
 743
 744        if (!ptq)
 745                return;
 746        thread__zput(ptq->thread);
 747        intel_pt_decoder_free(ptq->decoder);
 748        zfree(&ptq->event_buf);
 749        zfree(&ptq->chain);
 750        free(ptq);
 751}
 752
 753static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
 754                                     struct auxtrace_queue *queue)
 755{
 756        struct intel_pt_queue *ptq = queue->priv;
 757
 758        if (queue->tid == -1 || pt->have_sched_switch) {
 759                ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
 760                thread__zput(ptq->thread);
 761        }
 762
 763        if (!ptq->thread && ptq->tid != -1)
 764                ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
 765
 766        if (ptq->thread) {
 767                ptq->pid = ptq->thread->pid_;
 768                if (queue->cpu == -1)
 769                        ptq->cpu = ptq->thread->cpu;
 770        }
 771}
 772
 773static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 774{
 775        if (ptq->state->flags & INTEL_PT_ABORT_TX) {
 776                ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
 777        } else if (ptq->state->flags & INTEL_PT_ASYNC) {
 778                if (ptq->state->to_ip)
 779                        ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
 780                                     PERF_IP_FLAG_ASYNC |
 781                                     PERF_IP_FLAG_INTERRUPT;
 782                else
 783                        ptq->flags = PERF_IP_FLAG_BRANCH |
 784                                     PERF_IP_FLAG_TRACE_END;
 785                ptq->insn_len = 0;
 786        } else {
 787                if (ptq->state->from_ip)
 788                        ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
 789                else
 790                        ptq->flags = PERF_IP_FLAG_BRANCH |
 791                                     PERF_IP_FLAG_TRACE_BEGIN;
 792                if (ptq->state->flags & INTEL_PT_IN_TX)
 793                        ptq->flags |= PERF_IP_FLAG_IN_TX;
 794                ptq->insn_len = ptq->state->insn_len;
 795        }
 796}
 797
 798static int intel_pt_setup_queue(struct intel_pt *pt,
 799                                struct auxtrace_queue *queue,
 800                                unsigned int queue_nr)
 801{
 802        struct intel_pt_queue *ptq = queue->priv;
 803
 804        if (list_empty(&queue->head))
 805                return 0;
 806
 807        if (!ptq) {
 808                ptq = intel_pt_alloc_queue(pt, queue_nr);
 809                if (!ptq)
 810                        return -ENOMEM;
 811                queue->priv = ptq;
 812
 813                if (queue->cpu != -1)
 814                        ptq->cpu = queue->cpu;
 815                ptq->tid = queue->tid;
 816
 817                if (pt->sampling_mode) {
 818                        if (pt->timeless_decoding)
 819                                ptq->step_through_buffers = true;
 820                        if (pt->timeless_decoding || !pt->have_sched_switch)
 821                                ptq->use_buffer_pid_tid = true;
 822                }
 823        }
 824
 825        if (!ptq->on_heap &&
 826            (!pt->sync_switch ||
 827             ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
 828                const struct intel_pt_state *state;
 829                int ret;
 830
 831                if (pt->timeless_decoding)
 832                        return 0;
 833
 834                intel_pt_log("queue %u getting timestamp\n", queue_nr);
 835                intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
 836                             queue_nr, ptq->cpu, ptq->pid, ptq->tid);
 837                while (1) {
 838                        state = intel_pt_decode(ptq->decoder);
 839                        if (state->err) {
 840                                if (state->err == INTEL_PT_ERR_NODATA) {
 841                                        intel_pt_log("queue %u has no timestamp\n",
 842                                                     queue_nr);
 843                                        return 0;
 844                                }
 845                                continue;
 846                        }
 847                        if (state->timestamp)
 848                                break;
 849                }
 850
 851                ptq->timestamp = state->timestamp;
 852                intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
 853                             queue_nr, ptq->timestamp);
 854                ptq->state = state;
 855                ptq->have_sample = true;
 856                intel_pt_sample_flags(ptq);
 857                ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
 858                if (ret)
 859                        return ret;
 860                ptq->on_heap = true;
 861        }
 862
 863        return 0;
 864}
 865
 866static int intel_pt_setup_queues(struct intel_pt *pt)
 867{
 868        unsigned int i;
 869        int ret;
 870
 871        for (i = 0; i < pt->queues.nr_queues; i++) {
 872                ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
 873                if (ret)
 874                        return ret;
 875        }
 876        return 0;
 877}
 878
 879static int intel_pt_inject_event(union perf_event *event,
 880                                 struct perf_sample *sample, u64 type,
 881                                 bool swapped)
 882{
 883        event->header.size = perf_event__sample_event_size(sample, type, 0);
 884        return perf_event__synthesize_sample(event, type, 0, sample, swapped);
 885}
 886
 887static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 888{
 889        int ret;
 890        struct intel_pt *pt = ptq->pt;
 891        union perf_event *event = ptq->event_buf;
 892        struct perf_sample sample = { .ip = 0, };
 893
 894        event->sample.header.type = PERF_RECORD_SAMPLE;
 895        event->sample.header.misc = PERF_RECORD_MISC_USER;
 896        event->sample.header.size = sizeof(struct perf_event_header);
 897
 898        if (!pt->timeless_decoding)
 899                sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
 900
 901        sample.ip = ptq->state->from_ip;
 902        sample.pid = ptq->pid;
 903        sample.tid = ptq->tid;
 904        sample.addr = ptq->state->to_ip;
 905        sample.id = ptq->pt->branches_id;
 906        sample.stream_id = ptq->pt->branches_id;
 907        sample.period = 1;
 908        sample.cpu = ptq->cpu;
 909        sample.flags = ptq->flags;
 910        sample.insn_len = ptq->insn_len;
 911
 912        if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
 913                return 0;
 914
 915        if (pt->synth_opts.inject) {
 916                ret = intel_pt_inject_event(event, &sample,
 917                                            pt->branches_sample_type,
 918                                            pt->synth_needs_swap);
 919                if (ret)
 920                        return ret;
 921        }
 922
 923        ret = perf_session__deliver_synth_event(pt->session, event, &sample);
 924        if (ret)
 925                pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
 926                       ret);
 927
 928        return ret;
 929}
 930
 931static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 932{
 933        int ret;
 934        struct intel_pt *pt = ptq->pt;
 935        union perf_event *event = ptq->event_buf;
 936        struct perf_sample sample = { .ip = 0, };
 937
 938        event->sample.header.type = PERF_RECORD_SAMPLE;
 939        event->sample.header.misc = PERF_RECORD_MISC_USER;
 940        event->sample.header.size = sizeof(struct perf_event_header);
 941
 942        if (!pt->timeless_decoding)
 943                sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
 944
 945        sample.ip = ptq->state->from_ip;
 946        sample.pid = ptq->pid;
 947        sample.tid = ptq->tid;
 948        sample.addr = ptq->state->to_ip;
 949        sample.id = ptq->pt->instructions_id;
 950        sample.stream_id = ptq->pt->instructions_id;
 951        sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
 952        sample.cpu = ptq->cpu;
 953        sample.flags = ptq->flags;
 954        sample.insn_len = ptq->insn_len;
 955
 956        ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
 957
 958        if (pt->synth_opts.callchain) {
 959                thread_stack__sample(ptq->thread, ptq->chain,
 960                                     pt->synth_opts.callchain_sz, sample.ip);
 961                sample.callchain = ptq->chain;
 962        }
 963
 964        if (pt->synth_opts.inject) {
 965                ret = intel_pt_inject_event(event, &sample,
 966                                            pt->instructions_sample_type,
 967                                            pt->synth_needs_swap);
 968                if (ret)
 969                        return ret;
 970        }
 971
 972        ret = perf_session__deliver_synth_event(pt->session, event, &sample);
 973        if (ret)
 974                pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
 975                       ret);
 976
 977        return ret;
 978}
 979
 980static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
 981{
 982        int ret;
 983        struct intel_pt *pt = ptq->pt;
 984        union perf_event *event = ptq->event_buf;
 985        struct perf_sample sample = { .ip = 0, };
 986
 987        event->sample.header.type = PERF_RECORD_SAMPLE;
 988        event->sample.header.misc = PERF_RECORD_MISC_USER;
 989        event->sample.header.size = sizeof(struct perf_event_header);
 990
 991        if (!pt->timeless_decoding)
 992                sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
 993
 994        sample.ip = ptq->state->from_ip;
 995        sample.pid = ptq->pid;
 996        sample.tid = ptq->tid;
 997        sample.addr = ptq->state->to_ip;
 998        sample.id = ptq->pt->transactions_id;
 999        sample.stream_id = ptq->pt->transactions_id;
1000        sample.period = 1;
1001        sample.cpu = ptq->cpu;
1002        sample.flags = ptq->flags;
1003        sample.insn_len = ptq->insn_len;
1004
1005        if (pt->synth_opts.callchain) {
1006                thread_stack__sample(ptq->thread, ptq->chain,
1007                                     pt->synth_opts.callchain_sz, sample.ip);
1008                sample.callchain = ptq->chain;
1009        }
1010
1011        if (pt->synth_opts.inject) {
1012                ret = intel_pt_inject_event(event, &sample,
1013                                            pt->transactions_sample_type,
1014                                            pt->synth_needs_swap);
1015                if (ret)
1016                        return ret;
1017        }
1018
1019        ret = perf_session__deliver_synth_event(pt->session, event, &sample);
1020        if (ret)
1021                pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
1022                       ret);
1023
1024        return ret;
1025}
1026
1027static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1028                                pid_t pid, pid_t tid, u64 ip)
1029{
1030        union perf_event event;
1031        char msg[MAX_AUXTRACE_ERROR_MSG];
1032        int err;
1033
1034        intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
1035
1036        auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1037                             code, cpu, pid, tid, ip, msg);
1038
1039        err = perf_session__deliver_synth_event(pt->session, &event, NULL);
1040        if (err)
1041                pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
1042                       err);
1043
1044        return err;
1045}
1046
1047static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
1048{
1049        struct auxtrace_queue *queue;
1050        pid_t tid = ptq->next_tid;
1051        int err;
1052
1053        if (tid == -1)
1054                return 0;
1055
1056        intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
1057
1058        err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
1059
1060        queue = &pt->queues.queue_array[ptq->queue_nr];
1061        intel_pt_set_pid_tid_cpu(pt, queue);
1062
1063        ptq->next_tid = -1;
1064
1065        return err;
1066}
1067
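/*
 * Return true if this is a synchronous branch to the kernel's
 * __switch_to address (i.e. not conditional, asynchronous, an
 * interrupt or a transaction abort).
 */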
1068static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1069{
1070        struct intel_pt *pt = ptq->pt;
1071
1072        return ip == pt->switch_ip &&
1073               (ptq->flags & PERF_IP_FLAG_BRANCH) &&
1074               !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
1075                               PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
1076}
1077
1078static int intel_pt_sample(struct intel_pt_queue *ptq)
1079{
1080        const struct intel_pt_state *state = ptq->state;
1081        struct intel_pt *pt = ptq->pt;
1082        int err;
1083
1084        if (!ptq->have_sample)
1085                return 0;
1086
1087        ptq->have_sample = false;
1088
1089        if (pt->sample_instructions &&
1090            (state->type & INTEL_PT_INSTRUCTION)) {
1091                err = intel_pt_synth_instruction_sample(ptq);
1092                if (err)
1093                        return err;
1094        }
1095
1096        if (pt->sample_transactions &&
1097            (state->type & INTEL_PT_TRANSACTION)) {
1098                err = intel_pt_synth_transaction_sample(ptq);
1099                if (err)
1100                        return err;
1101        }
1102
1103        if (!(state->type & INTEL_PT_BRANCH))
1104                return 0;
1105
1106        if (pt->synth_opts.callchain)
1107                thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
1108                                    state->to_ip, ptq->insn_len,
1109                                    state->trace_nr);
1110        else
1111                thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
1112
1113        if (pt->sample_branches) {
1114                err = intel_pt_synth_branch_sample(ptq);
1115                if (err)
1116                        return err;
1117        }
1118
1119        if (!pt->sync_switch)
1120                return 0;
1121
1122        if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
1123                switch (ptq->switch_state) {
1124                case INTEL_PT_SS_UNKNOWN:
1125                case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1126                        err = intel_pt_next_tid(pt, ptq);
1127                        if (err)
1128                                return err;
1129                        ptq->switch_state = INTEL_PT_SS_TRACING;
1130                        break;
1131                default:
1132                        ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
1133                        return 1;
1134                }
1135        } else if (!state->to_ip) {
1136                ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
1137        } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
1138                ptq->switch_state = INTEL_PT_SS_UNKNOWN;
1139        } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1140                   state->to_ip == pt->ptss_ip &&
1141                   (ptq->flags & PERF_IP_FLAG_CALL)) {
1142                ptq->switch_state = INTEL_PT_SS_TRACING;
1143        }
1144
1145        return 0;
1146}
1147
1148static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip)
1149{
1150        struct map *map;
1151        struct symbol *sym, *start;
1152        u64 ip, switch_ip = 0;
1153
1154        if (ptss_ip)
1155                *ptss_ip = 0;
1156
1157        map = machine__kernel_map(machine, MAP__FUNCTION);
1158        if (!map)
1159                return 0;
1160
1161        if (map__load(map, machine->symbol_filter))
1162                return 0;
1163
1164        start = dso__first_symbol(map->dso, MAP__FUNCTION);
1165
1166        for (sym = start; sym; sym = dso__next_symbol(sym)) {
1167                if (sym->binding == STB_GLOBAL &&
1168                    !strcmp(sym->name, "__switch_to")) {
1169                        ip = map->unmap_ip(map, sym->start);
1170                        if (ip >= map->start && ip < map->end) {
1171                                switch_ip = ip;
1172                                break;
1173                        }
1174                }
1175        }
1176
1177        if (!switch_ip || !ptss_ip)
1178                return 0;
1179
1180        for (sym = start; sym; sym = dso__next_symbol(sym)) {
1181                if (!strcmp(sym->name, "perf_trace_sched_switch")) {
1182                        ip = map->unmap_ip(map, sym->start);
1183                        if (ip >= map->start && ip < map->end) {
1184                                *ptss_ip = ip;
1185                                break;
1186                        }
1187                }
1188        }
1189
1190        return switch_ip;
1191}
1192
1193static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1194{
1195        const struct intel_pt_state *state = ptq->state;
1196        struct intel_pt *pt = ptq->pt;
1197        int err;
1198
1199        if (!pt->kernel_start) {
1200                pt->kernel_start = machine__kernel_start(pt->machine);
1201                if (pt->per_cpu_mmaps && pt->have_sched_switch &&
1202                    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
1203                    !pt->sampling_mode) {
1204                        pt->switch_ip = intel_pt_switch_ip(pt->machine,
1205                                                           &pt->ptss_ip);
1206                        if (pt->switch_ip) {
1207                                intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
1208                                             pt->switch_ip, pt->ptss_ip);
1209                                pt->sync_switch = true;
1210                        }
1211                }
1212        }
1213
1214        intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1215                     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1216        while (1) {
1217                err = intel_pt_sample(ptq);
1218                if (err)
1219                        return err;
1220
1221                state = intel_pt_decode(ptq->decoder);
1222                if (state->err) {
1223                        if (state->err == INTEL_PT_ERR_NODATA)
1224                                return 1;
1225                        if (pt->sync_switch &&
1226                            state->from_ip >= pt->kernel_start) {
1227                                pt->sync_switch = false;
1228                                intel_pt_next_tid(pt, ptq);
1229                        }
1230                        if (pt->synth_opts.errors) {
1231                                err = intel_pt_synth_error(pt, state->err,
1232                                                           ptq->cpu, ptq->pid,
1233                                                           ptq->tid,
1234                                                           state->from_ip);
1235                                if (err)
1236                                        return err;
1237                        }
1238                        continue;
1239                }
1240
1241                ptq->state = state;
1242                ptq->have_sample = true;
1243                intel_pt_sample_flags(ptq);
1244
1245                /* Use estimated TSC upon return to user space */
1246                if (pt->est_tsc &&
1247                    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
1248                    state->to_ip && state->to_ip < pt->kernel_start) {
1249                        intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1250                                     state->timestamp, state->est_timestamp);
1251                        ptq->timestamp = state->est_timestamp;
1252                /* Use estimated TSC in unknown switch state */
1253                } else if (pt->sync_switch &&
1254                           ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1255                           intel_pt_is_switch_ip(ptq, state->to_ip) &&
1256                           ptq->next_tid == -1) {
1257                        intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1258                                     state->timestamp, state->est_timestamp);
1259                        ptq->timestamp = state->est_timestamp;
1260                } else if (state->timestamp > ptq->timestamp) {
1261                        ptq->timestamp = state->timestamp;
1262                }
1263
1264                if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
1265                        *timestamp = ptq->timestamp;
1266                        return 0;
1267                }
1268        }
1269        return 0;
1270}
1271
1272static inline int intel_pt_update_queues(struct intel_pt *pt)
1273{
1274        if (pt->queues.new_data) {
1275                pt->queues.new_data = false;
1276                return intel_pt_setup_queues(pt);
1277        }
1278        return 0;
1279}
1280
1281static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
1282{
1283        unsigned int queue_nr;
1284        u64 ts;
1285        int ret;
1286
1287        while (1) {
1288                struct auxtrace_queue *queue;
1289                struct intel_pt_queue *ptq;
1290
1291                if (!pt->heap.heap_cnt)
1292                        return 0;
1293
1294                if (pt->heap.heap_array[0].ordinal >= timestamp)
1295                        return 0;
1296
1297                queue_nr = pt->heap.heap_array[0].queue_nr;
1298                queue = &pt->queues.queue_array[queue_nr];
1299                ptq = queue->priv;
1300
1301                intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
1302                             queue_nr, pt->heap.heap_array[0].ordinal,
1303                             timestamp);
1304
1305                auxtrace_heap__pop(&pt->heap);
1306
1307                if (pt->heap.heap_cnt) {
1308                        ts = pt->heap.heap_array[0].ordinal + 1;
1309                        if (ts > timestamp)
1310                                ts = timestamp;
1311                } else {
1312                        ts = timestamp;
1313                }
1314
1315                intel_pt_set_pid_tid_cpu(pt, queue);
1316
1317                ret = intel_pt_run_decoder(ptq, &ts);
1318
1319                if (ret < 0) {
1320                        auxtrace_heap__add(&pt->heap, queue_nr, ts);
1321                        return ret;
1322                }
1323
1324                if (!ret) {
1325                        ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
1326                        if (ret < 0)
1327                                return ret;
1328                } else {
1329                        ptq->on_heap = false;
1330                }
1331        }
1332
1333        return 0;
1334}
1335
1336static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
1337                                            u64 time_)
1338{
1339        struct auxtrace_queues *queues = &pt->queues;
1340        unsigned int i;
1341        u64 ts = 0;
1342
1343        for (i = 0; i < queues->nr_queues; i++) {
1344                struct auxtrace_queue *queue = &pt->queues.queue_array[i];
1345                struct intel_pt_queue *ptq = queue->priv;
1346
1347                if (ptq && (tid == -1 || ptq->tid == tid)) {
1348                        ptq->time = time_;
1349                        intel_pt_set_pid_tid_cpu(pt, queue);
1350                        intel_pt_run_decoder(ptq, &ts);
1351                }
1352        }
1353        return 0;
1354}
1355
1356static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
1357{
1358        return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
1359                                    sample->pid, sample->tid, 0);
1360}
1361
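/*
 * Find the queue for a given CPU: start at queue 'cpu' (or the last
 * queue) and search backwards, then forwards, for a matching queue.
 */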
1362static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
1363{
1364        unsigned i, j;
1365
1366        if (cpu < 0 || !pt->queues.nr_queues)
1367                return NULL;
1368
1369        if ((unsigned)cpu >= pt->queues.nr_queues)
1370                i = pt->queues.nr_queues - 1;
1371        else
1372                i = cpu;
1373
1374        if (pt->queues.queue_array[i].cpu == cpu)
1375                return pt->queues.queue_array[i].priv;
1376
1377        for (j = 0; i > 0; j++) {
1378                if (pt->queues.queue_array[--i].cpu == cpu)
1379                        return pt->queues.queue_array[i].priv;
1380        }
1381
1382        for (; j < pt->queues.nr_queues; j++) {
1383                if (pt->queues.queue_array[j].cpu == cpu)
1384                        return pt->queues.queue_array[j].priv;
1385        }
1386
1387        return NULL;
1388}
1389
1390static int intel_pt_process_switch(struct intel_pt *pt,
1391                                   struct perf_sample *sample)
1392{
1393        struct intel_pt_queue *ptq;
1394        struct perf_evsel *evsel;
1395        pid_t tid;
1396        int cpu, err;
1397
1398        evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
1399        if (evsel != pt->switch_evsel)
1400                return 0;
1401
1402        tid = perf_evsel__intval(evsel, sample, "next_pid");
1403        cpu = sample->cpu;
1404
1405        intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1406                     cpu, tid, sample->time, perf_time_to_tsc(sample->time,
1407                     &pt->tc));
1408
1409        if (!pt->sync_switch)
1410                goto out;
1411
1412        ptq = intel_pt_cpu_to_ptq(pt, cpu);
1413        if (!ptq)
1414                goto out;
1415
1416        switch (ptq->switch_state) {
1417        case INTEL_PT_SS_NOT_TRACING:
1418                ptq->next_tid = -1;
1419                break;
1420        case INTEL_PT_SS_UNKNOWN:
1421        case INTEL_PT_SS_TRACING:
1422                ptq->next_tid = tid;
1423                ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
1424                return 0;
1425        case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
1426                if (!ptq->on_heap) {
1427                        ptq->timestamp = perf_time_to_tsc(sample->time,
1428                                                          &pt->tc);
1429                        err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
1430                                                 ptq->timestamp);
1431                        if (err)
1432                                return err;
1433                        ptq->on_heap = true;
1434                }
1435                ptq->switch_state = INTEL_PT_SS_TRACING;
1436                break;
1437        case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1438                ptq->next_tid = tid;
1439                intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
1440                break;
1441        default:
1442                break;
1443        }
1444out:
1445        return machine__set_current_tid(pt->machine, cpu, -1, tid);
1446}
1447
1448static int intel_pt_process_itrace_start(struct intel_pt *pt,
1449                                         union perf_event *event,
1450                                         struct perf_sample *sample)
1451{
1452        if (!pt->per_cpu_mmaps)
1453                return 0;
1454
1455        intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1456                     sample->cpu, event->itrace_start.pid,
1457                     event->itrace_start.tid, sample->time,
1458                     perf_time_to_tsc(sample->time, &pt->tc));
1459
1460        return machine__set_current_tid(pt->machine, sample->cpu,
1461                                        event->itrace_start.pid,
1462                                        event->itrace_start.tid);
1463}
1464
1465static int intel_pt_process_event(struct perf_session *session,
1466                                  union perf_event *event,
1467                                  struct perf_sample *sample,
1468                                  struct perf_tool *tool)
1469{
1470        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1471                                           auxtrace);
1472        u64 timestamp;
1473        int err = 0;
1474
1475        if (dump_trace)
1476                return 0;
1477
1478        if (!tool->ordered_events) {
1479                pr_err("Intel Processor Trace requires ordered events\n");
1480                return -EINVAL;
1481        }
1482
1483        if (sample->time && sample->time != (u64)-1)
1484                timestamp = perf_time_to_tsc(sample->time, &pt->tc);
1485        else
1486                timestamp = 0;
1487
1488        if (timestamp || pt->timeless_decoding) {
1489                err = intel_pt_update_queues(pt);
1490                if (err)
1491                        return err;
1492        }
1493
1494        if (pt->timeless_decoding) {
1495                if (event->header.type == PERF_RECORD_EXIT) {
1496                        err = intel_pt_process_timeless_queues(pt,
1497                                                               event->fork.tid,
1498                                                               sample->time);
1499                }
1500        } else if (timestamp) {
1501                err = intel_pt_process_queues(pt, timestamp);
1502        }
1503        if (err)
1504                return err;
1505
1506        if (event->header.type == PERF_RECORD_AUX &&
1507            (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
1508            pt->synth_opts.errors) {
1509                err = intel_pt_lost(pt, sample);
1510                if (err)
1511                        return err;
1512        }
1513
1514        if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
1515                err = intel_pt_process_switch(pt, sample);
1516        else if (event->header.type == PERF_RECORD_ITRACE_START)
1517                err = intel_pt_process_itrace_start(pt, event, sample);
1518
1519        intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
1520                     perf_event__name(event->header.type), event->header.type,
1521                     sample->cpu, sample->time, timestamp);
1522
1523        return err;
1524}
1525
1526static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
1527{
1528        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1529                                           auxtrace);
1530        int ret;
1531
1532        if (dump_trace)
1533                return 0;
1534
1535        if (!tool->ordered_events)
1536                return -EINVAL;
1537
1538        ret = intel_pt_update_queues(pt);
1539        if (ret < 0)
1540                return ret;
1541
1542        if (pt->timeless_decoding)
1543                return intel_pt_process_timeless_queues(pt, -1,
1544                                                        MAX_TIMESTAMP - 1);
1545
1546        return intel_pt_process_queues(pt, MAX_TIMESTAMP);
1547}
1548
1549static void intel_pt_free_events(struct perf_session *session)
1550{
1551        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1552                                           auxtrace);
1553        struct auxtrace_queues *queues = &pt->queues;
1554        unsigned int i;
1555
1556        for (i = 0; i < queues->nr_queues; i++) {
1557                intel_pt_free_queue(queues->queue_array[i].priv);
1558                queues->queue_array[i].priv = NULL;
1559        }
1560        intel_pt_log_disable();
1561        auxtrace_queues__free(queues);
1562}
1563
1564static void intel_pt_free(struct perf_session *session)
1565{
1566        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1567                                           auxtrace);
1568
1569        auxtrace_heap__free(&pt->heap);
1570        intel_pt_free_events(session);
1571        session->auxtrace = NULL;
1572        thread__delete(pt->unknown_thread);
1573        free(pt);
1574}
1575
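    /*
     * Queue AUXTRACE data buffers as they are encountered in the data
     * file.  If the buffers were already queued from the auxtrace index
     * (data_queued), there is nothing left to do here.
     */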
1576static int intel_pt_process_auxtrace_event(struct perf_session *session,
1577                                           union perf_event *event,
1578                                           struct perf_tool *tool __maybe_unused)
1579{
1580        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1581                                           auxtrace);
1582
1583        if (pt->sampling_mode)
1584                return 0;
1585
1586        if (!pt->data_queued) {
1587                struct auxtrace_buffer *buffer;
1588                off_t data_offset;
1589                int fd = perf_data_file__fd(session->file);
1590                int err;
1591
1592                if (perf_data_file__is_pipe(session->file)) {
1593                        data_offset = 0;
1594                } else {
1595                        data_offset = lseek(fd, 0, SEEK_CUR);
1596                        if (data_offset == -1)
1597                                return -errno;
1598                }
1599
1600                err = auxtrace_queues__add_event(&pt->queues, session, event,
1601                                                 data_offset, &buffer);
1602                if (err)
1603                        return err;
1604
1605                /* Dump here now that we have copied a piped trace out of the pipe */
1606                if (dump_trace) {
1607                        if (auxtrace_buffer__get_data(buffer, fd)) {
1608                                intel_pt_dump_event(pt, buffer->data,
1609                                                    buffer->size);
1610                                auxtrace_buffer__put_data(buffer);
1611                        }
1612                }
1613        }
1614
1615        return 0;
1616}
1617
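    /*
     * Synthesized event attributes are injected into the session via a
     * dummy tool whose callback simply delivers the synthesized event
     * back to the session.
     */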
1618struct intel_pt_synth {
1619        struct perf_tool dummy_tool;
1620        struct perf_session *session;
1621};
1622
1623static int intel_pt_event_synth(struct perf_tool *tool,
1624                                union perf_event *event,
1625                                struct perf_sample *sample __maybe_unused,
1626                                struct machine *machine __maybe_unused)
1627{
1628        struct intel_pt_synth *intel_pt_synth =
1629                        container_of(tool, struct intel_pt_synth, dummy_tool);
1630
1631        return perf_session__deliver_synth_event(intel_pt_synth->session, event,
1632                                                 NULL);
1633}
1634
1635static int intel_pt_synth_event(struct perf_session *session,
1636                                struct perf_event_attr *attr, u64 id)
1637{
1638        struct intel_pt_synth intel_pt_synth;
1639
1640        memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
1641        intel_pt_synth.session = session;
1642
1643        return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
1644                                           &id, intel_pt_event_synth);
1645}
1646
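    /*
     * Create the events that will carry the decoded trace: 'instructions',
     * 'transactions' and 'branches'.  Their attributes are derived from
     * the Intel PT event itself, and each gets its own sample id so that
     * synthesized samples resolve to the right evsel.
     */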
1647static int intel_pt_synth_events(struct intel_pt *pt,
1648                                 struct perf_session *session)
1649{
1650        struct perf_evlist *evlist = session->evlist;
1651        struct perf_evsel *evsel;
1652        struct perf_event_attr attr;
1653        bool found = false;
1654        u64 id;
1655        int err;
1656
1657        evlist__for_each(evlist, evsel) {
1658                if (evsel->attr.type == pt->pmu_type && evsel->ids) {
1659                        found = true;
1660                        break;
1661                }
1662        }
1663
1664        if (!found) {
1665                pr_debug("There are no selected events with Intel Processor Trace data\n");
1666                return 0;
1667        }
1668
1669        memset(&attr, 0, sizeof(struct perf_event_attr));
1670        attr.size = sizeof(struct perf_event_attr);
1671        attr.type = PERF_TYPE_HARDWARE;
1672        attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
1673        attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1674                            PERF_SAMPLE_PERIOD;
1675        if (pt->timeless_decoding)
1676                attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1677        else
1678                attr.sample_type |= PERF_SAMPLE_TIME;
1679        if (!pt->per_cpu_mmaps)
1680                attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
1681        attr.exclude_user = evsel->attr.exclude_user;
1682        attr.exclude_kernel = evsel->attr.exclude_kernel;
1683        attr.exclude_hv = evsel->attr.exclude_hv;
1684        attr.exclude_host = evsel->attr.exclude_host;
1685        attr.exclude_guest = evsel->attr.exclude_guest;
1686        attr.sample_id_all = evsel->attr.sample_id_all;
1687        attr.read_format = evsel->attr.read_format;
1688
1689        id = evsel->id[0] + 1000000000;
1690        if (!id)
1691                id = 1;
1692
1693        if (pt->synth_opts.instructions) {
1694                attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1695                if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
1696                        attr.sample_period =
1697                                intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
1698                else
1699                        attr.sample_period = pt->synth_opts.period;
1700                pt->instructions_sample_period = attr.sample_period;
1701                if (pt->synth_opts.callchain)
1702                        attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1703                pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1704                         id, (u64)attr.sample_type);
1705                err = intel_pt_synth_event(session, &attr, id);
1706                if (err) {
1707                        pr_err("%s: failed to synthesize 'instructions' event type\n",
1708                               __func__);
1709                        return err;
1710                }
1711                pt->sample_instructions = true;
1712                pt->instructions_sample_type = attr.sample_type;
1713                pt->instructions_id = id;
1714                id += 1;
1715        }
1716
1717        if (pt->synth_opts.transactions) {
1718                attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1719                attr.sample_period = 1;
1720                if (pt->synth_opts.callchain)
1721                        attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1722                pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1723                         id, (u64)attr.sample_type);
1724                err = intel_pt_synth_event(session, &attr, id);
1725                if (err) {
1726                        pr_err("%s: failed to synthesize 'transactions' event type\n",
1727                               __func__);
1728                        return err;
1729                }
1730                pt->sample_transactions = true;
1731                pt->transactions_id = id;
1732                id += 1;
1733                evlist__for_each(evlist, evsel) {
1734                        if (evsel->id && evsel->id[0] == pt->transactions_id) {
1735                                if (evsel->name)
1736                                        zfree(&evsel->name);
1737                                evsel->name = strdup("transactions");
1738                                break;
1739                        }
1740                }
1741        }
1742
1743        if (pt->synth_opts.branches) {
1744                attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1745                attr.sample_period = 1;
1746                attr.sample_type |= PERF_SAMPLE_ADDR;
1747                attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
1748                pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1749                         id, (u64)attr.sample_type);
1750                err = intel_pt_synth_event(session, &attr, id);
1751                if (err) {
1752                        pr_err("%s: failed to synthesize 'branches' event type\n",
1753                               __func__);
1754                        return err;
1755                }
1756                pt->sample_branches = true;
1757                pt->branches_sample_type = attr.sample_type;
1758                pt->branches_id = id;
1759        }
1760
1761        pt->synth_needs_swap = evsel->needs_swap;
1762
1763        return 0;
1764}
1765
1766static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
1767{
1768        struct perf_evsel *evsel;
1769
1770        evlist__for_each_reverse(evlist, evsel) {
1771                const char *name = perf_evsel__name(evsel);
1772
1773                if (!strcmp(name, "sched:sched_switch"))
1774                        return evsel;
1775        }
1776
1777        return NULL;
1778}
1779
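    /*
     * Format strings used by intel_pt_print_info() to dump the
     * AUXTRACE_INFO parameters when raw trace dumping is enabled.
     */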
1780static const char * const intel_pt_info_fmts[] = {
1781        [INTEL_PT_PMU_TYPE]             = "  PMU Type            %"PRId64"\n",
1782        [INTEL_PT_TIME_SHIFT]           = "  Time Shift          %"PRIu64"\n",
1783        [INTEL_PT_TIME_MULT]            = "  Time Multiplier     %"PRIu64"\n",
1784        [INTEL_PT_TIME_ZERO]            = "  Time Zero           %"PRIu64"\n",
1785        [INTEL_PT_CAP_USER_TIME_ZERO]   = "  Cap Time Zero       %"PRId64"\n",
1786        [INTEL_PT_TSC_BIT]              = "  TSC bit             %#"PRIx64"\n",
1787        [INTEL_PT_NORETCOMP_BIT]        = "  NoRETComp bit       %#"PRIx64"\n",
1788        [INTEL_PT_HAVE_SCHED_SWITCH]    = "  Have sched_switch   %"PRId64"\n",
1789        [INTEL_PT_SNAPSHOT_MODE]        = "  Snapshot mode       %"PRId64"\n",
1790        [INTEL_PT_PER_CPU_MMAPS]        = "  Per-cpu maps        %"PRId64"\n",
1791        [INTEL_PT_MTC_BIT]              = "  MTC bit             %#"PRIx64"\n",
1792        [INTEL_PT_TSC_CTC_N]            = "  TSC:CTC numerator   %"PRIu64"\n",
1793        [INTEL_PT_TSC_CTC_D]            = "  TSC:CTC denominator %"PRIu64"\n",
1794        [INTEL_PT_CYC_BIT]              = "  CYC bit             %#"PRIx64"\n",
1795};
1796
1797static void intel_pt_print_info(u64 *arr, int start, int finish)
1798{
1799        int i;
1800
1801        if (!dump_trace)
1802                return;
1803
1804        for (i = start; i <= finish; i++)
1805                fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
1806}
1807
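    /*
     * Entry point for PERF_RECORD_AUXTRACE_INFO: validate and read back the
     * recorded parameters, set up struct intel_pt and the auxtrace
     * callbacks, and synthesize the events used to report the decoded
     * trace.
     */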
1808int intel_pt_process_auxtrace_info(union perf_event *event,
1809                                   struct perf_session *session)
1810{
1811        struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
1812        size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
1813        struct intel_pt *pt;
1814        int err;
1815
1816        if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
1817                                        min_sz)
1818                return -EINVAL;
1819
1820        pt = zalloc(sizeof(struct intel_pt));
1821        if (!pt)
1822                return -ENOMEM;
1823
1824        err = auxtrace_queues__init(&pt->queues);
1825        if (err)
1826                goto err_free;
1827
1828        intel_pt_log_set_name(INTEL_PT_PMU_NAME);
1829
1830        pt->session = session;
1831        pt->machine = &session->machines.host; /* No kvm support */
1832        pt->auxtrace_type = auxtrace_info->type;
1833        pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
1834        pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
1835        pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
1836        pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
1837        pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
1838        pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
1839        pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
1840        pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
1841        pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
1842        pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
1843        intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
1844                            INTEL_PT_PER_CPU_MMAPS);
1845
1846        if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
1847                                        (sizeof(u64) * INTEL_PT_CYC_BIT)) {
1848                pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
1849                pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
1850                pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
1851                pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
1852                pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
1853                intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
1854                                    INTEL_PT_CYC_BIT);
1855        }
1856
1857        pt->timeless_decoding = intel_pt_timeless_decoding(pt);
1858        pt->have_tsc = intel_pt_have_tsc(pt);
1859        pt->sampling_mode = false;
1860        pt->est_tsc = !pt->timeless_decoding;
1861
1862        pt->unknown_thread = thread__new(999999999, 999999999);
1863        if (!pt->unknown_thread) {
1864                err = -ENOMEM;
1865                goto err_free_queues;
1866        }
1867        err = thread__set_comm(pt->unknown_thread, "unknown", 0);
1868        if (err)
1869                goto err_delete_thread;
1870        if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
1871                err = -ENOMEM;
1872                goto err_delete_thread;
1873        }
1874
1875        pt->auxtrace.process_event = intel_pt_process_event;
1876        pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
1877        pt->auxtrace.flush_events = intel_pt_flush;
1878        pt->auxtrace.free_events = intel_pt_free_events;
1879        pt->auxtrace.free = intel_pt_free;
1880        session->auxtrace = &pt->auxtrace;
1881
1882        if (dump_trace)
1883                return 0;
1884
1885        if (pt->have_sched_switch == 1) {
1886                pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
1887                if (!pt->switch_evsel) {
1888                        pr_err("%s: missing sched_switch event\n", __func__);
                            err = -EINVAL;
1889                        goto err_delete_thread;
1890                }
1891        }
1892
1893        if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
1894                pt->synth_opts = *session->itrace_synth_opts;
1895        } else {
1896                itrace_synth_opts__set_default(&pt->synth_opts);
1897                if (use_browser != -1) {
1898                        pt->synth_opts.branches = false;
1899                        pt->synth_opts.callchain = true;
1900                }
1901        }
1902
1903        if (pt->synth_opts.log)
1904                intel_pt_log_enable();
1905
1906        /* Maximum non-turbo ratio is TSC freq / 100 MHz */
1907        if (pt->tc.time_mult) {
1908                u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
1909
1910                pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
1911                intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
1912                intel_pt_log("Maximum non-turbo ratio %u\n",
1913                             pt->max_non_turbo_ratio);
1914        }
1915
1916        if (pt->synth_opts.calls)
1917                pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
1918                                       PERF_IP_FLAG_TRACE_END;
1919        if (pt->synth_opts.returns)
1920                pt->branches_filter |= PERF_IP_FLAG_RETURN |
1921                                       PERF_IP_FLAG_TRACE_BEGIN;
1922
1923        if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
1924                symbol_conf.use_callchain = true;
1925                if (callchain_register_param(&callchain_param) < 0) {
1926                        symbol_conf.use_callchain = false;
1927                        pt->synth_opts.callchain = false;
1928                }
1929        }
1930
1931        err = intel_pt_synth_events(pt, session);
1932        if (err)
1933                goto err_delete_thread;
1934
1935        err = auxtrace_queues__process_index(&pt->queues, session);
1936        if (err)
1937                goto err_delete_thread;
1938
1939        if (pt->queues.populated)
1940                pt->data_queued = true;
1941
1942        if (pt->timeless_decoding)
1943                pr_debug2("Intel PT decoding without timestamps\n");
1944
1945        return 0;
1946
1947err_delete_thread:
1948        thread__delete(pt->unknown_thread);
1949err_free_queues:
1950        intel_pt_log_disable();
1951        auxtrace_queues__free(&pt->queues);
1952        session->auxtrace = NULL;
1953err_free:
1954        free(pt);
1955        return err;
1956}
1957