linux/tools/perf/util/intel-pt.c
   1/*
   2 * intel_pt.c: Intel Processor Trace support
   3 * Copyright (c) 2013-2015, Intel Corporation.
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms and conditions of the GNU General Public License,
   7 * version 2, as published by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope it will be useful, but WITHOUT
  10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  12 * more details.
  13 *
  14 */
  15
  16#include <stdio.h>
  17#include <stdbool.h>
  18#include <errno.h>
  19#include <linux/kernel.h>
  20#include <linux/types.h>
  21
  22#include "../perf.h"
  23#include "session.h"
  24#include "machine.h"
  25#include "sort.h"
  26#include "tool.h"
  27#include "event.h"
  28#include "evlist.h"
  29#include "evsel.h"
  30#include "map.h"
  31#include "color.h"
  32#include "util.h"
  33#include "thread.h"
  34#include "thread-stack.h"
  35#include "symbol.h"
  36#include "callchain.h"
  37#include "dso.h"
  38#include "debug.h"
  39#include "auxtrace.h"
  40#include "tsc.h"
  41#include "intel-pt.h"
  42#include "config.h"
  43
  44#include "intel-pt-decoder/intel-pt-log.h"
  45#include "intel-pt-decoder/intel-pt-decoder.h"
  46#include "intel-pt-decoder/intel-pt-insn-decoder.h"
  47#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
  48
  49#define MAX_TIMESTAMP (~0ULL)
  50
  51struct intel_pt {
  52        struct auxtrace auxtrace;
  53        struct auxtrace_queues queues;
  54        struct auxtrace_heap heap;
  55        u32 auxtrace_type;
  56        struct perf_session *session;
  57        struct machine *machine;
  58        struct perf_evsel *switch_evsel;
  59        struct thread *unknown_thread;
  60        bool timeless_decoding;
  61        bool sampling_mode;
  62        bool snapshot_mode;
  63        bool per_cpu_mmaps;
  64        bool have_tsc;
  65        bool data_queued;
  66        bool est_tsc;
  67        bool sync_switch;
  68        bool mispred_all;
  69        int have_sched_switch;
  70        u32 pmu_type;
  71        u64 kernel_start;
  72        u64 switch_ip;
  73        u64 ptss_ip;
  74
  75        struct perf_tsc_conversion tc;
  76        bool cap_user_time_zero;
  77
  78        struct itrace_synth_opts synth_opts;
  79
  80        bool sample_instructions;
  81        u64 instructions_sample_type;
  82        u64 instructions_sample_period;
  83        u64 instructions_id;
  84
  85        bool sample_branches;
  86        u32 branches_filter;
  87        u64 branches_sample_type;
  88        u64 branches_id;
  89
  90        bool sample_transactions;
  91        u64 transactions_sample_type;
  92        u64 transactions_id;
  93
  94        bool synth_needs_swap;
  95
  96        u64 tsc_bit;
  97        u64 mtc_bit;
  98        u64 mtc_freq_bits;
  99        u32 tsc_ctc_ratio_n;
 100        u32 tsc_ctc_ratio_d;
 101        u64 cyc_bit;
 102        u64 noretcomp_bit;
 103        unsigned max_non_turbo_ratio;
 104
 105        unsigned long num_events;
 106
 107        char *filter;
 108        struct addr_filters filts;
 109};
 110
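/*
 * Per-queue states used to keep the decoder in step with context switch
 * events when "sync_switch" mode is in use.
 */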
 111enum switch_state {
 112        INTEL_PT_SS_NOT_TRACING,
 113        INTEL_PT_SS_UNKNOWN,
 114        INTEL_PT_SS_TRACING,
 115        INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
 116        INTEL_PT_SS_EXPECTING_SWITCH_IP,
 117};
 118
 119struct intel_pt_queue {
 120        struct intel_pt *pt;
 121        unsigned int queue_nr;
 122        struct auxtrace_buffer *buffer;
 123        void *decoder;
 124        const struct intel_pt_state *state;
 125        struct ip_callchain *chain;
 126        struct branch_stack *last_branch;
 127        struct branch_stack *last_branch_rb;
 128        size_t last_branch_pos;
 129        union perf_event *event_buf;
 130        bool on_heap;
 131        bool stop;
 132        bool step_through_buffers;
 133        bool use_buffer_pid_tid;
 134        pid_t pid, tid;
 135        int cpu;
 136        int switch_state;
 137        pid_t next_tid;
 138        struct thread *thread;
 139        bool exclude_kernel;
 140        bool have_sample;
 141        u64 time;
 142        u64 timestamp;
 143        u32 flags;
 144        u16 insn_len;
 145        u64 last_insn_cnt;
 146};
 147
 148static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
 149                          unsigned char *buf, size_t len)
 150{
 151        struct intel_pt_pkt packet;
 152        size_t pos = 0;
 153        int ret, pkt_len, i;
 154        char desc[INTEL_PT_PKT_DESC_MAX];
 155        const char *color = PERF_COLOR_BLUE;
 156
 157        color_fprintf(stdout, color,
 158                      ". ... Intel Processor Trace data: size %zu bytes\n",
 159                      len);
 160
 161        while (len) {
 162                ret = intel_pt_get_packet(buf, len, &packet);
 163                if (ret > 0)
 164                        pkt_len = ret;
 165                else
 166                        pkt_len = 1;
 167                printf(".");
 168                color_fprintf(stdout, color, "  %08x: ", pos);
 169                for (i = 0; i < pkt_len; i++)
 170                        color_fprintf(stdout, color, " %02x", buf[i]);
 171                for (; i < 16; i++)
 172                        color_fprintf(stdout, color, "   ");
 173                if (ret > 0) {
 174                        ret = intel_pt_pkt_desc(&packet, desc,
 175                                                INTEL_PT_PKT_DESC_MAX);
 176                        if (ret > 0)
 177                                color_fprintf(stdout, color, " %s\n", desc);
 178                } else {
 179                        color_fprintf(stdout, color, " Bad packet!\n");
 180                }
 181                pos += pkt_len;
 182                buf += pkt_len;
 183                len -= pkt_len;
 184        }
 185}
 186
 187static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
 188                                size_t len)
 189{
 190        printf(".\n");
 191        intel_pt_dump(pt, buf, len);
 192}
 193
 194static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
 195                                   struct auxtrace_buffer *b)
 196{
 197        void *start;
 198
 199        start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
 200                                      pt->have_tsc);
 201        if (!start)
 202                return -EINVAL;
 203        b->use_size = b->data + b->size - start;
 204        b->use_data = start;
 205        return 0;
 206}
 207
 208static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
 209                                        struct auxtrace_queue *queue,
 210                                        struct auxtrace_buffer *buffer)
 211{
 212        if (queue->cpu == -1 && buffer->cpu != -1)
 213                ptq->cpu = buffer->cpu;
 214
 215        ptq->pid = buffer->pid;
 216        ptq->tid = buffer->tid;
 217
 218        intel_pt_log("queue %u cpu %d pid %d tid %d\n",
 219                     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
 220
 221        thread__zput(ptq->thread);
 222
 223        if (ptq->tid != -1) {
 224                if (ptq->pid != -1)
 225                        ptq->thread = machine__findnew_thread(ptq->pt->machine,
 226                                                              ptq->pid,
 227                                                              ptq->tid);
 228                else
 229                        ptq->thread = machine__find_thread(ptq->pt->machine, -1,
 230                                                           ptq->tid);
 231        }
 232}
 233
 234/* This function assumes data is processed sequentially only */
 235static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
 236{
 237        struct intel_pt_queue *ptq = data;
 238        struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
 239        struct auxtrace_queue *queue;
 240
 241        if (ptq->stop) {
 242                b->len = 0;
 243                return 0;
 244        }
 245
 246        queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
 247next:
 248        buffer = auxtrace_buffer__next(queue, buffer);
 249        if (!buffer) {
 250                if (old_buffer)
 251                        auxtrace_buffer__drop_data(old_buffer);
 252                b->len = 0;
 253                return 0;
 254        }
 255
 256        ptq->buffer = buffer;
 257
 258        if (!buffer->data) {
 259                int fd = perf_data_file__fd(ptq->pt->session->file);
 260
 261                buffer->data = auxtrace_buffer__get_data(buffer, fd);
 262                if (!buffer->data)
 263                        return -ENOMEM;
 264        }
 265
 266        if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
 267            intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
 268                return -ENOMEM;
 269
 270        if (buffer->use_data) {
 271                b->len = buffer->use_size;
 272                b->buf = buffer->use_data;
 273        } else {
 274                b->len = buffer->size;
 275                b->buf = buffer->data;
 276        }
 277        b->ref_timestamp = buffer->reference;
 278
 279        /*
 280         * If in snapshot mode and the buffer has no usable data, get next
 281         * buffer and again check overlap against old_buffer.
 282         */
 283        if (ptq->pt->snapshot_mode && !b->len)
 284                goto next;
 285
 286        if (old_buffer)
 287                auxtrace_buffer__drop_data(old_buffer);
 288
 289        if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
 290                                                      !buffer->consecutive)) {
 291                b->consecutive = false;
 292                b->trace_nr = buffer->buffer_nr + 1;
 293        } else {
 294                b->consecutive = true;
 295        }
 296
 297        if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
 298                                        ptq->tid != buffer->tid))
 299                intel_pt_use_buffer_pid_tid(ptq, queue, buffer);
 300
 301        if (ptq->step_through_buffers)
 302                ptq->stop = true;
 303
 304        if (!b->len)
 305                return intel_pt_get_trace(b, data);
 306
 307        return 0;
 308}
 309
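/*
 * Per-dso cache of previously walked instruction ranges, keyed by file
 * offset, so that hot code does not have to be disassembled again on
 * every pass through it.
 */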
 310struct intel_pt_cache_entry {
 311        struct auxtrace_cache_entry     entry;
 312        u64                             insn_cnt;
 313        u64                             byte_cnt;
 314        enum intel_pt_insn_op           op;
 315        enum intel_pt_insn_branch       branch;
 316        int                             length;
 317        int32_t                         rel;
 318};
 319
 320static int intel_pt_config_div(const char *var, const char *value, void *data)
 321{
 322        int *d = data;
 323        long val;
 324
 325        if (!strcmp(var, "intel-pt.cache-divisor")) {
 326                val = strtol(value, NULL, 0);
 327                if (val > 0 && val <= INT_MAX)
 328                        *d = val;
 329        }
 330
 331        return 0;
 332}
 333
 334static int intel_pt_cache_divisor(void)
 335{
 336        static int d;
 337
 338        if (d)
 339                return d;
 340
 341        perf_config(intel_pt_config_div, &d);
 342
 343        if (!d)
 344                d = 64;
 345
 346        return d;
 347}
 348
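/*
 * Choose the hash size (in bits) for the per-dso cache from the dso size
 * divided by the configured divisor, with lower and upper clamps.
 */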
 349static unsigned int intel_pt_cache_size(struct dso *dso,
 350                                        struct machine *machine)
 351{
 352        off_t size;
 353
 354        size = dso__data_size(dso, machine);
 355        size /= intel_pt_cache_divisor();
 356        if (size < 1000)
 357                return 10;
 358        if (size > (1 << 21))
 359                return 21;
 360        return 32 - __builtin_clz(size);
 361}
 362
 363static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
 364                                             struct machine *machine)
 365{
 366        struct auxtrace_cache *c;
 367        unsigned int bits;
 368
 369        if (dso->auxtrace_cache)
 370                return dso->auxtrace_cache;
 371
 372        bits = intel_pt_cache_size(dso, machine);
 373
 374        /* Ignoring cache creation failure */
 375        c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
 376
 377        dso->auxtrace_cache = c;
 378
 379        return c;
 380}
 381
 382static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
 383                              u64 offset, u64 insn_cnt, u64 byte_cnt,
 384                              struct intel_pt_insn *intel_pt_insn)
 385{
 386        struct auxtrace_cache *c = intel_pt_cache(dso, machine);
 387        struct intel_pt_cache_entry *e;
 388        int err;
 389
 390        if (!c)
 391                return -ENOMEM;
 392
 393        e = auxtrace_cache__alloc_entry(c);
 394        if (!e)
 395                return -ENOMEM;
 396
 397        e->insn_cnt = insn_cnt;
 398        e->byte_cnt = byte_cnt;
 399        e->op = intel_pt_insn->op;
 400        e->branch = intel_pt_insn->branch;
 401        e->length = intel_pt_insn->length;
 402        e->rel = intel_pt_insn->rel;
 403
 404        err = auxtrace_cache__add(c, offset, &e->entry);
 405        if (err)
 406                auxtrace_cache__free_entry(c, e);
 407
 408        return err;
 409}
 410
 411static struct intel_pt_cache_entry *
 412intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
 413{
 414        struct auxtrace_cache *c = intel_pt_cache(dso, machine);
 415
 416        if (!c)
 417                return NULL;
 418
 419        return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
 420}
 421
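/*
 * Decoder callback: walk the object code from *ip one instruction at a
 * time until a branch is found, 'to_ip' is reached, or 'max_insn_cnt'
 * instructions have been decoded.  Walked ranges are cached per dso to
 * avoid repeated disassembly.
 */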
 422static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 423                                   uint64_t *insn_cnt_ptr, uint64_t *ip,
 424                                   uint64_t to_ip, uint64_t max_insn_cnt,
 425                                   void *data)
 426{
 427        struct intel_pt_queue *ptq = data;
 428        struct machine *machine = ptq->pt->machine;
 429        struct thread *thread;
 430        struct addr_location al;
 431        unsigned char buf[1024];
 432        size_t bufsz;
 433        ssize_t len;
 434        int x86_64;
 435        u8 cpumode;
 436        u64 offset, start_offset, start_ip;
 437        u64 insn_cnt = 0;
 438        bool one_map = true;
 439
 440        if (to_ip && *ip == to_ip)
 441                goto out_no_cache;
 442
 443        bufsz = intel_pt_insn_max_size();
 444
 445        if (*ip >= ptq->pt->kernel_start)
 446                cpumode = PERF_RECORD_MISC_KERNEL;
 447        else
 448                cpumode = PERF_RECORD_MISC_USER;
 449
 450        thread = ptq->thread;
 451        if (!thread) {
 452                if (cpumode != PERF_RECORD_MISC_KERNEL)
 453                        return -EINVAL;
 454                thread = ptq->pt->unknown_thread;
 455        }
 456
 457        while (1) {
 458                thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
 459                if (!al.map || !al.map->dso)
 460                        return -EINVAL;
 461
 462                if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
 463                    dso__data_status_seen(al.map->dso,
 464                                          DSO_DATA_STATUS_SEEN_ITRACE))
 465                        return -ENOENT;
 466
 467                offset = al.map->map_ip(al.map, *ip);
 468
 469                if (!to_ip && one_map) {
 470                        struct intel_pt_cache_entry *e;
 471
 472                        e = intel_pt_cache_lookup(al.map->dso, machine, offset);
 473                        if (e &&
 474                            (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
 475                                *insn_cnt_ptr = e->insn_cnt;
 476                                *ip += e->byte_cnt;
 477                                intel_pt_insn->op = e->op;
 478                                intel_pt_insn->branch = e->branch;
 479                                intel_pt_insn->length = e->length;
 480                                intel_pt_insn->rel = e->rel;
 481                                intel_pt_log_insn_no_data(intel_pt_insn, *ip);
 482                                return 0;
 483                        }
 484                }
 485
 486                start_offset = offset;
 487                start_ip = *ip;
 488
 489                /* Load maps to ensure dso->is_64_bit has been updated */
 490                map__load(al.map);
 491
 492                x86_64 = al.map->dso->is_64_bit;
 493
 494                while (1) {
 495                        len = dso__data_read_offset(al.map->dso, machine,
 496                                                    offset, buf, bufsz);
 497                        if (len <= 0)
 498                                return -EINVAL;
 499
 500                        if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
 501                                return -EINVAL;
 502
 503                        intel_pt_log_insn(intel_pt_insn, *ip);
 504
 505                        insn_cnt += 1;
 506
 507                        if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
 508                                goto out;
 509
 510                        if (max_insn_cnt && insn_cnt >= max_insn_cnt)
 511                                goto out_no_cache;
 512
 513                        *ip += intel_pt_insn->length;
 514
 515                        if (to_ip && *ip == to_ip)
 516                                goto out_no_cache;
 517
 518                        if (*ip >= al.map->end)
 519                                break;
 520
 521                        offset += intel_pt_insn->length;
 522                }
 523                one_map = false;
 524        }
 525out:
 526        *insn_cnt_ptr = insn_cnt;
 527
 528        if (!one_map)
 529                goto out_no_cache;
 530
 531        /*
 532         * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
 533         * entries.
 534         */
 535        if (to_ip) {
 536                struct intel_pt_cache_entry *e;
 537
 538                e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
 539                if (e)
 540                        return 0;
 541        }
 542
 543        /* Ignore cache errors */
 544        intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
 545                           *ip - start_ip, intel_pt_insn);
 546
 547        return 0;
 548
 549out_no_cache:
 550        *insn_cnt_ptr = insn_cnt;
 551        return 0;
 552}
 553
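/*
 * Decide whether an ip reported by a TIP.PGD packet is consistent with
 * tracing being disabled there: true if it falls in a trace-stop region,
 * or if address filters are in use and it lies outside all of them.
 */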
 554static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
 555                                  uint64_t offset, const char *filename)
 556{
 557        struct addr_filter *filt;
 558        bool have_filter   = false;
 559        bool hit_tracestop = false;
 560        bool hit_filter    = false;
 561
 562        list_for_each_entry(filt, &pt->filts.head, list) {
 563                if (filt->start)
 564                        have_filter = true;
 565
 566                if ((filename && !filt->filename) ||
 567                    (!filename && filt->filename) ||
 568                    (filename && strcmp(filename, filt->filename)))
 569                        continue;
 570
 571                if (!(offset >= filt->addr && offset < filt->addr + filt->size))
 572                        continue;
 573
 574                intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
 575                             ip, offset, filename ? filename : "[kernel]",
 576                             filt->start ? "filter" : "stop",
 577                             filt->addr, filt->size);
 578
 579                if (filt->start)
 580                        hit_filter = true;
 581                else
 582                        hit_tracestop = true;
 583        }
 584
 585        if (!hit_tracestop && !hit_filter)
 586                intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
 587                             ip, offset, filename ? filename : "[kernel]");
 588
 589        return hit_tracestop || (have_filter && !hit_filter);
 590}
 591
 592static int __intel_pt_pgd_ip(uint64_t ip, void *data)
 593{
 594        struct intel_pt_queue *ptq = data;
 595        struct thread *thread;
 596        struct addr_location al;
 597        u8 cpumode;
 598        u64 offset;
 599
 600        if (ip >= ptq->pt->kernel_start)
 601                return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
 602
 603        cpumode = PERF_RECORD_MISC_USER;
 604
 605        thread = ptq->thread;
 606        if (!thread)
 607                return -EINVAL;
 608
 609        thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
 610        if (!al.map || !al.map->dso)
 611                return -EINVAL;
 612
 613        offset = al.map->map_ip(al.map, ip);
 614
 615        return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
 616                                     al.map->dso->long_name);
 617}
 618
 619static bool intel_pt_pgd_ip(uint64_t ip, void *data)
 620{
 621        return __intel_pt_pgd_ip(ip, data) > 0;
 622}
 623
 624static bool intel_pt_get_config(struct intel_pt *pt,
 625                                struct perf_event_attr *attr, u64 *config)
 626{
 627        if (attr->type == pt->pmu_type) {
 628                if (config)
 629                        *config = attr->config;
 630                return true;
 631        }
 632
 633        return false;
 634}
 635
 636static bool intel_pt_exclude_kernel(struct intel_pt *pt)
 637{
 638        struct perf_evsel *evsel;
 639
 640        evlist__for_each_entry(pt->session->evlist, evsel) {
 641                if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
 642                    !evsel->attr.exclude_kernel)
 643                        return false;
 644        }
 645        return true;
 646}
 647
 648static bool intel_pt_return_compression(struct intel_pt *pt)
 649{
 650        struct perf_evsel *evsel;
 651        u64 config;
 652
 653        if (!pt->noretcomp_bit)
 654                return true;
 655
 656        evlist__for_each_entry(pt->session->evlist, evsel) {
 657                if (intel_pt_get_config(pt, &evsel->attr, &config) &&
 658                    (config & pt->noretcomp_bit))
 659                        return false;
 660        }
 661        return true;
 662}
 663
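/*
 * The MTC period is a bit-field within the event config: mtc_freq_bits
 * is its mask, so shift down to the mask's lowest set bit to extract the
 * field value.
 */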
 664static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
 665{
 666        struct perf_evsel *evsel;
 667        unsigned int shift;
 668        u64 config;
 669
 670        if (!pt->mtc_freq_bits)
 671                return 0;
 672
 673        for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
 674                config >>= 1;
 675
 676        evlist__for_each_entry(pt->session->evlist, evsel) {
 677                if (intel_pt_get_config(pt, &evsel->attr, &config))
 678                        return (config & pt->mtc_freq_bits) >> shift;
 679        }
 680        return 0;
 681}
 682
 683static bool intel_pt_timeless_decoding(struct intel_pt *pt)
 684{
 685        struct perf_evsel *evsel;
 686        bool timeless_decoding = true;
 687        u64 config;
 688
 689        if (!pt->tsc_bit || !pt->cap_user_time_zero)
 690                return true;
 691
 692        evlist__for_each_entry(pt->session->evlist, evsel) {
 693                if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
 694                        return true;
 695                if (intel_pt_get_config(pt, &evsel->attr, &config)) {
 696                        if (config & pt->tsc_bit)
 697                                timeless_decoding = false;
 698                        else
 699                                return true;
 700                }
 701        }
 702        return timeless_decoding;
 703}
 704
 705static bool intel_pt_tracing_kernel(struct intel_pt *pt)
 706{
 707        struct perf_evsel *evsel;
 708
 709        evlist__for_each_entry(pt->session->evlist, evsel) {
 710                if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
 711                    !evsel->attr.exclude_kernel)
 712                        return true;
 713        }
 714        return false;
 715}
 716
 717static bool intel_pt_have_tsc(struct intel_pt *pt)
 718{
 719        struct perf_evsel *evsel;
 720        bool have_tsc = false;
 721        u64 config;
 722
 723        if (!pt->tsc_bit)
 724                return false;
 725
 726        evlist__for_each_entry(pt->session->evlist, evsel) {
 727                if (intel_pt_get_config(pt, &evsel->attr, &config)) {
 728                        if (config & pt->tsc_bit)
 729                                have_tsc = true;
 730                        else
 731                                return false;
 732                }
 733        }
 734        return have_tsc;
 735}
 736
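/*
 * Convert nanoseconds to TSC ticks, inverting the usual
 * "ticks * time_mult >> time_shift" conversion.  The division is split
 * into quotient and remainder so that the shift cannot overflow for
 * large 'ns' values.
 */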
 737static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
 738{
 739        u64 quot, rem;
 740
 741        quot = ns / pt->tc.time_mult;
 742        rem  = ns % pt->tc.time_mult;
 743        return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
 744                pt->tc.time_mult;
 745}
 746
 747static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 748                                                   unsigned int queue_nr)
 749{
 750        struct intel_pt_params params = { .get_trace = 0, };
 751        struct intel_pt_queue *ptq;
 752
 753        ptq = zalloc(sizeof(struct intel_pt_queue));
 754        if (!ptq)
 755                return NULL;
 756
 757        if (pt->synth_opts.callchain) {
 758                size_t sz = sizeof(struct ip_callchain);
 759
 760                sz += pt->synth_opts.callchain_sz * sizeof(u64);
 761                ptq->chain = zalloc(sz);
 762                if (!ptq->chain)
 763                        goto out_free;
 764        }
 765
 766        if (pt->synth_opts.last_branch) {
 767                size_t sz = sizeof(struct branch_stack);
 768
 769                sz += pt->synth_opts.last_branch_sz *
 770                      sizeof(struct branch_entry);
 771                ptq->last_branch = zalloc(sz);
 772                if (!ptq->last_branch)
 773                        goto out_free;
 774                ptq->last_branch_rb = zalloc(sz);
 775                if (!ptq->last_branch_rb)
 776                        goto out_free;
 777        }
 778
 779        ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
 780        if (!ptq->event_buf)
 781                goto out_free;
 782
 783        ptq->pt = pt;
 784        ptq->queue_nr = queue_nr;
 785        ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
 786        ptq->pid = -1;
 787        ptq->tid = -1;
 788        ptq->cpu = -1;
 789        ptq->next_tid = -1;
 790
 791        params.get_trace = intel_pt_get_trace;
 792        params.walk_insn = intel_pt_walk_next_insn;
 793        params.data = ptq;
 794        params.return_compression = intel_pt_return_compression(pt);
 795        params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
 796        params.mtc_period = intel_pt_mtc_period(pt);
 797        params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
 798        params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
 799
 800        if (pt->filts.cnt > 0)
 801                params.pgd_ip = intel_pt_pgd_ip;
 802
 803        if (pt->synth_opts.instructions) {
 804                if (pt->synth_opts.period) {
 805                        switch (pt->synth_opts.period_type) {
 806                        case PERF_ITRACE_PERIOD_INSTRUCTIONS:
 807                                params.period_type =
 808                                                INTEL_PT_PERIOD_INSTRUCTIONS;
 809                                params.period = pt->synth_opts.period;
 810                                break;
 811                        case PERF_ITRACE_PERIOD_TICKS:
 812                                params.period_type = INTEL_PT_PERIOD_TICKS;
 813                                params.period = pt->synth_opts.period;
 814                                break;
 815                        case PERF_ITRACE_PERIOD_NANOSECS:
 816                                params.period_type = INTEL_PT_PERIOD_TICKS;
 817                                params.period = intel_pt_ns_to_ticks(pt,
 818                                                        pt->synth_opts.period);
 819                                break;
 820                        default:
 821                                break;
 822                        }
 823                }
 824
 825                if (!params.period) {
 826                        params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
 827                        params.period = 1;
 828                }
 829        }
 830
 831        ptq->decoder = intel_pt_decoder_new(&params);
 832        if (!ptq->decoder)
 833                goto out_free;
 834
 835        return ptq;
 836
 837out_free:
 838        zfree(&ptq->event_buf);
 839        zfree(&ptq->last_branch);
 840        zfree(&ptq->last_branch_rb);
 841        zfree(&ptq->chain);
 842        free(ptq);
 843        return NULL;
 844}
 845
 846static void intel_pt_free_queue(void *priv)
 847{
 848        struct intel_pt_queue *ptq = priv;
 849
 850        if (!ptq)
 851                return;
 852        thread__zput(ptq->thread);
 853        intel_pt_decoder_free(ptq->decoder);
 854        zfree(&ptq->event_buf);
 855        zfree(&ptq->last_branch);
 856        zfree(&ptq->last_branch_rb);
 857        zfree(&ptq->chain);
 858        free(ptq);
 859}
 860
 861static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
 862                                     struct auxtrace_queue *queue)
 863{
 864        struct intel_pt_queue *ptq = queue->priv;
 865
 866        if (queue->tid == -1 || pt->have_sched_switch) {
 867                ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
 868                thread__zput(ptq->thread);
 869        }
 870
 871        if (!ptq->thread && ptq->tid != -1)
 872                ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
 873
 874        if (ptq->thread) {
 875                ptq->pid = ptq->thread->pid_;
 876                if (queue->cpu == -1)
 877                        ptq->cpu = ptq->thread->cpu;
 878        }
 879}
 880
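/*
 * Translate decoder state for the current branch into perf sample flags
 * (async, tx abort, trace begin/end, etc.) and record the branch
 * instruction length.
 */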
 881static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 882{
 883        if (ptq->state->flags & INTEL_PT_ABORT_TX) {
 884                ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
 885        } else if (ptq->state->flags & INTEL_PT_ASYNC) {
 886                if (ptq->state->to_ip)
 887                        ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
 888                                     PERF_IP_FLAG_ASYNC |
 889                                     PERF_IP_FLAG_INTERRUPT;
 890                else
 891                        ptq->flags = PERF_IP_FLAG_BRANCH |
 892                                     PERF_IP_FLAG_TRACE_END;
 893                ptq->insn_len = 0;
 894        } else {
 895                if (ptq->state->from_ip)
 896                        ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
 897                else
 898                        ptq->flags = PERF_IP_FLAG_BRANCH |
 899                                     PERF_IP_FLAG_TRACE_BEGIN;
 900                if (ptq->state->flags & INTEL_PT_IN_TX)
 901                        ptq->flags |= PERF_IP_FLAG_IN_TX;
 902                ptq->insn_len = ptq->state->insn_len;
 903        }
 904}
 905
 906static int intel_pt_setup_queue(struct intel_pt *pt,
 907                                struct auxtrace_queue *queue,
 908                                unsigned int queue_nr)
 909{
 910        struct intel_pt_queue *ptq = queue->priv;
 911
 912        if (list_empty(&queue->head))
 913                return 0;
 914
 915        if (!ptq) {
 916                ptq = intel_pt_alloc_queue(pt, queue_nr);
 917                if (!ptq)
 918                        return -ENOMEM;
 919                queue->priv = ptq;
 920
 921                if (queue->cpu != -1)
 922                        ptq->cpu = queue->cpu;
 923                ptq->tid = queue->tid;
 924
 925                if (pt->sampling_mode) {
 926                        if (pt->timeless_decoding)
 927                                ptq->step_through_buffers = true;
 928                        if (pt->timeless_decoding || !pt->have_sched_switch)
 929                                ptq->use_buffer_pid_tid = true;
 930                }
 931        }
 932
 933        if (!ptq->on_heap &&
 934            (!pt->sync_switch ||
 935             ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
 936                const struct intel_pt_state *state;
 937                int ret;
 938
 939                if (pt->timeless_decoding)
 940                        return 0;
 941
 942                intel_pt_log("queue %u getting timestamp\n", queue_nr);
 943                intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
 944                             queue_nr, ptq->cpu, ptq->pid, ptq->tid);
 945                while (1) {
 946                        state = intel_pt_decode(ptq->decoder);
 947                        if (state->err) {
 948                                if (state->err == INTEL_PT_ERR_NODATA) {
 949                                        intel_pt_log("queue %u has no timestamp\n",
 950                                                     queue_nr);
 951                                        return 0;
 952                                }
 953                                continue;
 954                        }
 955                        if (state->timestamp)
 956                                break;
 957                }
 958
 959                ptq->timestamp = state->timestamp;
 960                intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
 961                             queue_nr, ptq->timestamp);
 962                ptq->state = state;
 963                ptq->have_sample = true;
 964                intel_pt_sample_flags(ptq);
 965                ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
 966                if (ret)
 967                        return ret;
 968                ptq->on_heap = true;
 969        }
 970
 971        return 0;
 972}
 973
 974static int intel_pt_setup_queues(struct intel_pt *pt)
 975{
 976        unsigned int i;
 977        int ret;
 978
 979        for (i = 0; i < pt->queues.nr_queues; i++) {
 980                ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
 981                if (ret)
 982                        return ret;
 983        }
 984        return 0;
 985}
 986
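/*
 * Flatten the last-branch ring buffer into the linear branch_stack that
 * is attached to synthesized samples, most recent branch first.
 */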
 987static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
 988{
 989        struct branch_stack *bs_src = ptq->last_branch_rb;
 990        struct branch_stack *bs_dst = ptq->last_branch;
 991        size_t nr = 0;
 992
 993        bs_dst->nr = bs_src->nr;
 994
 995        if (!bs_src->nr)
 996                return;
 997
 998        nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
 999        memcpy(&bs_dst->entries[0],
1000               &bs_src->entries[ptq->last_branch_pos],
1001               sizeof(struct branch_entry) * nr);
1002
1003        if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
1004                memcpy(&bs_dst->entries[nr],
1005                       &bs_src->entries[0],
1006                       sizeof(struct branch_entry) * ptq->last_branch_pos);
1007        }
1008}
1009
1010static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
1011{
1012        ptq->last_branch_pos = 0;
1013        ptq->last_branch_rb->nr = 0;
1014}
1015
1016static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
1017{
1018        const struct intel_pt_state *state = ptq->state;
1019        struct branch_stack *bs = ptq->last_branch_rb;
1020        struct branch_entry *be;
1021
1022        if (!ptq->last_branch_pos)
1023                ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
1024
1025        ptq->last_branch_pos -= 1;
1026
1027        be              = &bs->entries[ptq->last_branch_pos];
1028        be->from        = state->from_ip;
1029        be->to          = state->to_ip;
1030        be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
1031        be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
1032        /* No support for mispredict */
1033        be->flags.mispred = ptq->pt->mispred_all;
1034
1035        if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
1036                bs->nr += 1;
1037}
1038
1039static int intel_pt_inject_event(union perf_event *event,
1040                                 struct perf_sample *sample, u64 type,
1041                                 bool swapped)
1042{
1043        event->header.size = perf_event__sample_event_size(sample, type, 0);
1044        return perf_event__synthesize_sample(event, type, 0, sample, swapped);
1045}
1046
1047static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
1048{
1049        int ret;
1050        struct intel_pt *pt = ptq->pt;
1051        union perf_event *event = ptq->event_buf;
1052        struct perf_sample sample = { .ip = 0, };
1053        struct dummy_branch_stack {
1054                u64                     nr;
1055                struct branch_entry     entries;
1056        } dummy_bs;
1057
1058        if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
1059                return 0;
1060
1061        if (pt->synth_opts.initial_skip &&
1062            pt->num_events++ < pt->synth_opts.initial_skip)
1063                return 0;
1064
1065        event->sample.header.type = PERF_RECORD_SAMPLE;
1066        event->sample.header.misc = PERF_RECORD_MISC_USER;
1067        event->sample.header.size = sizeof(struct perf_event_header);
1068
1069        if (!pt->timeless_decoding)
1070                sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1071
1072        sample.cpumode = PERF_RECORD_MISC_USER;
1073        sample.ip = ptq->state->from_ip;
1074        sample.pid = ptq->pid;
1075        sample.tid = ptq->tid;
1076        sample.addr = ptq->state->to_ip;
1077        sample.id = ptq->pt->branches_id;
1078        sample.stream_id = ptq->pt->branches_id;
1079        sample.period = 1;
1080        sample.cpu = ptq->cpu;
1081        sample.flags = ptq->flags;
1082        sample.insn_len = ptq->insn_len;
1083
1084        /*
1085         * perf report cannot handle events without a branch stack when using
1086         * SORT_MODE__BRANCH so make a dummy one.
1087         */
1088        if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
1089                dummy_bs = (struct dummy_branch_stack){
1090                        .nr = 1,
1091                        .entries = {
1092                                .from = sample.ip,
1093                                .to = sample.addr,
1094                        },
1095                };
1096                sample.branch_stack = (struct branch_stack *)&dummy_bs;
1097        }
1098
1099        if (pt->synth_opts.inject) {
1100                ret = intel_pt_inject_event(event, &sample,
1101                                            pt->branches_sample_type,
1102                                            pt->synth_needs_swap);
1103                if (ret)
1104                        return ret;
1105        }
1106
1107        ret = perf_session__deliver_synth_event(pt->session, event, &sample);
1108        if (ret)
1109                pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
1110                       ret);
1111
1112        return ret;
1113}
1114
1115static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
1116{
1117        int ret;
1118        struct intel_pt *pt = ptq->pt;
1119        union perf_event *event = ptq->event_buf;
1120        struct perf_sample sample = { .ip = 0, };
1121
1122        if (pt->synth_opts.initial_skip &&
1123            pt->num_events++ < pt->synth_opts.initial_skip)
1124                return 0;
1125
1126        event->sample.header.type = PERF_RECORD_SAMPLE;
1127        event->sample.header.misc = PERF_RECORD_MISC_USER;
1128        event->sample.header.size = sizeof(struct perf_event_header);
1129
1130        if (!pt->timeless_decoding)
1131                sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1132
1133        sample.cpumode = PERF_RECORD_MISC_USER;
1134        sample.ip = ptq->state->from_ip;
1135        sample.pid = ptq->pid;
1136        sample.tid = ptq->tid;
1137        sample.addr = ptq->state->to_ip;
1138        sample.id = ptq->pt->instructions_id;
1139        sample.stream_id = ptq->pt->instructions_id;
1140        sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
1141        sample.cpu = ptq->cpu;
1142        sample.flags = ptq->flags;
1143        sample.insn_len = ptq->insn_len;
1144
1145        ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
1146
1147        if (pt->synth_opts.callchain) {
1148                thread_stack__sample(ptq->thread, ptq->chain,
1149                                     pt->synth_opts.callchain_sz, sample.ip);
1150                sample.callchain = ptq->chain;
1151        }
1152
1153        if (pt->synth_opts.last_branch) {
1154                intel_pt_copy_last_branch_rb(ptq);
1155                sample.branch_stack = ptq->last_branch;
1156        }
1157
1158        if (pt->synth_opts.inject) {
1159                ret = intel_pt_inject_event(event, &sample,
1160                                            pt->instructions_sample_type,
1161                                            pt->synth_needs_swap);
1162                if (ret)
1163                        return ret;
1164        }
1165
1166        ret = perf_session__deliver_synth_event(pt->session, event, &sample);
1167        if (ret)
1168                pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
1169                       ret);
1170
1171        if (pt->synth_opts.last_branch)
1172                intel_pt_reset_last_branch_rb(ptq);
1173
1174        return ret;
1175}
1176
1177static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
1178{
1179        int ret;
1180        struct intel_pt *pt = ptq->pt;
1181        union perf_event *event = ptq->event_buf;
1182        struct perf_sample sample = { .ip = 0, };
1183
1184        if (pt->synth_opts.initial_skip &&
1185            pt->num_events++ < pt->synth_opts.initial_skip)
1186                return 0;
1187
1188        event->sample.header.type = PERF_RECORD_SAMPLE;
1189        event->sample.header.misc = PERF_RECORD_MISC_USER;
1190        event->sample.header.size = sizeof(struct perf_event_header);
1191
1192        if (!pt->timeless_decoding)
1193                sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1194
1195        sample.cpumode = PERF_RECORD_MISC_USER;
1196        sample.ip = ptq->state->from_ip;
1197        sample.pid = ptq->pid;
1198        sample.tid = ptq->tid;
1199        sample.addr = ptq->state->to_ip;
1200        sample.id = ptq->pt->transactions_id;
1201        sample.stream_id = ptq->pt->transactions_id;
1202        sample.period = 1;
1203        sample.cpu = ptq->cpu;
1204        sample.flags = ptq->flags;
1205        sample.insn_len = ptq->insn_len;
1206
1207        if (pt->synth_opts.callchain) {
1208                thread_stack__sample(ptq->thread, ptq->chain,
1209                                     pt->synth_opts.callchain_sz, sample.ip);
1210                sample.callchain = ptq->chain;
1211        }
1212
1213        if (pt->synth_opts.last_branch) {
1214                intel_pt_copy_last_branch_rb(ptq);
1215                sample.branch_stack = ptq->last_branch;
1216        }
1217
1218        if (pt->synth_opts.inject) {
1219                ret = intel_pt_inject_event(event, &sample,
1220                                            pt->transactions_sample_type,
1221                                            pt->synth_needs_swap);
1222                if (ret)
1223                        return ret;
1224        }
1225
1226        ret = perf_session__deliver_synth_event(pt->session, event, &sample);
1227        if (ret)
1228                pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
1229                       ret);
1230
1231        if (pt->synth_opts.last_branch)
1232                intel_pt_reset_last_branch_rb(ptq);
1233
1234        return ret;
1235}
1236
1237static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1238                                pid_t pid, pid_t tid, u64 ip)
1239{
1240        union perf_event event;
1241        char msg[MAX_AUXTRACE_ERROR_MSG];
1242        int err;
1243
1244        intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
1245
1246        auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1247                             code, cpu, pid, tid, ip, msg);
1248
1249        err = perf_session__deliver_synth_event(pt->session, &event, NULL);
1250        if (err)
1251                pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
1252                       err);
1253
1254        return err;
1255}
1256
1257static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
1258{
1259        struct auxtrace_queue *queue;
1260        pid_t tid = ptq->next_tid;
1261        int err;
1262
1263        if (tid == -1)
1264                return 0;
1265
1266        intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
1267
1268        err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
1269
1270        queue = &pt->queues.queue_array[ptq->queue_nr];
1271        intel_pt_set_pid_tid_cpu(pt, queue);
1272
1273        ptq->next_tid = -1;
1274
1275        return err;
1276}
1277
1278static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1279{
1280        struct intel_pt *pt = ptq->pt;
1281
1282        return ip == pt->switch_ip &&
1283               (ptq->flags & PERF_IP_FLAG_BRANCH) &&
1284               !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
1285                               PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
1286}
1287
1288static int intel_pt_sample(struct intel_pt_queue *ptq)
1289{
1290        const struct intel_pt_state *state = ptq->state;
1291        struct intel_pt *pt = ptq->pt;
1292        int err;
1293
1294        if (!ptq->have_sample)
1295                return 0;
1296
1297        ptq->have_sample = false;
1298
1299        if (pt->sample_instructions &&
1300            (state->type & INTEL_PT_INSTRUCTION) &&
1301            (!pt->synth_opts.initial_skip ||
1302             pt->num_events++ >= pt->synth_opts.initial_skip)) {
1303                err = intel_pt_synth_instruction_sample(ptq);
1304                if (err)
1305                        return err;
1306        }
1307
1308        if (pt->sample_transactions &&
1309            (state->type & INTEL_PT_TRANSACTION) &&
1310            (!pt->synth_opts.initial_skip ||
1311             pt->num_events++ >= pt->synth_opts.initial_skip)) {
1312                err = intel_pt_synth_transaction_sample(ptq);
1313                if (err)
1314                        return err;
1315        }
1316
1317        if (!(state->type & INTEL_PT_BRANCH))
1318                return 0;
1319
1320        if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
1321                thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
1322                                    state->to_ip, ptq->insn_len,
1323                                    state->trace_nr);
1324        else
1325                thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
1326
1327        if (pt->sample_branches) {
1328                err = intel_pt_synth_branch_sample(ptq);
1329                if (err)
1330                        return err;
1331        }
1332
1333        if (pt->synth_opts.last_branch)
1334                intel_pt_update_last_branch_rb(ptq);
1335
1336        if (!pt->sync_switch)
1337                return 0;
1338
1339        if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
1340                switch (ptq->switch_state) {
1341                case INTEL_PT_SS_UNKNOWN:
1342                case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1343                        err = intel_pt_next_tid(pt, ptq);
1344                        if (err)
1345                                return err;
1346                        ptq->switch_state = INTEL_PT_SS_TRACING;
1347                        break;
1348                default:
1349                        ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
1350                        return 1;
1351                }
1352        } else if (!state->to_ip) {
1353                ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
1354        } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
1355                ptq->switch_state = INTEL_PT_SS_UNKNOWN;
1356        } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1357                   state->to_ip == pt->ptss_ip &&
1358                   (ptq->flags & PERF_IP_FLAG_CALL)) {
1359                ptq->switch_state = INTEL_PT_SS_TRACING;
1360        }
1361
1362        return 0;
1363}
1364
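/*
 * Find the address of the kernel's __switch_to symbol, used to recognize
 * context switches in the trace, and the address of the function from
 * which the sched_switch event is emitted.
 */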
1365static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
1366{
1367        struct machine *machine = pt->machine;
1368        struct map *map;
1369        struct symbol *sym, *start;
1370        u64 ip, switch_ip = 0;
1371        const char *ptss;
1372
1373        if (ptss_ip)
1374                *ptss_ip = 0;
1375
1376        map = machine__kernel_map(machine);
1377        if (!map)
1378                return 0;
1379
1380        if (map__load(map))
1381                return 0;
1382
1383        start = dso__first_symbol(map->dso, MAP__FUNCTION);
1384
1385        for (sym = start; sym; sym = dso__next_symbol(sym)) {
1386                if (sym->binding == STB_GLOBAL &&
1387                    !strcmp(sym->name, "__switch_to")) {
1388                        ip = map->unmap_ip(map, sym->start);
1389                        if (ip >= map->start && ip < map->end) {
1390                                switch_ip = ip;
1391                                break;
1392                        }
1393                }
1394        }
1395
1396        if (!switch_ip || !ptss_ip)
1397                return 0;
1398
1399        if (pt->have_sched_switch == 1)
1400                ptss = "perf_trace_sched_switch";
1401        else
1402                ptss = "__perf_event_task_sched_out";
1403
1404        for (sym = start; sym; sym = dso__next_symbol(sym)) {
1405                if (!strcmp(sym->name, ptss)) {
1406                        ip = map->unmap_ip(map, sym->start);
1407                        if (ip >= map->start && ip < map->end) {
1408                                *ptss_ip = ip;
1409                                break;
1410                        }
1411                }
1412        }
1413
1414        return switch_ip;
1415}
1416
1417static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1418{
1419        const struct intel_pt_state *state = ptq->state;
1420        struct intel_pt *pt = ptq->pt;
1421        int err;
1422
1423        if (!pt->kernel_start) {
1424                pt->kernel_start = machine__kernel_start(pt->machine);
1425                if (pt->per_cpu_mmaps &&
1426                    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
1427                    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
1428                    !pt->sampling_mode) {
1429                        pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
1430                        if (pt->switch_ip) {
1431                                intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
1432                                             pt->switch_ip, pt->ptss_ip);
1433                                pt->sync_switch = true;
1434                        }
1435                }
1436        }
1437
1438        intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
1439                     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1440        while (1) {
1441                err = intel_pt_sample(ptq);
1442                if (err)
1443                        return err;
1444
1445                state = intel_pt_decode(ptq->decoder);
1446                if (state->err) {
1447                        if (state->err == INTEL_PT_ERR_NODATA)
1448                                return 1;
1449                        if (pt->sync_switch &&
1450                            state->from_ip >= pt->kernel_start) {
1451                                pt->sync_switch = false;
1452                                intel_pt_next_tid(pt, ptq);
1453                        }
1454                        if (pt->synth_opts.errors) {
1455                                err = intel_pt_synth_error(pt, state->err,
1456                                                           ptq->cpu, ptq->pid,
1457                                                           ptq->tid,
1458                                                           state->from_ip);
1459                                if (err)
1460                                        return err;
1461                        }
1462                        continue;
1463                }
1464
1465                ptq->state = state;
1466                ptq->have_sample = true;
1467                intel_pt_sample_flags(ptq);
1468
1469                /* Use estimated TSC upon return to user space */
1470                if (pt->est_tsc &&
1471                    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
1472                    state->to_ip && state->to_ip < pt->kernel_start) {
1473                        intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1474                                     state->timestamp, state->est_timestamp);
1475                        ptq->timestamp = state->est_timestamp;
1476                /* Use estimated TSC in unknown switch state */
1477                } else if (pt->sync_switch &&
1478                           ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
1479                           intel_pt_is_switch_ip(ptq, state->to_ip) &&
1480                           ptq->next_tid == -1) {
1481                        intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
1482                                     state->timestamp, state->est_timestamp);
1483                        ptq->timestamp = state->est_timestamp;
1484                } else if (state->timestamp > ptq->timestamp) {
1485                        ptq->timestamp = state->timestamp;
1486                }
1487
1488                if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
1489                        *timestamp = ptq->timestamp;
1490                        return 0;
1491                }
1492        }
1493        return 0;
1494}
1495
1496static inline int intel_pt_update_queues(struct intel_pt *pt)
1497{
1498        if (pt->queues.new_data) {
1499                pt->queues.new_data = false;
1500                return intel_pt_setup_queues(pt);
1501        }
1502        return 0;
1503}
1504
1505static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
1506{
1507        unsigned int queue_nr;
1508        u64 ts;
1509        int ret;
1510
1511        while (1) {
1512                struct auxtrace_queue *queue;
1513                struct intel_pt_queue *ptq;
1514
1515                if (!pt->heap.heap_cnt)
1516                        return 0;
1517
1518                if (pt->heap.heap_array[0].ordinal >= timestamp)
1519                        return 0;
1520
1521                queue_nr = pt->heap.heap_array[0].queue_nr;
1522                queue = &pt->queues.queue_array[queue_nr];
1523                ptq = queue->priv;
1524
1525                intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
1526                             queue_nr, pt->heap.heap_array[0].ordinal,
1527                             timestamp);
1528
1529                auxtrace_heap__pop(&pt->heap);
1530
1531                if (pt->heap.heap_cnt) {
1532                        ts = pt->heap.heap_array[0].ordinal + 1;
1533                        if (ts > timestamp)
1534                                ts = timestamp;
1535                } else {
1536                        ts = timestamp;
1537                }
1538
1539                intel_pt_set_pid_tid_cpu(pt, queue);
1540
1541                ret = intel_pt_run_decoder(ptq, &ts);
1542
1543                if (ret < 0) {
1544                        auxtrace_heap__add(&pt->heap, queue_nr, ts);
1545                        return ret;
1546                }
1547
1548                if (!ret) {
1549                        ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
1550                        if (ret < 0)
1551                                return ret;
1552                } else {
1553                        ptq->on_heap = false;
1554                }
1555        }
1556
1557        return 0;
1558}
1559
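    /*
     * When decoding without timestamps, decode the matching queues in one go.
     * A tid of -1 selects every queue; otherwise only queues belonging to
     * that thread are decoded, stamped with the given sample time.
     */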
1560static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
1561                                            u64 time_)
1562{
1563        struct auxtrace_queues *queues = &pt->queues;
1564        unsigned int i;
1565        u64 ts = 0;
1566
1567        for (i = 0; i < queues->nr_queues; i++) {
1568                struct auxtrace_queue *queue = &pt->queues.queue_array[i];
1569                struct intel_pt_queue *ptq = queue->priv;
1570
1571                if (ptq && (tid == -1 || ptq->tid == tid)) {
1572                        ptq->time = time_;
1573                        intel_pt_set_pid_tid_cpu(pt, queue);
1574                        intel_pt_run_decoder(ptq, &ts);
1575                }
1576        }
1577        return 0;
1578}
1579
1580static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
1581{
1582        return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
1583                                    sample->pid, sample->tid, 0);
1584}
1585
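    /*
     * Find the queue for a given CPU.  Queues are normally indexed by CPU, so
     * try that index first, then fall back to scanning the other queues.
     */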
1586static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
1587{
1588        unsigned i, j;
1589
1590        if (cpu < 0 || !pt->queues.nr_queues)
1591                return NULL;
1592
1593        if ((unsigned)cpu >= pt->queues.nr_queues)
1594                i = pt->queues.nr_queues - 1;
1595        else
1596                i = cpu;
1597
1598        if (pt->queues.queue_array[i].cpu == cpu)
1599                return pt->queues.queue_array[i].priv;
1600
1601        for (j = 0; i > 0; j++) {
1602                if (pt->queues.queue_array[--i].cpu == cpu)
1603                        return pt->queues.queue_array[i].priv;
1604        }
1605
1606        for (; j < pt->queues.nr_queues; j++) {
1607                if (pt->queues.queue_array[j].cpu == cpu)
1608                        return pt->queues.queue_array[j].priv;
1609        }
1610
1611        return NULL;
1612}
1613
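    /*
     * Feed a context switch into the per-CPU queue's switch-state machine.
     * Returns 1 if the caller should update the tid immediately, 0 if the tid
     * change is deferred (stored in next_tid) until the switch ip is seen, or
     * a negative error code.
     */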
1614static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
1615                                u64 timestamp)
1616{
1617        struct intel_pt_queue *ptq;
1618        int err;
1619
1620        if (!pt->sync_switch)
1621                return 1;
1622
1623        ptq = intel_pt_cpu_to_ptq(pt, cpu);
1624        if (!ptq)
1625                return 1;
1626
1627        switch (ptq->switch_state) {
1628        case INTEL_PT_SS_NOT_TRACING:
1629                ptq->next_tid = -1;
1630                break;
1631        case INTEL_PT_SS_UNKNOWN:
1632        case INTEL_PT_SS_TRACING:
1633                ptq->next_tid = tid;
1634                ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
1635                return 0;
1636        case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
1637                if (!ptq->on_heap) {
1638                        ptq->timestamp = perf_time_to_tsc(timestamp,
1639                                                          &pt->tc);
1640                        err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
1641                                                 ptq->timestamp);
1642                        if (err)
1643                                return err;
1644                        ptq->on_heap = true;
1645                }
1646                ptq->switch_state = INTEL_PT_SS_TRACING;
1647                break;
1648        case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1649                ptq->next_tid = tid;
1650                intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
1651                break;
1652        default:
1653                break;
1654        }
1655
1656        return 1;
1657}
1658
1659static int intel_pt_process_switch(struct intel_pt *pt,
1660                                   struct perf_sample *sample)
1661{
1662        struct perf_evsel *evsel;
1663        pid_t tid;
1664        int cpu, ret;
1665
1666        evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
1667        if (evsel != pt->switch_evsel)
1668                return 0;
1669
1670        tid = perf_evsel__intval(evsel, sample, "next_pid");
1671        cpu = sample->cpu;
1672
1673        intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1674                     cpu, tid, sample->time, perf_time_to_tsc(sample->time,
1675                     &pt->tc));
1676
1677        ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
1678        if (ret <= 0)
1679                return ret;
1680
1681        return machine__set_current_tid(pt->machine, cpu, -1, tid);
1682}
1683
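    /*
     * Handle PERF_RECORD_SWITCH / PERF_RECORD_SWITCH_CPU_WIDE.  With
     * have_sched_switch == 3, only CPU-wide "switch out" events are used and
     * the incoming task is taken from next_prev_pid/tid; otherwise only
     * "switch in" events are used and the task is taken from the sample.
     */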
1684static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
1685                                   struct perf_sample *sample)
1686{
1687        bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
1688        pid_t pid, tid;
1689        int cpu, ret;
1690
1691        cpu = sample->cpu;
1692
1693        if (pt->have_sched_switch == 3) {
1694                if (!out)
1695                        return 0;
1696                if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
1697                        pr_err("Expecting CPU-wide context switch event\n");
1698                        return -EINVAL;
1699                }
1700                pid = event->context_switch.next_prev_pid;
1701                tid = event->context_switch.next_prev_tid;
1702        } else {
1703                if (out)
1704                        return 0;
1705                pid = sample->pid;
1706                tid = sample->tid;
1707        }
1708
1709        if (tid == -1) {
1710                pr_err("context_switch event has no tid\n");
1711                return -EINVAL;
1712        }
1713
1714        intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1715                     cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
1716                     &pt->tc));
1717
1718        ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
1719        if (ret <= 0)
1720                return ret;
1721
1722        return machine__set_current_tid(pt->machine, cpu, pid, tid);
1723}
1724
1725static int intel_pt_process_itrace_start(struct intel_pt *pt,
1726                                         union perf_event *event,
1727                                         struct perf_sample *sample)
1728{
1729        if (!pt->per_cpu_mmaps)
1730                return 0;
1731
1732        intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
1733                     sample->cpu, event->itrace_start.pid,
1734                     event->itrace_start.tid, sample->time,
1735                     perf_time_to_tsc(sample->time, &pt->tc));
1736
1737        return machine__set_current_tid(pt->machine, sample->cpu,
1738                                        event->itrace_start.pid,
1739                                        event->itrace_start.tid);
1740}
1741
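    /*
     * Main event callback: convert the sample time to TSC, update and run the
     * decode queues, and handle the side-band events (truncated AUX records,
     * sched_switch, context switches, itrace start) that decoding depends on.
     */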
1742static int intel_pt_process_event(struct perf_session *session,
1743                                  union perf_event *event,
1744                                  struct perf_sample *sample,
1745                                  struct perf_tool *tool)
1746{
1747        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1748                                           auxtrace);
1749        u64 timestamp;
1750        int err = 0;
1751
1752        if (dump_trace)
1753                return 0;
1754
1755        if (!tool->ordered_events) {
1756                pr_err("Intel Processor Trace requires ordered events\n");
1757                return -EINVAL;
1758        }
1759
1760        if (sample->time && sample->time != (u64)-1)
1761                timestamp = perf_time_to_tsc(sample->time, &pt->tc);
1762        else
1763                timestamp = 0;
1764
1765        if (timestamp || pt->timeless_decoding) {
1766                err = intel_pt_update_queues(pt);
1767                if (err)
1768                        return err;
1769        }
1770
1771        if (pt->timeless_decoding) {
1772                if (event->header.type == PERF_RECORD_EXIT) {
1773                        err = intel_pt_process_timeless_queues(pt,
1774                                                               event->fork.tid,
1775                                                               sample->time);
1776                }
1777        } else if (timestamp) {
1778                err = intel_pt_process_queues(pt, timestamp);
1779        }
1780        if (err)
1781                return err;
1782
1783        if (event->header.type == PERF_RECORD_AUX &&
1784            (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
1785            pt->synth_opts.errors) {
1786                err = intel_pt_lost(pt, sample);
1787                if (err)
1788                        return err;
1789        }
1790
1791        if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
1792                err = intel_pt_process_switch(pt, sample);
1793        else if (event->header.type == PERF_RECORD_ITRACE_START)
1794                err = intel_pt_process_itrace_start(pt, event, sample);
1795        else if (event->header.type == PERF_RECORD_SWITCH ||
1796                 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
1797                err = intel_pt_context_switch(pt, event, sample);
1798
1799        intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
1800                     perf_event__name(event->header.type), event->header.type,
1801                     sample->cpu, sample->time, timestamp);
1802
1803        return err;
1804}
1805
1806static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
1807{
1808        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1809                                           auxtrace);
1810        int ret;
1811
1812        if (dump_trace)
1813                return 0;
1814
1815        if (!tool->ordered_events)
1816                return -EINVAL;
1817
1818        ret = intel_pt_update_queues(pt);
1819        if (ret < 0)
1820                return ret;
1821
1822        if (pt->timeless_decoding)
1823                return intel_pt_process_timeless_queues(pt, -1,
1824                                                        MAX_TIMESTAMP - 1);
1825
1826        return intel_pt_process_queues(pt, MAX_TIMESTAMP);
1827}
1828
1829static void intel_pt_free_events(struct perf_session *session)
1830{
1831        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1832                                           auxtrace);
1833        struct auxtrace_queues *queues = &pt->queues;
1834        unsigned int i;
1835
1836        for (i = 0; i < queues->nr_queues; i++) {
1837                intel_pt_free_queue(queues->queue_array[i].priv);
1838                queues->queue_array[i].priv = NULL;
1839        }
1840        intel_pt_log_disable();
1841        auxtrace_queues__free(queues);
1842}
1843
1844static void intel_pt_free(struct perf_session *session)
1845{
1846        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1847                                           auxtrace);
1848
1849        auxtrace_heap__free(&pt->heap);
1850        intel_pt_free_events(session);
1851        session->auxtrace = NULL;
1852        thread__put(pt->unknown_thread);
1853        addr_filters__exit(&pt->filts);
1854        zfree(&pt->filter);
1855        free(pt);
1856}
1857
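    /*
     * Queue AUX trace data as it is encountered.  When reading from a pipe
     * there is no seekable offset, so zero is used; otherwise the current
     * file position is recorded so that the buffer can be read back later.
     */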
1858static int intel_pt_process_auxtrace_event(struct perf_session *session,
1859                                           union perf_event *event,
1860                                           struct perf_tool *tool __maybe_unused)
1861{
1862        struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
1863                                           auxtrace);
1864
1865        if (pt->sampling_mode)
1866                return 0;
1867
1868        if (!pt->data_queued) {
1869                struct auxtrace_buffer *buffer;
1870                off_t data_offset;
1871                int fd = perf_data_file__fd(session->file);
1872                int err;
1873
1874                if (perf_data_file__is_pipe(session->file)) {
1875                        data_offset = 0;
1876                } else {
1877                        data_offset = lseek(fd, 0, SEEK_CUR);
1878                        if (data_offset == -1)
1879                                return -errno;
1880                }
1881
1882                err = auxtrace_queues__add_event(&pt->queues, session, event,
1883                                                 data_offset, &buffer);
1884                if (err)
1885                        return err;
1886
1887                /* Dump here now that we have copied a piped trace out of the pipe */
1888                if (dump_trace) {
1889                        if (auxtrace_buffer__get_data(buffer, fd)) {
1890                                intel_pt_dump_event(pt, buffer->data,
1891                                                    buffer->size);
1892                                auxtrace_buffer__put_data(buffer);
1893                        }
1894                }
1895        }
1896
1897        return 0;
1898}
1899
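    /*
     * Helper for injecting synthesized attribute events: a dummy perf_tool
     * whose callback delivers the event straight back into the session.
     */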
1900struct intel_pt_synth {
1901        struct perf_tool dummy_tool;
1902        struct perf_session *session;
1903};
1904
1905static int intel_pt_event_synth(struct perf_tool *tool,
1906                                union perf_event *event,
1907                                struct perf_sample *sample __maybe_unused,
1908                                struct machine *machine __maybe_unused)
1909{
1910        struct intel_pt_synth *intel_pt_synth =
1911                        container_of(tool, struct intel_pt_synth, dummy_tool);
1912
1913        return perf_session__deliver_synth_event(intel_pt_synth->session, event,
1914                                                 NULL);
1915}
1916
1917static int intel_pt_synth_event(struct perf_session *session,
1918                                struct perf_event_attr *attr, u64 id)
1919{
1920        struct intel_pt_synth intel_pt_synth;
1921
1922        memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
1923        intel_pt_synth.session = session;
1924
1925        return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
1926                                           &id, intel_pt_event_synth);
1927}
1928
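    /*
     * Create the perf event attributes for the samples the decoder will
     * synthesize (instructions, transactions, branches), basing them on the
     * first selected Intel PT event and deriving new sample ids from its
     * first id.
     */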
1929static int intel_pt_synth_events(struct intel_pt *pt,
1930                                 struct perf_session *session)
1931{
1932        struct perf_evlist *evlist = session->evlist;
1933        struct perf_evsel *evsel;
1934        struct perf_event_attr attr;
1935        bool found = false;
1936        u64 id;
1937        int err;
1938
1939        evlist__for_each_entry(evlist, evsel) {
1940                if (evsel->attr.type == pt->pmu_type && evsel->ids) {
1941                        found = true;
1942                        break;
1943                }
1944        }
1945
1946        if (!found) {
1947                pr_debug("There are no selected events with Intel Processor Trace data\n");
1948                return 0;
1949        }
1950
1951        memset(&attr, 0, sizeof(struct perf_event_attr));
1952        attr.size = sizeof(struct perf_event_attr);
1953        attr.type = PERF_TYPE_HARDWARE;
1954        attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
1955        attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1956                            PERF_SAMPLE_PERIOD;
1957        if (pt->timeless_decoding)
1958                attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1959        else
1960                attr.sample_type |= PERF_SAMPLE_TIME;
1961        if (!pt->per_cpu_mmaps)
1962                attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
1963        attr.exclude_user = evsel->attr.exclude_user;
1964        attr.exclude_kernel = evsel->attr.exclude_kernel;
1965        attr.exclude_hv = evsel->attr.exclude_hv;
1966        attr.exclude_host = evsel->attr.exclude_host;
1967        attr.exclude_guest = evsel->attr.exclude_guest;
1968        attr.sample_id_all = evsel->attr.sample_id_all;
1969        attr.read_format = evsel->attr.read_format;
1970
1971        id = evsel->id[0] + 1000000000;
1972        if (!id)
1973                id = 1;
1974
1975        if (pt->synth_opts.instructions) {
1976                attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1977                if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
1978                        attr.sample_period =
1979                                intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
1980                else
1981                        attr.sample_period = pt->synth_opts.period;
1982                pt->instructions_sample_period = attr.sample_period;
1983                if (pt->synth_opts.callchain)
1984                        attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
1985                if (pt->synth_opts.last_branch)
1986                        attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1987                pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
1988                         id, (u64)attr.sample_type);
1989                err = intel_pt_synth_event(session, &attr, id);
1990                if (err) {
1991                        pr_err("%s: failed to synthesize 'instructions' event type\n",
1992                               __func__);
1993                        return err;
1994                }
1995                pt->sample_instructions = true;
1996                pt->instructions_sample_type = attr.sample_type;
1997                pt->instructions_id = id;
1998                id += 1;
1999        }
2000
2001        if (pt->synth_opts.transactions) {
2002                attr.config = PERF_COUNT_HW_INSTRUCTIONS;
2003                attr.sample_period = 1;
2004                if (pt->synth_opts.callchain)
2005                        attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
2006                if (pt->synth_opts.last_branch)
2007                        attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
2008                pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2009                         id, (u64)attr.sample_type);
2010                err = intel_pt_synth_event(session, &attr, id);
2011                if (err) {
2012                        pr_err("%s: failed to synthesize 'transactions' event type\n",
2013                               __func__);
2014                        return err;
2015                }
2016                pt->sample_transactions = true;
2017                pt->transactions_id = id;
2018                id += 1;
2019                evlist__for_each_entry(evlist, evsel) {
2020                        if (evsel->id && evsel->id[0] == pt->transactions_id) {
2021                                if (evsel->name)
2022                                        zfree(&evsel->name);
2023                                evsel->name = strdup("transactions");
2024                                break;
2025                        }
2026                }
2027        }
2028
2029        if (pt->synth_opts.branches) {
2030                attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
2031                attr.sample_period = 1;
2032                attr.sample_type |= PERF_SAMPLE_ADDR;
2033                attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
2034                attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
2035                pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
2036                         id, (u64)attr.sample_type);
2037                err = intel_pt_synth_event(session, &attr, id);
2038                if (err) {
2039                        pr_err("%s: failed to synthesize 'branches' event type\n",
2040                               __func__);
2041                        return err;
2042                }
2043                pt->sample_branches = true;
2044                pt->branches_sample_type = attr.sample_type;
2045                pt->branches_id = id;
2046        }
2047
2048        pt->synth_needs_swap = evsel->needs_swap;
2049
2050        return 0;
2051}
2052
2053static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
2054{
2055        struct perf_evsel *evsel;
2056
2057        evlist__for_each_entry_reverse(evlist, evsel) {
2058                const char *name = perf_evsel__name(evsel);
2059
2060                if (!strcmp(name, "sched:sched_switch"))
2061                        return evsel;
2062        }
2063
2064        return NULL;
2065}
2066
2067static bool intel_pt_find_switch(struct perf_evlist *evlist)
2068{
2069        struct perf_evsel *evsel;
2070
2071        evlist__for_each_entry(evlist, evsel) {
2072                if (evsel->attr.context_switch)
2073                        return true;
2074        }
2075
2076        return false;
2077}
2078
2079static int intel_pt_perf_config(const char *var, const char *value, void *data)
2080{
2081        struct intel_pt *pt = data;
2082
2083        if (!strcmp(var, "intel-pt.mispred-all"))
2084                pt->mispred_all = perf_config_bool(var, value);
2085
2086        return 0;
2087}
2088
2089static const char * const intel_pt_info_fmts[] = {
2090        [INTEL_PT_PMU_TYPE]             = "  PMU Type            %"PRId64"\n",
2091        [INTEL_PT_TIME_SHIFT]           = "  Time Shift          %"PRIu64"\n",
2092        [INTEL_PT_TIME_MULT]            = "  Time Multiplier     %"PRIu64"\n",
2093        [INTEL_PT_TIME_ZERO]            = "  Time Zero           %"PRIu64"\n",
2094        [INTEL_PT_CAP_USER_TIME_ZERO]   = "  Cap Time Zero       %"PRId64"\n",
2095        [INTEL_PT_TSC_BIT]              = "  TSC bit             %#"PRIx64"\n",
2096        [INTEL_PT_NORETCOMP_BIT]        = "  NoRETComp bit       %#"PRIx64"\n",
2097        [INTEL_PT_HAVE_SCHED_SWITCH]    = "  Have sched_switch   %"PRId64"\n",
2098        [INTEL_PT_SNAPSHOT_MODE]        = "  Snapshot mode       %"PRId64"\n",
2099        [INTEL_PT_PER_CPU_MMAPS]        = "  Per-cpu maps        %"PRId64"\n",
2100        [INTEL_PT_MTC_BIT]              = "  MTC bit             %#"PRIx64"\n",
2101        [INTEL_PT_TSC_CTC_N]            = "  TSC:CTC numerator   %"PRIu64"\n",
2102        [INTEL_PT_TSC_CTC_D]            = "  TSC:CTC denominator %"PRIu64"\n",
2103        [INTEL_PT_CYC_BIT]              = "  CYC bit             %#"PRIx64"\n",
2104        [INTEL_PT_MAX_NONTURBO_RATIO]   = "  Max non-turbo ratio %"PRIu64"\n",
2105        [INTEL_PT_FILTER_STR_LEN]       = "  Filter string len.  %"PRIu64"\n",
2106};
2107
2108static void intel_pt_print_info(u64 *arr, int start, int finish)
2109{
2110        int i;
2111
2112        if (!dump_trace)
2113                return;
2114
2115        for (i = start; i <= finish; i++)
2116                fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
2117}
2118
2119static void intel_pt_print_info_str(const char *name, const char *str)
2120{
2121        if (!dump_trace)
2122                return;
2123
2124        fprintf(stdout, "  %-20s%s\n", name, str ? str : "");
2125}
2126
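    /* Return true if the auxtrace_info event is big enough to contain priv[pos] */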
2127static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
2128{
2129        return auxtrace_info->header.size >=
2130                sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
2131}
2132
2133int intel_pt_process_auxtrace_info(union perf_event *event,
2134                                   struct perf_session *session)
2135{
2136        struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
2137        size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
2138        struct intel_pt *pt;
2139        void *info_end;
2140        u64 *info;
2141        int err;
2142
2143        if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
2144                                        min_sz)
2145                return -EINVAL;
2146
2147        pt = zalloc(sizeof(struct intel_pt));
2148        if (!pt)
2149                return -ENOMEM;
2150
2151        addr_filters__init(&pt->filts);
2152
2153        perf_config(intel_pt_perf_config, pt);
2154
2155        err = auxtrace_queues__init(&pt->queues);
2156        if (err)
2157                goto err_free;
2158
2159        intel_pt_log_set_name(INTEL_PT_PMU_NAME);
2160
2161        pt->session = session;
2162        pt->machine = &session->machines.host; /* No kvm support */
2163        pt->auxtrace_type = auxtrace_info->type;
2164        pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
2165        pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
2166        pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
2167        pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
2168        pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
2169        pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
2170        pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
2171        pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
2172        pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
2173        pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
2174        intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
2175                            INTEL_PT_PER_CPU_MMAPS);
2176
2177        if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
2178                pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
2179                pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
2180                pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
2181                pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
2182                pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
2183                intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
2184                                    INTEL_PT_CYC_BIT);
2185        }
2186
2187        if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
2188                pt->max_non_turbo_ratio =
2189                        auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
2190                intel_pt_print_info(&auxtrace_info->priv[0],
2191                                    INTEL_PT_MAX_NONTURBO_RATIO,
2192                                    INTEL_PT_MAX_NONTURBO_RATIO);
2193        }
2194
2195        info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
2196        info_end = (void *)info + auxtrace_info->header.size;
2197
2198        if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
2199                size_t len;
2200
2201                len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
2202                intel_pt_print_info(&auxtrace_info->priv[0],
2203                                    INTEL_PT_FILTER_STR_LEN,
2204                                    INTEL_PT_FILTER_STR_LEN);
2205                if (len) {
2206                        const char *filter = (const char *)info;
2207
2208                        len = roundup(len + 1, 8);
2209                        info += len >> 3;
2210                        if ((void *)info > info_end) {
2211                                pr_err("%s: bad filter string length\n", __func__);
2212                                err = -EINVAL;
2213                                goto err_free_queues;
2214                        }
2215                        pt->filter = memdup(filter, len);
2216                        if (!pt->filter) {
2217                                err = -ENOMEM;
2218                                goto err_free_queues;
2219                        }
2220                        if (session->header.needs_swap)
2221                                mem_bswap_64(pt->filter, len);
2222                        if (pt->filter[len - 1]) {
2223                                pr_err("%s: filter string not null terminated\n", __func__);
2224                                err = -EINVAL;
2225                                goto err_free_queues;
2226                        }
2227                        err = addr_filters__parse_bare_filter(&pt->filts,
2228                                                              filter);
2229                        if (err)
2230                                goto err_free_queues;
2231                }
2232                intel_pt_print_info_str("Filter string", pt->filter);
2233        }
2234
2235        pt->timeless_decoding = intel_pt_timeless_decoding(pt);
2236        pt->have_tsc = intel_pt_have_tsc(pt);
2237        pt->sampling_mode = false;
2238        pt->est_tsc = !pt->timeless_decoding;
2239
2240        pt->unknown_thread = thread__new(999999999, 999999999);
2241        if (!pt->unknown_thread) {
2242                err = -ENOMEM;
2243                goto err_free_queues;
2244        }
2245
2246        /*
2247         * Since this thread will not be kept in any rbtree nor in a
2248         * list, initialize its list node so that at thread__put() the
2249         * current thread lifetime assumption is kept and we don't segfault
2250         * at list_del_init().
2251         */
2252        INIT_LIST_HEAD(&pt->unknown_thread->node);
2253
2254        err = thread__set_comm(pt->unknown_thread, "unknown", 0);
2255        if (err)
2256                goto err_delete_thread;
2257        if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
2258                err = -ENOMEM;
2259                goto err_delete_thread;
2260        }
2261
2262        pt->auxtrace.process_event = intel_pt_process_event;
2263        pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
2264        pt->auxtrace.flush_events = intel_pt_flush;
2265        pt->auxtrace.free_events = intel_pt_free_events;
2266        pt->auxtrace.free = intel_pt_free;
2267        session->auxtrace = &pt->auxtrace;
2268
2269        if (dump_trace)
2270                return 0;
2271
2272        if (pt->have_sched_switch == 1) {
2273                pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
2274                if (!pt->switch_evsel) {
2275                        pr_err("%s: missing sched_switch event\n", __func__);
2276                        err = -EINVAL;
2277                        goto err_delete_thread;
2278                }
2279        } else if (pt->have_sched_switch == 2 &&
2280                   !intel_pt_find_switch(session->evlist)) {
2281                pr_err("%s: missing context_switch attribute flag\n", __func__);
2282                err = -EINVAL;
2283                goto err_delete_thread;
2284        }
2285
2286        if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
2287                pt->synth_opts = *session->itrace_synth_opts;
2288        } else {
2289                itrace_synth_opts__set_default(&pt->synth_opts);
2290                if (use_browser != -1) {
2291                        pt->synth_opts.branches = false;
2292                        pt->synth_opts.callchain = true;
2293                }
2294                if (session->itrace_synth_opts)
2295                        pt->synth_opts.thread_stack =
2296                                session->itrace_synth_opts->thread_stack;
2297        }
2298
2299        if (pt->synth_opts.log)
2300                intel_pt_log_enable();
2301
2302        /* Maximum non-turbo ratio is TSC freq / 100 MHz */
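            /* e.g. a 3.4 GHz TSC: (3400000000 + 50000000) / 100000000 = 34 */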
2303        if (pt->tc.time_mult) {
2304                u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
2305
2306                if (!pt->max_non_turbo_ratio)
2307                        pt->max_non_turbo_ratio =
2308                                        (tsc_freq + 50000000) / 100000000;
2309                intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
2310                intel_pt_log("Maximum non-turbo ratio %u\n",
2311                             pt->max_non_turbo_ratio);
2312        }
2313
2314        if (pt->synth_opts.calls)
2315                pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
2316                                       PERF_IP_FLAG_TRACE_END;
2317        if (pt->synth_opts.returns)
2318                pt->branches_filter |= PERF_IP_FLAG_RETURN |
2319                                       PERF_IP_FLAG_TRACE_BEGIN;
2320
2321        if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
2322                symbol_conf.use_callchain = true;
2323                if (callchain_register_param(&callchain_param) < 0) {
2324                        symbol_conf.use_callchain = false;
2325                        pt->synth_opts.callchain = false;
2326                }
2327        }
2328
2329        err = intel_pt_synth_events(pt, session);
2330        if (err)
2331                goto err_delete_thread;
2332
2333        err = auxtrace_queues__process_index(&pt->queues, session);
2334        if (err)
2335                goto err_delete_thread;
2336
2337        if (pt->queues.populated)
2338                pt->data_queued = true;
2339
2340        if (pt->timeless_decoding)
2341                pr_debug2("Intel PT decoding without timestamps\n");
2342
2343        return 0;
2344
2345err_delete_thread:
2346        thread__zput(pt->unknown_thread);
2347err_free_queues:
2348        intel_pt_log_disable();
2349        auxtrace_queues__free(&pt->queues);
2350        session->auxtrace = NULL;
2351err_free:
2352        addr_filters__exit(&pt->filts);
2353        zfree(&pt->filter);
2354        free(pt);
2355        return err;
2356}
2357