/* linux/tools/perf/util/arm-spe.c */
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Arm Statistical Profiling Extensions (SPE) support
   4 * Copyright (c) 2017-2018, Arm Ltd.
   5 */
   6
   7#include <byteswap.h>
   8#include <endian.h>
   9#include <errno.h>
  10#include <inttypes.h>
  11#include <linux/bitops.h>
  12#include <linux/kernel.h>
  13#include <linux/log2.h>
  14#include <linux/types.h>
  15#include <linux/zalloc.h>
  16#include <stdlib.h>
  17#include <unistd.h>
  18
  19#include "auxtrace.h"
  20#include "color.h"
  21#include "debug.h"
  22#include "evlist.h"
  23#include "evsel.h"
  24#include "machine.h"
  25#include "session.h"
  26#include "symbol.h"
  27#include "thread.h"
  28#include "thread-stack.h"
  29#include "tsc.h"
  30#include "tool.h"
  31#include "util/synthetic-events.h"
  32
  33#include "arm-spe.h"
  34#include "arm-spe-decoder/arm-spe-decoder.h"
  35#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
  36
  37#define MAX_TIMESTAMP (~0ULL)
  38
/*
 * Per-session decoder state for Arm SPE trace data.  Embedded in the
 * session via the auxtrace member (container_of() is used to recover it
 * in the session callbacks below).
 */
struct arm_spe {
	struct auxtrace			auxtrace;	/* session callback vtable; container_of() anchor */
	struct auxtrace_queues		queues;		/* one queue per CPU/thread of trace data */
	struct auxtrace_heap		heap;		/* min-heap ordering queues by timestamp */
	struct itrace_synth_opts	synth_opts;
	u32				auxtrace_type;
	struct perf_session		*session;
	struct machine			*machine;
	u32				pmu_type;	/* attr.type of the SPE PMU events */

	struct perf_tsc_conversion	tc;		/* TSC <-> perf time conversion parameters */

	/* Set when no event in the session records PERF_SAMPLE_TIME */
	u8				timeless_decoding;
	u8				data_queued;

	/* Which synthetic sample classes to emit (see arm_spe_sample()) */
	u8				sample_flc;
	u8				sample_llc;
	u8				sample_tlb;
	u8				sample_branch;
	u8				sample_remote_access;
	u8				sample_memory;

	/* Sample ids used for the synthesized events of each class */
	u64				l1d_miss_id;
	u64				l1d_access_id;
	u64				llc_miss_id;
	u64				llc_access_id;
	u64				tlb_miss_id;
	u64				tlb_access_id;
	u64				branch_miss_id;
	u64				remote_access_id;
	u64				memory_id;

	u64				kernel_start;	/* boundary for user/kernel cpumode classification */

	unsigned long			num_events;
};
  75
/*
 * Per-queue decode state; stored in auxtrace_queue->priv.  One instance
 * exists per auxtrace queue (per CPU, or per thread in per-thread mode).
 */
struct arm_spe_queue {
	struct arm_spe			*spe;		/* back-pointer to session state */
	unsigned int			queue_nr;
	struct auxtrace_buffer		*buffer;	/* buffer currently being decoded */
	struct auxtrace_buffer		*old_buffer;	/* previous buffer, dropped once drained */
	union perf_event		*event_buf;	/* scratch space for synthesized samples */
	bool				on_heap;	/* queued on spe->heap for time ordering */
	bool				done;
	pid_t				pid;
	pid_t				tid;
	int				cpu;
	struct arm_spe_decoder		*decoder;
	u64				time;
	u64				timestamp;	/* latest record timestamp seen on this queue */
	struct thread			*thread;	/* resolved thread for pid/cpu attribution */
};
  92
  93static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
  94                         unsigned char *buf, size_t len)
  95{
  96        struct arm_spe_pkt packet;
  97        size_t pos = 0;
  98        int ret, pkt_len, i;
  99        char desc[ARM_SPE_PKT_DESC_MAX];
 100        const char *color = PERF_COLOR_BLUE;
 101
 102        color_fprintf(stdout, color,
 103                      ". ... ARM SPE data: size %zu bytes\n",
 104                      len);
 105
 106        while (len) {
 107                ret = arm_spe_get_packet(buf, len, &packet);
 108                if (ret > 0)
 109                        pkt_len = ret;
 110                else
 111                        pkt_len = 1;
 112                printf(".");
 113                color_fprintf(stdout, color, "  %08x: ", pos);
 114                for (i = 0; i < pkt_len; i++)
 115                        color_fprintf(stdout, color, " %02x", buf[i]);
 116                for (; i < 16; i++)
 117                        color_fprintf(stdout, color, "   ");
 118                if (ret > 0) {
 119                        ret = arm_spe_pkt_desc(&packet, desc,
 120                                               ARM_SPE_PKT_DESC_MAX);
 121                        if (!ret)
 122                                color_fprintf(stdout, color, " %s\n", desc);
 123                } else {
 124                        color_fprintf(stdout, color, " Bad packet!\n");
 125                }
 126                pos += pkt_len;
 127                buf += pkt_len;
 128                len -= pkt_len;
 129        }
 130}
 131
 132static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
 133                               size_t len)
 134{
 135        printf(".\n");
 136        arm_spe_dump(spe, buf, len);
 137}
 138
/*
 * Decoder callback: hand the next chunk of raw trace data to the SPE
 * decoder.  Advances speq->buffer through the queue, lazily loading data
 * from the perf.data file, and keeps the previous buffer alive (as
 * old_buffer) until the decoder has moved past it.  Returns 0 with
 * b->len == 0 when the queue is exhausted.
 */
static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
{
	struct arm_spe_queue *speq = data;
	struct auxtrace_buffer *buffer = speq->buffer;
	struct auxtrace_buffer *old_buffer = speq->old_buffer;
	struct auxtrace_queue *queue;

	queue = &speq->spe->queues.queue_array[speq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	/* If no more data, drop the previous auxtrace_buffer and return */
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	speq->buffer = buffer;

	/* If the aux_buffer doesn't have data associated, try to load it */
	if (!buffer->data) {
		/* get the file desc associated with the perf data file */
		int fd = perf_data__fd(speq->spe->session->data);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	b->len = buffer->size;
	b->buf = buffer->data;

	if (b->len) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		speq->old_buffer = buffer;
	} else {
		/* Empty buffer: discard it and recurse to fetch the next one */
		auxtrace_buffer__drop_data(buffer);
		return arm_spe_get_trace(b, data);
	}

	return 0;
}
 183
 184static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
 185                unsigned int queue_nr)
 186{
 187        struct arm_spe_params params = { .get_trace = 0, };
 188        struct arm_spe_queue *speq;
 189
 190        speq = zalloc(sizeof(*speq));
 191        if (!speq)
 192                return NULL;
 193
 194        speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
 195        if (!speq->event_buf)
 196                goto out_free;
 197
 198        speq->spe = spe;
 199        speq->queue_nr = queue_nr;
 200        speq->pid = -1;
 201        speq->tid = -1;
 202        speq->cpu = -1;
 203
 204        /* params set */
 205        params.get_trace = arm_spe_get_trace;
 206        params.data = speq;
 207
 208        /* create new decoder */
 209        speq->decoder = arm_spe_decoder_new(&params);
 210        if (!speq->decoder)
 211                goto out_free;
 212
 213        return speq;
 214
 215out_free:
 216        zfree(&speq->event_buf);
 217        free(speq);
 218
 219        return NULL;
 220}
 221
 222static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
 223{
 224        return ip >= spe->kernel_start ?
 225                PERF_RECORD_MISC_KERNEL :
 226                PERF_RECORD_MISC_USER;
 227}
 228
/*
 * Fill in the fields common to every synthesized SPE sample (time, ip,
 * cpumode, pid/tid, period, cpu) and the sample event header.  Callers
 * then set the class-specific fields (id, addr, data_src, ...).
 */
static void arm_spe_prep_sample(struct arm_spe *spe,
				struct arm_spe_queue *speq,
				union perf_event *event,
				struct perf_sample *sample)
{
	struct arm_spe_record *record = &speq->decoder->record;

	/* Only convert the timestamp when events carry time */
	if (!spe->timeless_decoding)
		sample->time = tsc_to_perf_time(record->timestamp, &spe->tc);

	sample->ip = record->from_ip;
	sample->cpumode = arm_spe_cpumode(spe, sample->ip);
	sample->pid = speq->pid;
	sample->tid = speq->tid;
	sample->period = 1;
	sample->cpu = speq->cpu;

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = sample->cpumode;
	event->sample.header.size = sizeof(struct perf_event_header);
}
 250
 251static inline int
 252arm_spe_deliver_synth_event(struct arm_spe *spe,
 253                            struct arm_spe_queue *speq __maybe_unused,
 254                            union perf_event *event,
 255                            struct perf_sample *sample)
 256{
 257        int ret;
 258
 259        ret = perf_session__deliver_synth_event(spe->session, event, sample);
 260        if (ret)
 261                pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
 262
 263        return ret;
 264}
 265
/*
 * Synthesize and deliver one memory-access sample for the current decoder
 * record, tagged with the given event id and PERF_SAMPLE_DATA_SRC encoding.
 */
static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
				     u64 spe_events_id, u64 data_src)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	/* Memory samples carry both virtual and physical data addresses */
	sample.addr = record->virt_addr;
	sample.phys_addr = record->phys_addr;
	sample.data_src = data_src;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
 284
/*
 * Synthesize and deliver one branch sample for the current decoder record.
 * Unlike memory samples, addr holds the branch target (to_ip).
 */
static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
					u64 spe_events_id)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record = &speq->decoder->record;
	union perf_event *event = speq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	arm_spe_prep_sample(spe, speq, event, &sample);

	sample.id = spe_events_id;
	sample.stream_id = spe_events_id;
	sample.addr = record->to_ip;

	return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
 301
 302#define SPE_MEM_TYPE    (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
 303                         ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
 304                         ARM_SPE_REMOTE_ACCESS)
 305
 306static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
 307{
 308        if (type & SPE_MEM_TYPE)
 309                return true;
 310
 311        return false;
 312}
 313
/*
 * Encode a decoder record's type bits into a PERF_SAMPLE_DATA_SRC value:
 * load/store op, cache level hit/miss, remote access, and TLB walk result.
 */
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
{
	union perf_mem_data_src data_src = { 0 };

	if (record->op == ARM_SPE_LD)
		data_src.mem_op = PERF_MEM_OP_LOAD;
	else
		data_src.mem_op = PERF_MEM_OP_STORE;

	/*
	 * LLC takes priority over L1D when both are flagged: only one
	 * cache level can be reported in mem_lvl.
	 */
	if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L3;

		if (record->type & ARM_SPE_LLC_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	} else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
		data_src.mem_lvl = PERF_MEM_LVL_L1;

		if (record->type & ARM_SPE_L1D_MISS)
			data_src.mem_lvl |= PERF_MEM_LVL_MISS;
		else
			data_src.mem_lvl |= PERF_MEM_LVL_HIT;
	}

	if (record->type & ARM_SPE_REMOTE_ACCESS)
		data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;

	/* TLB result is reported independently of the cache level */
	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
		data_src.mem_dtlb = PERF_MEM_TLB_WK;

		if (record->type & ARM_SPE_TLB_MISS)
			data_src.mem_dtlb |= PERF_MEM_TLB_MISS;
		else
			data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
	}

	return data_src.val;
}
 353
/*
 * Synthesize all enabled sample classes for the decoder's current record.
 * A single record may produce several samples (e.g. an L1D miss that is
 * also a TLB miss and a generic memory event).  Returns 0 on success or
 * the first delivery error.
 */
static int arm_spe_sample(struct arm_spe_queue *speq)
{
	const struct arm_spe_record *record = &speq->decoder->record;
	struct arm_spe *spe = speq->spe;
	u64 data_src;
	int err;

	/* One data_src encoding is shared by every memory sample below */
	data_src = arm_spe__synth_data_source(record);

	if (spe->sample_flc) {
		if (record->type & ARM_SPE_L1D_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_L1D_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->l1d_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_llc) {
		if (record->type & ARM_SPE_LLC_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_LLC_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->llc_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_tlb) {
		if (record->type & ARM_SPE_TLB_MISS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_miss_id,
							data_src);
			if (err)
				return err;
		}

		if (record->type & ARM_SPE_TLB_ACCESS) {
			err = arm_spe__synth_mem_sample(speq, spe->tlb_access_id,
							data_src);
			if (err)
				return err;
		}
	}

	if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
		err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id);
		if (err)
			return err;
	}

	if (spe->sample_remote_access &&
	    (record->type & ARM_SPE_REMOTE_ACCESS)) {
		err = arm_spe__synth_mem_sample(speq, spe->remote_access_id,
						data_src);
		if (err)
			return err;
	}

	/* Generic "mem" event covering any cache or remote access */
	if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
		err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
		if (err)
			return err;
	}

	return 0;
}
 433
/*
 * Decode records from one queue, synthesizing samples as it goes, until
 * the queue's timestamp reaches *timestamp (then return 0, updating
 * *timestamp to the queue's position) or the trace data is exhausted
 * (return 1).  Delivery failures return a negative error.
 */
static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
{
	struct arm_spe *spe = speq->spe;
	struct arm_spe_record *record;
	int ret;

	/* Lazily resolve the kernel boundary used for cpumode classification */
	if (!spe->kernel_start)
		spe->kernel_start = machine__kernel_start(spe->machine);

	while (1) {
		/*
		 * The usual logic is firstly to decode the packets, and then
		 * based the record to synthesize sample; but here the flow is
		 * reversed: it calls arm_spe_sample() for synthesizing samples
		 * prior to arm_spe_decode().
		 *
		 * Two reasons for this code logic:
		 * 1. Firstly, when setup queue in arm_spe__setup_queue(), it
		 * has decoded trace data and generated a record, but the record
		 * is left to generate sample until run to here, so it's correct
		 * to synthesize sample for the left record.
		 * 2. After decoding trace data, it needs to compare the record
		 * timestamp with the coming perf event, if the record timestamp
		 * is later than the perf event, it needs bail out and pushs the
		 * record into auxtrace heap, thus the record can be deferred to
		 * synthesize sample until run to here at the next time; so this
		 * can correlate samples between Arm SPE trace data and other
		 * perf events with correct time ordering.
		 */
		ret = arm_spe_sample(speq);
		if (ret)
			return ret;

		ret = arm_spe_decode(speq->decoder);
		if (!ret) {
			pr_debug("No data or all data has been processed.\n");
			return 1;
		}

		/*
		 * Error is detected when decode SPE trace data, continue to
		 * the next trace data and find out more records.
		 */
		if (ret < 0)
			continue;

		record = &speq->decoder->record;

		/* Update timestamp for the last record */
		if (record->timestamp > speq->timestamp)
			speq->timestamp = record->timestamp;

		/*
		 * If the timestamp of the queue is later than timestamp of the
		 * coming perf event, bail out so can allow the perf event to
		 * be processed ahead.
		 */
		if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
			*timestamp = speq->timestamp;
			return 0;
		}
	}

	/* Unreachable: the loop above only exits via return */
	return 0;
}
 499
/*
 * Create the per-queue decode state for a queue that has trace data but no
 * state yet.  For timed decoding, decode up to the first valid record and
 * push the queue onto the timestamp-ordered heap; the record itself is
 * synthesized later by arm_spe_run_decoder().
 */
static int arm_spe__setup_queue(struct arm_spe *spe,
			       struct auxtrace_queue *queue,
			       unsigned int queue_nr)
{
	struct arm_spe_queue *speq = queue->priv;
	struct arm_spe_record *record;

	/* Nothing to do for an empty queue or one already set up */
	if (list_empty(&queue->head) || speq)
		return 0;

	speq = arm_spe__alloc_queue(spe, queue_nr);

	if (!speq)
		return -ENOMEM;

	queue->priv = speq;

	if (queue->cpu != -1)
		speq->cpu = queue->cpu;

	if (!speq->on_heap) {
		int ret;

		/* Timeless queues are drained later, not heap-ordered */
		if (spe->timeless_decoding)
			return 0;

retry:
		ret = arm_spe_decode(speq->decoder);

		if (!ret)
			return 0;

		/* Decode errors: skip bad data and retry for a valid record */
		if (ret < 0)
			goto retry;

		record = &speq->decoder->record;

		speq->timestamp = record->timestamp;
		ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
		if (ret)
			return ret;
		speq->on_heap = true;
	}

	return 0;
}
 546
 547static int arm_spe__setup_queues(struct arm_spe *spe)
 548{
 549        unsigned int i;
 550        int ret;
 551
 552        for (i = 0; i < spe->queues.nr_queues; i++) {
 553                ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
 554                if (ret)
 555                        return ret;
 556        }
 557
 558        return 0;
 559}
 560
 561static int arm_spe__update_queues(struct arm_spe *spe)
 562{
 563        if (spe->queues.new_data) {
 564                spe->queues.new_data = false;
 565                return arm_spe__setup_queues(spe);
 566        }
 567
 568        return 0;
 569}
 570
 571static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
 572{
 573        struct evsel *evsel;
 574        struct evlist *evlist = spe->session->evlist;
 575        bool timeless_decoding = true;
 576
 577        /*
 578         * Circle through the list of event and complain if we find one
 579         * with the time bit set.
 580         */
 581        evlist__for_each_entry(evlist, evsel) {
 582                if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
 583                        timeless_decoding = false;
 584        }
 585
 586        return timeless_decoding;
 587}
 588
/*
 * Refresh the queue's pid/tid/cpu attribution: prefer the tid currently
 * running on the queue's CPU (context-switch tracking), falling back to
 * the queue's own tid, then resolve the thread to obtain the pid.
 */
static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
				    struct auxtrace_queue *queue)
{
	struct arm_spe_queue *speq = queue->priv;
	pid_t tid;

	tid = machine__get_current_tid(spe->machine, speq->cpu);
	if (tid != -1) {
		speq->tid = tid;
		/* tid changed: drop the cached thread so it is re-resolved */
		thread__zput(speq->thread);
	} else
		speq->tid = queue->tid;

	if ((!speq->thread) && (speq->tid != -1)) {
		speq->thread = machine__find_thread(spe->machine, -1,
						    speq->tid);
	}

	if (speq->thread) {
		speq->pid = speq->thread->pid_;
		/* Per-thread mode: take the cpu from the thread */
		if (queue->cpu == -1)
			speq->cpu = speq->thread->cpu;
	}
}
 613
/*
 * Drain all queues in timestamp order up to (but not including) the given
 * timestamp.  Repeatedly pops the earliest queue from the heap, decodes it
 * until it reaches the next queue's position (or the limit), then re-adds
 * it at its new position.  Queues that run out of data stay off the heap.
 */
static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct arm_spe_queue *speq;

		if (!spe->heap.heap_cnt)
			return 0;

		/* Earliest queue is already at/past the limit: done for now */
		if (spe->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = spe->heap.heap_array[0].queue_nr;
		queue = &spe->queues.queue_array[queue_nr];
		speq = queue->priv;

		auxtrace_heap__pop(&spe->heap);

		/*
		 * Decode only up to just past the next-earliest queue, so the
		 * queues stay interleaved in time order.
		 */
		if (spe->heap.heap_cnt) {
			ts = spe->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		arm_spe_set_pid_tid_cpu(spe, queue);

		ret = arm_spe_run_decoder(speq, &ts);
		if (ret < 0) {
			/* Put the queue back so processing can resume later */
			auxtrace_heap__add(&spe->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			/* Queue paused at ts: re-add it at its new position */
			ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			/* Queue exhausted (ret == 1): leave it off the heap */
			speq->on_heap = false;
		}
	}

	/* Unreachable: the loop above only exits via return */
	return 0;
}
 663
/*
 * Timeless mode: fully drain every queue matching tid (or all queues when
 * tid == -1), ignoring timestamps.  Decoder errors per queue are not
 * propagated; each queue is processed best-effort.
 */
static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
					    u64 time_)
{
	struct auxtrace_queues *queues = &spe->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &spe->queues.queue_array[i];
		struct arm_spe_queue *speq = queue->priv;

		if (speq && (tid == -1 || speq->tid == tid)) {
			speq->time = time_;
			arm_spe_set_pid_tid_cpu(spe, queue);
			arm_spe_run_decoder(speq, &ts);
		}
	}
	return 0;
}
 683
/*
 * Session callback for each perf event.  Converts the event time to TSC
 * and decodes SPE trace up to that point, so synthesized samples stay
 * time-ordered relative to the other events.  Requires ordered events.
 */
static int arm_spe_process_event(struct perf_session *session,
				 union perf_event *event,
				 struct perf_sample *sample,
				 struct perf_tool *tool)
{
	int err = 0;
	u64 timestamp;
	struct arm_spe *spe = container_of(session->auxtrace,
			struct arm_spe, auxtrace);

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("SPE trace requires ordered events\n");
		return -EINVAL;
	}

	/* (u64)-1 marks an invalid/unset sample time */
	if (sample->time && (sample->time != (u64) -1))
		timestamp = perf_time_to_tsc(sample->time, &spe->tc);
	else
		timestamp = 0;

	if (timestamp || spe->timeless_decoding) {
		err = arm_spe__update_queues(spe);
		if (err)
			return err;
	}

	if (spe->timeless_decoding) {
		/*
		 * Timeless: drain the exiting task's queues on EXIT (EXIT
		 * records share the fork event layout, hence event->fork).
		 */
		if (event->header.type == PERF_RECORD_EXIT) {
			err = arm_spe_process_timeless_queues(spe,
					event->fork.tid,
					sample->time);
		}
	} else if (timestamp) {
		err = arm_spe_process_queues(spe, timestamp);
	}

	return err;
}
 725
/*
 * Session callback for PERF_RECORD_AUXTRACE: queue the raw SPE data for
 * later decoding (unless it was already queued at load time), and dump it
 * immediately when running in dump mode.
 */
static int arm_spe_process_auxtrace_event(struct perf_session *session,
					  union perf_event *event,
					  struct perf_tool *tool __maybe_unused)
{
	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
					     auxtrace);

	if (!spe->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data__fd(session->data);
		int err;

		/* Pipe input has no seekable offset; file input records it */
		if (perf_data__is_pipe(session->data)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&spe->queues, session, event,
				data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				arm_spe_dump_event(spe, buffer->data,
						buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}
 764
 765static int arm_spe_flush(struct perf_session *session __maybe_unused,
 766                         struct perf_tool *tool __maybe_unused)
 767{
 768        struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
 769                        auxtrace);
 770        int ret;
 771
 772        if (dump_trace)
 773                return 0;
 774
 775        if (!tool->ordered_events)
 776                return -EINVAL;
 777
 778        ret = arm_spe__update_queues(spe);
 779        if (ret < 0)
 780                return ret;
 781
 782        if (spe->timeless_decoding)
 783                return arm_spe_process_timeless_queues(spe, -1,
 784                                MAX_TIMESTAMP - 1);
 785
 786        return arm_spe_process_queues(spe, MAX_TIMESTAMP);
 787}
 788
 789static void arm_spe_free_queue(void *priv)
 790{
 791        struct arm_spe_queue *speq = priv;
 792
 793        if (!speq)
 794                return;
 795        thread__zput(speq->thread);
 796        arm_spe_decoder_free(speq->decoder);
 797        zfree(&speq->event_buf);
 798        free(speq);
 799}
 800
 801static void arm_spe_free_events(struct perf_session *session)
 802{
 803        struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
 804                                             auxtrace);
 805        struct auxtrace_queues *queues = &spe->queues;
 806        unsigned int i;
 807
 808        for (i = 0; i < queues->nr_queues; i++) {
 809                arm_spe_free_queue(queues->queue_array[i].priv);
 810                queues->queue_array[i].priv = NULL;
 811        }
 812        auxtrace_queues__free(queues);
 813}
 814
 815static void arm_spe_free(struct perf_session *session)
 816{
 817        struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
 818                                             auxtrace);
 819
 820        auxtrace_heap__free(&spe->heap);
 821        arm_spe_free_events(session);
 822        session->auxtrace = NULL;
 823        free(spe);
 824}
 825
 826static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
 827                                      struct evsel *evsel)
 828{
 829        struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
 830
 831        return evsel->core.attr.type == spe->pmu_type;
 832}
 833
/* printf formats for the ARM_SPE_AUXTRACE_INFO fields, indexed by field id */
static const char * const arm_spe_info_fmts[] = {
	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n",
};
 837
 838static void arm_spe_print_info(__u64 *arr)
 839{
 840        if (!dump_trace)
 841                return;
 842
 843        fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
 844}
 845
/* Glue passed to perf_event__synthesize_attr(): carries the session
 * alongside the dummy tool so the synth callback can deliver into it. */
struct arm_spe_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};
 850
 851static int arm_spe_event_synth(struct perf_tool *tool,
 852                               union perf_event *event,
 853                               struct perf_sample *sample __maybe_unused,
 854                               struct machine *machine __maybe_unused)
 855{
 856        struct arm_spe_synth *arm_spe_synth =
 857                      container_of(tool, struct arm_spe_synth, dummy_tool);
 858
 859        return perf_session__deliver_synth_event(arm_spe_synth->session,
 860                                                 event, NULL);
 861}
 862
 863static int arm_spe_synth_event(struct perf_session *session,
 864                               struct perf_event_attr *attr, u64 id)
 865{
 866        struct arm_spe_synth arm_spe_synth;
 867
 868        memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
 869        arm_spe_synth.session = session;
 870
 871        return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
 872                                           &id, arm_spe_event_synth);
 873}
 874
 875static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
 876                                    const char *name)
 877{
 878        struct evsel *evsel;
 879
 880        evlist__for_each_entry(evlist, evsel) {
 881                if (evsel->core.id && evsel->core.id[0] == id) {
 882                        if (evsel->name)
 883                                zfree(&evsel->name);
 884                        evsel->name = strdup(name);
 885                        break;
 886                }
 887        }
 888}
 889
 890static int
 891arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
 892{
 893        struct evlist *evlist = session->evlist;
 894        struct evsel *evsel;
 895        struct perf_event_attr attr;
 896        bool found = false;
 897        u64 id;
 898        int err;
 899
 900        evlist__for_each_entry(evlist, evsel) {
 901                if (evsel->core.attr.type == spe->pmu_type) {
 902                        found = true;
 903                        break;
 904                }
 905        }
 906
 907        if (!found) {
 908                pr_debug("No selected events with SPE trace data\n");
 909                return 0;
 910        }
 911
 912        memset(&attr, 0, sizeof(struct perf_event_attr));
 913        attr.size = sizeof(struct perf_event_attr);
 914        attr.type = PERF_TYPE_HARDWARE;
 915        attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
 916        attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
 917                            PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
 918        if (spe->timeless_decoding)
 919                attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
 920        else
 921                attr.sample_type |= PERF_SAMPLE_TIME;
 922
 923        attr.exclude_user = evsel->core.attr.exclude_user;
 924        attr.exclude_kernel = evsel->core.attr.exclude_kernel;
 925        attr.exclude_hv = evsel->core.attr.exclude_hv;
 926        attr.exclude_host = evsel->core.attr.exclude_host;
 927        attr.exclude_guest = evsel->core.attr.exclude_guest;
 928        attr.sample_id_all = evsel->core.attr.sample_id_all;
 929        attr.read_format = evsel->core.attr.read_format;
 930
 931        /* create new id val to be a fixed offset from evsel id */
 932        id = evsel->core.id[0] + 1000000000;
 933
 934        if (!id)
 935                id = 1;
 936
 937        if (spe->synth_opts.flc) {
 938                spe->sample_flc = true;
 939
 940                /* Level 1 data cache miss */
 941                err = arm_spe_synth_event(session, &attr, id);
 942                if (err)
 943                        return err;
 944                spe->l1d_miss_id = id;
 945                arm_spe_set_event_name(evlist, id, "l1d-miss");
 946                id += 1;
 947
 948                /* Level 1 data cache access */
 949                err = arm_spe_synth_event(session, &attr, id);
 950                if (err)
 951                        return err;
 952                spe->l1d_access_id = id;
 953                arm_spe_set_event_name(evlist, id, "l1d-access");
 954                id += 1;
 955        }
 956
 957        if (spe->synth_opts.llc) {
 958                spe->sample_llc = true;
 959
 960                /* Last level cache miss */
 961                err = arm_spe_synth_event(session, &attr, id);
 962                if (err)
 963                        return err;
 964                spe->llc_miss_id = id;
 965                arm_spe_set_event_name(evlist, id, "llc-miss");
 966                id += 1;
 967
 968                /* Last level cache access */
 969                err = arm_spe_synth_event(session, &attr, id);
 970                if (err)
 971                        return err;
 972                spe->llc_access_id = id;
 973                arm_spe_set_event_name(evlist, id, "llc-access");
 974                id += 1;
 975        }
 976
 977        if (spe->synth_opts.tlb) {
 978                spe->sample_tlb = true;
 979
 980                /* TLB miss */
 981                err = arm_spe_synth_event(session, &attr, id);
 982                if (err)
 983                        return err;
 984                spe->tlb_miss_id = id;
 985                arm_spe_set_event_name(evlist, id, "tlb-miss");
 986                id += 1;
 987
 988                /* TLB access */
 989                err = arm_spe_synth_event(session, &attr, id);
 990                if (err)
 991                        return err;
 992                spe->tlb_access_id = id;
 993                arm_spe_set_event_name(evlist, id, "tlb-access");
 994                id += 1;
 995        }
 996
 997        if (spe->synth_opts.branches) {
 998                spe->sample_branch = true;
 999
1000                /* Branch miss */
1001                err = arm_spe_synth_event(session, &attr, id);
1002                if (err)
1003                        return err;
1004                spe->branch_miss_id = id;
1005                arm_spe_set_event_name(evlist, id, "branch-miss");
1006                id += 1;
1007        }
1008
1009        if (spe->synth_opts.remote_access) {
1010                spe->sample_remote_access = true;
1011
1012                /* Remote access */
1013                err = arm_spe_synth_event(session, &attr, id);
1014                if (err)
1015                        return err;
1016                spe->remote_access_id = id;
1017                arm_spe_set_event_name(evlist, id, "remote-access");
1018                id += 1;
1019        }
1020
1021        if (spe->synth_opts.mem) {
1022                spe->sample_memory = true;
1023
1024                err = arm_spe_synth_event(session, &attr, id);
1025                if (err)
1026                        return err;
1027                spe->memory_id = id;
1028                arm_spe_set_event_name(evlist, id, "memory");
1029        }
1030
1031        return 0;
1032}
1033
/*
 * Entry point for PERF_RECORD_AUXTRACE_INFO of type ARM_SPE: allocate and
 * initialize the SPE decoding context, install it on session->auxtrace,
 * synthesize the requested events and queue the recorded trace data.
 * Returns 0 on success or a negative errno-style error code.
 */
int arm_spe_process_auxtrace_info(union perf_event *event,
				  struct perf_session *session)
{
	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
	struct perf_record_time_conv *tc = &session->time_conv;
	struct arm_spe *spe;
	int err;

	/* The event must be large enough to hold the SPE private area. */
	if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) +
					min_sz)
		return -EINVAL;

	spe = zalloc(sizeof(struct arm_spe));
	if (!spe)
		return -ENOMEM;

	err = auxtrace_queues__init(&spe->queues);
	if (err)
		goto err_free;

	spe->session = session;
	spe->machine = &session->machines.host; /* No kvm support */
	spe->auxtrace_type = auxtrace_info->type;
	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];

	spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);

	/*
	 * The synthesized event PERF_RECORD_TIME_CONV has been handled ahead
	 * and the parameters for hardware clock are stored in the session
	 * context.  Passes these parameters to the struct perf_tsc_conversion
	 * in "spe->tc", which is used for later conversion between clock
	 * counter and timestamp.
	 *
	 * For backward compatibility, copies the fields starting from
	 * "time_cycles" only if they are contained in the event.
	 */
	spe->tc.time_shift = tc->time_shift;
	spe->tc.time_mult = tc->time_mult;
	spe->tc.time_zero = tc->time_zero;

	if (event_contains(*tc, time_cycles)) {
		spe->tc.time_cycles = tc->time_cycles;
		spe->tc.time_mask = tc->time_mask;
		spe->tc.cap_user_time_zero = tc->cap_user_time_zero;
		spe->tc.cap_user_time_short = tc->cap_user_time_short;
	}

	/* Hook the SPE callbacks into the session's auxtrace interface. */
	spe->auxtrace.process_event = arm_spe_process_event;
	spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
	spe->auxtrace.flush_events = arm_spe_flush;
	spe->auxtrace.free_events = arm_spe_free_events;
	spe->auxtrace.free = arm_spe_free;
	spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
	session->auxtrace = &spe->auxtrace;

	arm_spe_print_info(&auxtrace_info->priv[0]);

	/* Dump mode needs no event synthesis or queue setup. */
	if (dump_trace)
		return 0;

	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
		spe->synth_opts = *session->itrace_synth_opts;
	else
		itrace_synth_opts__set_default(&spe->synth_opts, false);

	err = arm_spe_synth_events(spe, session);
	if (err)
		goto err_free_queues;

	err = auxtrace_queues__process_index(&spe->queues, session);
	if (err)
		goto err_free_queues;

	/* Remember whether trace data was queued up front (non-pipe mode). */
	if (spe->queues.populated)
		spe->data_queued = true;

	return 0;

err_free_queues:
	auxtrace_queues__free(&spe->queues);
	session->auxtrace = NULL;
err_free:
	free(spe);
	return err;
}
1121