linux/tools/perf/util/s390-cpumsf.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright IBM Corp. 2018
   4 * Auxtrace support for s390 CPU-Measurement Sampling Facility
   5 *
   6 * Author(s):  Thomas Richter <tmricht@linux.ibm.com>
   7 *
   8 * Auxiliary traces are collected during 'perf record' using rbd000 event.
   9 * Several PERF_RECORD_XXX are generated during recording:
  10 *
  11 * PERF_RECORD_AUX:
  12 *      Records that new data landed in the AUX buffer part.
  13 * PERF_RECORD_AUXTRACE:
  14 *      Defines auxtrace data. Followed by the actual data. The contents of
  15 *      the auxtrace data is dependent on the event and the CPU.
  16 *      This record is generated by perf record command. For details
  17 *      see Documentation/perf.data-file-format.txt.
  18 * PERF_RECORD_AUXTRACE_INFO:
  19 *      Defines a table of contains for PERF_RECORD_AUXTRACE records. This
  20 *      record is generated during 'perf record' command. Each record contains up
  21 *      to 256 entries describing offset and size of the AUXTRACE data in the
  22 *      perf.data file.
  23 * PERF_RECORD_AUXTRACE_ERROR:
  24 *      Indicates an error during AUXTRACE collection such as buffer overflow.
  25 * PERF_RECORD_FINISHED_ROUND:
  26 *      Perf events are not necessarily in time stamp order, as they can be
  27 *      collected in parallel on different CPUs. If the events should be
  28 *      processed in time order they need to be sorted first.
  29 *      Perf report guarantees that there is no reordering over a
  30 *      PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
  31 *      time stamp lower than this record are processed (and displayed) before
  32 *      the succeeding perf record are processed.
  33 *
  34 * These records are evaluated during perf report command.
  35 *
  36 * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
  37 * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
  38 * below.
  39 * Auxiliary trace data is collected per CPU. To merge the data into the report
  40 * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
  41 * data is in ascending order.
  42 *
  43 * Each queue has a double linked list of auxtrace_buffers. This list contains
  44 * the offset and size of a CPU's auxtrace data. During auxtrace processing
  45 * the data portion is mmap()'ed.
  46 *
  47 * To sort the queues in chronological order, all queue access is controlled
  48 * by the auxtrace_heap. This is basicly a stack, each stack element has two
  49 * entries, the queue number and a time stamp. However the stack is sorted by
  50 * the time stamps. The highest time stamp is at the bottom the lowest
  51 * (nearest) time stamp is at the top. That sort order is maintained at all
  52 * times!
  53 *
  54 * After the auxtrace infrastructure has been setup, the auxtrace queues are
  55 * filled with data (offset/size pairs) and the auxtrace_heap is populated.
  56 *
  57 * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
  58 * Each record is handled by s390_cpumsf_process_event(). The time stamp of
  59 * the perf record is compared with the time stamp located on the auxtrace_heap
  60 * top element. If that time stamp is lower than the time stamp from the
  61 * record sample, the auxtrace queues will be processed. As auxtrace queues
  62 * control many auxtrace_buffers and each buffer can be quite large, the
  63 * auxtrace buffer might be processed only partially. In this case the
  64 * position in the auxtrace_buffer of that queue is remembered and the time
  65 * stamp of the last processed entry of the auxtrace_buffer replaces the
  66 * current auxtrace_heap top.
  67 *
  68 * 3. Auxtrace_queues might run of out data and are feeded by the
  69 * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
  70 *
  71 * Event Generation
  72 * Each sampling-data entry in the auxilary trace data generates a perf sample.
  73 * This sample is filled
  74 * with data from the auxtrace such as PID/TID, instruction address, CPU state,
  75 * etc. This sample is processed with perf_session__deliver_synth_event() to
  76 * be included into the GUI.
  77 *
  78 * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
  79 * auxiliary traces entries until the time stamp of this record is reached
  80 * auxtrace_heap top. This is triggered by ordered_event->deliver().
  81 *
  82 *
  83 * Perf event processing.
  84 * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
  85 * This is the function call sequence:
  86 *
  87 * __cmd_report()
  88 * |
  89 * perf_session__process_events()
  90 * |
  91 * __perf_session__process_events()
  92 * |
  93 * perf_session__process_event()
  94 * |  This functions splits the PERF_RECORD_XXX records.
  95 * |  - Those generated by perf record command (type number equal or higher
  96 * |    than PERF_RECORD_USER_TYPE_START) are handled by
  97 * |    perf_session__process_user_event(see below)
  98 * |  - Those generated by the kernel are handled by
  99 * |    perf_evlist__parse_sample_timestamp()
 100 * |
 101 * perf_evlist__parse_sample_timestamp()
 102 * |  Extract time stamp from sample data.
 103 * |
 104 * perf_session__queue_event()
 105 * |  If timestamp is positive the sample is entered into an ordered_event
 106 * |  list, sort order is the timestamp. The event processing is deferred until
 107 * |  later (see perf_session__process_user_event()).
 108 * |  Other timestamps (0 or -1) are handled immediately by
 109 * |  perf_session__deliver_event(). These are events generated at start up
 110 * |  of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
 111 * |  records. They are needed to create a list of running processes and its
 112 * |  memory mappings and layout. They are needed at the beginning to enable
 113 * |  command perf report to create process trees and memory mappings.
 114 * |
 115 * perf_session__deliver_event()
 116 * |  Delivers a PERF_RECORD_XXX entry for handling.
 117 * |
 118 * auxtrace__process_event()
 119 * |  The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
 120 * |  time stamps from the auxiliary trace buffers. This enables
 121 * |  synchronization between auxiliary trace data and the events on the
 122 * |  perf.data file.
 123 * |
 124 * machine__deliver_event()
 125 * |  Handles the PERF_RECORD_XXX event. This depends on the record type.
 126 *    It might update the process tree, update a process memory map or enter
 127 *    a sample with IP and call back chain data into GUI data pool.
 128 *
 129 *
 130 * Deferred processing determined by perf_session__process_user_event() is
 131 * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
 132 * are generated during command perf record.
 133 * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
 134 * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
 135 * built up while reading the perf.data file.
 136 * Each event is now processed by calling perf_session__deliver_event().
 137 * This enables time synchronization between the data in the perf.data file and
 138 * the data in the auxiliary trace buffers.
 139 */
 140
 141#include <endian.h>
 142#include <errno.h>
 143#include <byteswap.h>
 144#include <inttypes.h>
 145#include <linux/kernel.h>
 146#include <linux/types.h>
 147#include <linux/bitops.h>
 148#include <linux/log2.h>
 149
 150#include <sys/stat.h>
 151#include <sys/types.h>
 152
 153#include "cpumap.h"
 154#include "color.h"
 155#include "evsel.h"
 156#include "evlist.h"
 157#include "machine.h"
 158#include "session.h"
 159#include "util.h"
 160#include "thread.h"
 161#include "debug.h"
 162#include "auxtrace.h"
 163#include "s390-cpumsf.h"
 164#include "s390-cpumsf-kernel.h"
 165#include "config.h"
 166
 167struct s390_cpumsf {
 168        struct auxtrace         auxtrace;
 169        struct auxtrace_queues  queues;
 170        struct auxtrace_heap    heap;
 171        struct perf_session     *session;
 172        struct machine          *machine;
 173        u32                     auxtrace_type;
 174        u32                     pmu_type;
 175        u16                     machine_type;
 176        bool                    data_queued;
 177        bool                    use_logfile;
 178        char                    *logdir;
 179};
 180
 181struct s390_cpumsf_queue {
 182        struct s390_cpumsf      *sf;
 183        unsigned int            queue_nr;
 184        struct auxtrace_buffer  *buffer;
 185        int                     cpu;
 186        FILE                    *logfile;
 187};
 188
 189/* Display s390 CPU measurement facility basic-sampling data entry */
 190static bool s390_cpumsf_basic_show(const char *color, size_t pos,
 191                                   struct hws_basic_entry *basic)
 192{
 193        if (basic->def != 1) {
 194                pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
 195                return false;
 196        }
 197        color_fprintf(stdout, color, "    [%#08zx] Basic   Def:%04x Inst:%#04x"
 198                      " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
 199                      "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
 200                      pos, basic->def, basic->U,
 201                      basic->T ? 'T' : ' ',
 202                      basic->W ? 'W' : ' ',
 203                      basic->P ? 'P' : ' ',
 204                      basic->I ? 'I' : ' ',
 205                      basic->AS, basic->prim_asn, basic->ia, basic->CL,
 206                      basic->hpp, basic->gpp);
 207        return true;
 208}
 209
 210/* Display s390 CPU measurement facility diagnostic-sampling data entry */
 211static bool s390_cpumsf_diag_show(const char *color, size_t pos,
 212                                  struct hws_diag_entry *diag)
 213{
 214        if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
 215                pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
 216                return false;
 217        }
 218        color_fprintf(stdout, color, "    [%#08zx] Diag    Def:%04x %c\n",
 219                      pos, diag->def, diag->I ? 'I' : ' ');
 220        return true;
 221}
 222
 223/* Return TOD timestamp contained in an trailer entry */
 224static unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
 225{
 226        /* te->t set: TOD in STCKE format, bytes 8-15
 227         * to->t not set: TOD in STCK format, bytes 0-7
 228         */
 229        unsigned long long ts;
 230
 231        memcpy(&ts, &te->timestamp[te->t], sizeof(ts));
 232        return ts;
 233}
 234
 235/* Display s390 CPU measurement facility trailer entry */
 236static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
 237                                     struct hws_trailer_entry *te)
 238{
 239        if (te->bsdes != sizeof(struct hws_basic_entry)) {
 240                pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
 241                return false;
 242        }
 243        color_fprintf(stdout, color, "    [%#08zx] Trailer %c%c%c bsdes:%d"
 244                      " dsdes:%d Overflow:%lld Time:%#llx\n"
 245                      "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n",
 246                      pos,
 247                      te->f ? 'F' : ' ',
 248                      te->a ? 'A' : ' ',
 249                      te->t ? 'T' : ' ',
 250                      te->bsdes, te->dsdes, te->overflow,
 251                      trailer_timestamp(te), te->clock_base, te->progusage2,
 252                      te->progusage[0], te->progusage[1]);
 253        return true;
 254}
 255
 256/* Test a sample data block. It must be 4KB or a multiple thereof in size and
 257 * 4KB page aligned. Each sample data page has a trailer entry at the
 258 * end which contains the sample entry data sizes.
 259 *
 260 * Return true if the sample data block passes the checks and set the
 261 * basic set entry size and diagnostic set entry size.
 262 *
 263 * Return false on failure.
 264 *
 265 * Note: Old hardware does not set the basic or diagnostic entry sizes
 266 * in the trailer entry. Use the type number instead.
 267 */
 268static bool s390_cpumsf_validate(int machine_type,
 269                                 unsigned char *buf, size_t len,
 270                                 unsigned short *bsdes,
 271                                 unsigned short *dsdes)
 272{
 273        struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
 274        struct hws_trailer_entry *te;
 275
 276        *dsdes = *bsdes = 0;
 277        if (len & (S390_CPUMSF_PAGESZ - 1))     /* Illegal size */
 278                return false;
 279        if (basic->def != 1)            /* No basic set entry, must be first */
 280                return false;
 281        /* Check for trailer entry at end of SDB */
 282        te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
 283                                              - sizeof(*te));
 284        *bsdes = te->bsdes;
 285        *dsdes = te->dsdes;
 286        if (!te->bsdes && !te->dsdes) {
 287                /* Very old hardware, use CPUID */
 288                switch (machine_type) {
 289                case 2097:
 290                case 2098:
 291                        *dsdes = 64;
 292                        *bsdes = 32;
 293                        break;
 294                case 2817:
 295                case 2818:
 296                        *dsdes = 74;
 297                        *bsdes = 32;
 298                        break;
 299                case 2827:
 300                case 2828:
 301                        *dsdes = 85;
 302                        *bsdes = 32;
 303                        break;
 304                default:
 305                        /* Illegal trailer entry */
 306                        return false;
 307                }
 308        }
 309        return true;
 310}
 311
 312/* Return true if there is room for another entry */
 313static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
 314{
 315        size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
 316
 317        if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
 318                return false;
 319        return true;
 320}
 321
 322/* Dump an auxiliary buffer. These buffers are multiple of
 323 * 4KB SDB pages.
 324 */
 325static void s390_cpumsf_dump(struct s390_cpumsf *sf,
 326                             unsigned char *buf, size_t len)
 327{
 328        const char *color = PERF_COLOR_BLUE;
 329        struct hws_basic_entry *basic;
 330        struct hws_diag_entry *diag;
 331        unsigned short bsdes, dsdes;
 332        size_t pos = 0;
 333
 334        color_fprintf(stdout, color,
 335                      ". ... s390 AUX data: size %zu bytes\n",
 336                      len);
 337
 338        if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
 339                                  &dsdes)) {
 340                pr_err("Invalid AUX trace data block size:%zu"
 341                       " (type:%d bsdes:%hd dsdes:%hd)\n",
 342                       len, sf->machine_type, bsdes, dsdes);
 343                return;
 344        }
 345
 346        /* s390 kernel always returns 4KB blocks fully occupied,
 347         * no partially filled SDBs.
 348         */
 349        while (pos < len) {
 350                /* Handle Basic entry */
 351                basic = (struct hws_basic_entry *)(buf + pos);
 352                if (s390_cpumsf_basic_show(color, pos, basic))
 353                        pos += bsdes;
 354                else
 355                        return;
 356
 357                /* Handle Diagnostic entry */
 358                diag = (struct hws_diag_entry *)(buf + pos);
 359                if (s390_cpumsf_diag_show(color, pos, diag))
 360                        pos += dsdes;
 361                else
 362                        return;
 363
 364                /* Check for trailer entry */
 365                if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
 366                        /* Show trailer entry */
 367                        struct hws_trailer_entry te;
 368
 369                        pos = (pos + S390_CPUMSF_PAGESZ)
 370                               & ~(S390_CPUMSF_PAGESZ - 1);
 371                        pos -= sizeof(te);
 372                        memcpy(&te, buf + pos, sizeof(te));
 373                        /* Set descriptor sizes in case of old hardware
 374                         * where these values are not set.
 375                         */
 376                        te.bsdes = bsdes;
 377                        te.dsdes = dsdes;
 378                        if (s390_cpumsf_trailer_show(color, pos, &te))
 379                                pos += sizeof(te);
 380                        else
 381                                return;
 382                }
 383        }
 384}
 385
 386static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
 387                                   size_t len)
 388{
 389        printf(".\n");
 390        s390_cpumsf_dump(sf, buf, len);
 391}
 392
 393#define S390_LPP_PID_MASK       0xffffffff
 394
 395static bool s390_cpumsf_make_event(size_t pos,
 396                                   struct hws_basic_entry *basic,
 397                                   struct s390_cpumsf_queue *sfq)
 398{
 399        struct perf_sample sample = {
 400                                .ip = basic->ia,
 401                                .pid = basic->hpp & S390_LPP_PID_MASK,
 402                                .tid = basic->hpp & S390_LPP_PID_MASK,
 403                                .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
 404                                .cpu = sfq->cpu,
 405                                .period = 1
 406                            };
 407        union perf_event event;
 408
 409        memset(&event, 0, sizeof(event));
 410        if (basic->CL == 1)     /* Native LPAR mode */
 411                sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
 412                                          : PERF_RECORD_MISC_KERNEL;
 413        else if (basic->CL == 2)        /* Guest kernel/user space */
 414                sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
 415                                          : PERF_RECORD_MISC_GUEST_KERNEL;
 416        else if (basic->gpp || basic->prim_asn != 0xffff)
 417                /* Use heuristics on old hardware */
 418                sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
 419                                          : PERF_RECORD_MISC_GUEST_KERNEL;
 420        else
 421                sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
 422                                          : PERF_RECORD_MISC_KERNEL;
 423
 424        event.sample.header.type = PERF_RECORD_SAMPLE;
 425        event.sample.header.misc = sample.cpumode;
 426        event.sample.header.size = sizeof(struct perf_event_header);
 427
 428        pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
 429                 __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
 430                 sample.tid, sample.cpumode, sample.cpu);
 431        if (perf_session__deliver_synth_event(sfq->sf->session, &event,
 432                                              &sample)) {
 433                pr_err("s390 Auxiliary Trace: failed to deliver event\n");
 434                return false;
 435        }
 436        return true;
 437}
 438
 439static unsigned long long get_trailer_time(const unsigned char *buf)
 440{
 441        struct hws_trailer_entry *te;
 442        unsigned long long aux_time;
 443
 444        te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
 445                                              - sizeof(*te));
 446
 447        if (!te->clock_base)    /* TOD_CLOCK_BASE value missing */
 448                return 0;
 449
 450        /* Correct calculation to convert time stamp in trailer entry to
 451         * nano seconds (taken from arch/s390 function tod_to_ns()).
 452         * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
 453         */
 454        aux_time = trailer_timestamp(te) - te->progusage2;
 455        aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
 456        return aux_time;
 457}
 458
 459/* Process the data samples of a single queue. The first parameter is a
 460 * pointer to the queue, the second parameter is the time stamp. This
 461 * is the time stamp:
 462 * - of the event that triggered this processing.
 463 * - or the time stamp when the last proccesing of this queue stopped.
 464 *   In this case it stopped at a 4KB page boundary and record the
 465 *   position on where to continue processing on the next invocation
 466 *   (see buffer->use_data and buffer->use_size).
 467 *
 468 * When this function returns the second parameter is updated to
 469 * reflect the time stamp of the last processed auxiliary data entry
 470 * (taken from the trailer entry of that page). The caller uses this
 471 * returned time stamp to record the last processed entry in this
 472 * queue.
 473 *
 474 * The function returns:
 475 * 0:  Processing successful. The second parameter returns the
 476 *     time stamp from the trailer entry until which position
 477 *     processing took place. Subsequent calls resume from this
 478 *     position.
 479 * <0: An error occurred during processing. The second parameter
 480 *     returns the maximum time stamp.
 481 * >0: Done on this queue. The second parameter returns the
 482 *     maximum time stamp.
 483 */
 484static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
 485{
 486        struct s390_cpumsf *sf = sfq->sf;
 487        unsigned char *buf = sfq->buffer->use_data;
 488        size_t len = sfq->buffer->use_size;
 489        struct hws_basic_entry *basic;
 490        unsigned short bsdes, dsdes;
 491        size_t pos = 0;
 492        int err = 1;
 493        u64 aux_ts;
 494
 495        if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
 496                                  &dsdes)) {
 497                *ts = ~0ULL;
 498                return -1;
 499        }
 500
 501        /* Get trailer entry time stamp and check if entries in
 502         * this auxiliary page are ready for processing. If the
 503         * time stamp of the first entry is too high, whole buffer
 504         * can be skipped. In this case return time stamp.
 505         */
 506        aux_ts = get_trailer_time(buf);
 507        if (!aux_ts) {
 508                pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
 509                       (s64)sfq->buffer->data_offset);
 510                aux_ts = ~0ULL;
 511                goto out;
 512        }
 513        if (aux_ts > *ts) {
 514                *ts = aux_ts;
 515                return 0;
 516        }
 517
 518        while (pos < len) {
 519                /* Handle Basic entry */
 520                basic = (struct hws_basic_entry *)(buf + pos);
 521                if (s390_cpumsf_make_event(pos, basic, sfq))
 522                        pos += bsdes;
 523                else {
 524                        err = -EBADF;
 525                        goto out;
 526                }
 527
 528                pos += dsdes;   /* Skip diagnositic entry */
 529
 530                /* Check for trailer entry */
 531                if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
 532                        pos = (pos + S390_CPUMSF_PAGESZ)
 533                               & ~(S390_CPUMSF_PAGESZ - 1);
 534                        /* Check existence of next page */
 535                        if (pos >= len)
 536                                break;
 537                        aux_ts = get_trailer_time(buf + pos);
 538                        if (!aux_ts) {
 539                                aux_ts = ~0ULL;
 540                                goto out;
 541                        }
 542                        if (aux_ts > *ts) {
 543                                *ts = aux_ts;
 544                                sfq->buffer->use_data += pos;
 545                                sfq->buffer->use_size -= pos;
 546                                return 0;
 547                        }
 548                }
 549        }
 550out:
 551        *ts = aux_ts;
 552        sfq->buffer->use_size = 0;
 553        sfq->buffer->use_data = NULL;
 554        return err;     /* Buffer completely scanned or error */
 555}
 556
 557/* Run the s390 auxiliary trace decoder.
 558 * Select the queue buffer to operate on, the caller already selected
 559 * the proper queue, depending on second parameter 'ts'.
 560 * This is the time stamp until which the auxiliary entries should
 561 * be processed. This value is updated by called functions and
 562 * returned to the caller.
 563 *
 564 * Resume processing in the current buffer. If there is no buffer
 565 * get a new buffer from the queue and setup start position for
 566 * processing.
 567 * When a buffer is completely processed remove it from the queue
 568 * before returning.
 569 *
 570 * This function returns
 571 * 1: When the queue is empty. Second parameter will be set to
 572 *    maximum time stamp.
 573 * 0: Normal processing done.
 574 * <0: Error during queue buffer setup. This causes the caller
 575 *     to stop processing completely.
 576 */
 577static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
 578                                   u64 *ts)
 579{
 580
 581        struct auxtrace_buffer *buffer;
 582        struct auxtrace_queue *queue;
 583        int err;
 584
 585        queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
 586
 587        /* Get buffer and last position in buffer to resume
 588         * decoding the auxiliary entries. One buffer might be large
 589         * and decoding might stop in between. This depends on the time
 590         * stamp of the trailer entry in each page of the auxiliary
 591         * data and the time stamp of the event triggering the decoding.
 592         */
 593        if (sfq->buffer == NULL) {
 594                sfq->buffer = buffer = auxtrace_buffer__next(queue,
 595                                                             sfq->buffer);
 596                if (!buffer) {
 597                        *ts = ~0ULL;
 598                        return 1;       /* Processing done on this queue */
 599                }
 600                /* Start with a new buffer on this queue */
 601                if (buffer->data) {
 602                        buffer->use_size = buffer->size;
 603                        buffer->use_data = buffer->data;
 604                }
 605                if (sfq->logfile) {     /* Write into log file */
 606                        size_t rc = fwrite(buffer->data, buffer->size, 1,
 607                                           sfq->logfile);
 608                        if (rc != 1)
 609                                pr_err("Failed to write auxiliary data\n");
 610                }
 611        } else
 612                buffer = sfq->buffer;
 613
 614        if (!buffer->data) {
 615                int fd = perf_data__fd(sfq->sf->session->data);
 616
 617                buffer->data = auxtrace_buffer__get_data(buffer, fd);
 618                if (!buffer->data)
 619                        return -ENOMEM;
 620                buffer->use_size = buffer->size;
 621                buffer->use_data = buffer->data;
 622
 623                if (sfq->logfile) {     /* Write into log file */
 624                        size_t rc = fwrite(buffer->data, buffer->size, 1,
 625                                           sfq->logfile);
 626                        if (rc != 1)
 627                                pr_err("Failed to write auxiliary data\n");
 628                }
 629        }
 630        pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
 631                  __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
 632                  buffer->size, buffer->use_size);
 633        err = s390_cpumsf_samples(sfq, ts);
 634
 635        /* If non-zero, there is either an error (err < 0) or the buffer is
 636         * completely done (err > 0). The error is unrecoverable, usually
 637         * some descriptors could not be read successfully, so continue with
 638         * the next buffer.
 639         * In both cases the parameter 'ts' has been updated.
 640         */
 641        if (err) {
 642                sfq->buffer = NULL;
 643                list_del(&buffer->list);
 644                auxtrace_buffer__free(buffer);
 645                if (err > 0)            /* Buffer done, no error */
 646                        err = 0;
 647        }
 648        return err;
 649}
 650
 651static struct s390_cpumsf_queue *
 652s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
 653{
 654        struct s390_cpumsf_queue *sfq;
 655
 656        sfq = zalloc(sizeof(struct s390_cpumsf_queue));
 657        if (sfq == NULL)
 658                return NULL;
 659
 660        sfq->sf = sf;
 661        sfq->queue_nr = queue_nr;
 662        sfq->cpu = -1;
 663        if (sf->use_logfile) {
 664                char *name;
 665                int rc;
 666
 667                rc = (sf->logdir)
 668                        ? asprintf(&name, "%s/aux.smp.%02x",
 669                                 sf->logdir, queue_nr)
 670                        : asprintf(&name, "aux.smp.%02x", queue_nr);
 671                if (rc > 0)
 672                        sfq->logfile = fopen(name, "w");
 673                if (sfq->logfile == NULL) {
 674                        pr_err("Failed to open auxiliary log file %s,"
 675                               "continue...\n", name);
 676                        sf->use_logfile = false;
 677                }
 678                free(name);
 679        }
 680        return sfq;
 681}
 682
 683static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
 684                                   struct auxtrace_queue *queue,
 685                                   unsigned int queue_nr, u64 ts)
 686{
 687        struct s390_cpumsf_queue *sfq = queue->priv;
 688
 689        if (list_empty(&queue->head))
 690                return 0;
 691
 692        if (sfq == NULL) {
 693                sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
 694                if (!sfq)
 695                        return -ENOMEM;
 696                queue->priv = sfq;
 697
 698                if (queue->cpu != -1)
 699                        sfq->cpu = queue->cpu;
 700        }
 701        return auxtrace_heap__add(&sf->heap, queue_nr, ts);
 702}
 703
 704static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
 705{
 706        unsigned int i;
 707        int ret = 0;
 708
 709        for (i = 0; i < sf->queues.nr_queues; i++) {
 710                ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
 711                                              i, ts);
 712                if (ret)
 713                        break;
 714        }
 715        return ret;
 716}
 717
 718static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
 719{
 720        if (!sf->queues.new_data)
 721                return 0;
 722
 723        sf->queues.new_data = false;
 724        return s390_cpumsf_setup_queues(sf, ts);
 725}
 726
 727static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
 728{
 729        unsigned int queue_nr;
 730        u64 ts;
 731        int ret;
 732
 733        while (1) {
 734                struct auxtrace_queue *queue;
 735                struct s390_cpumsf_queue *sfq;
 736
 737                if (!sf->heap.heap_cnt)
 738                        return 0;
 739
 740                if (sf->heap.heap_array[0].ordinal >= timestamp)
 741                        return 0;
 742
 743                queue_nr = sf->heap.heap_array[0].queue_nr;
 744                queue = &sf->queues.queue_array[queue_nr];
 745                sfq = queue->priv;
 746
 747                auxtrace_heap__pop(&sf->heap);
 748                if (sf->heap.heap_cnt) {
 749                        ts = sf->heap.heap_array[0].ordinal + 1;
 750                        if (ts > timestamp)
 751                                ts = timestamp;
 752                } else {
 753                        ts = timestamp;
 754                }
 755
 756                ret = s390_cpumsf_run_decoder(sfq, &ts);
 757                if (ret < 0) {
 758                        auxtrace_heap__add(&sf->heap, queue_nr, ts);
 759                        return ret;
 760                }
 761                if (!ret) {
 762                        ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
 763                        if (ret < 0)
 764                                return ret;
 765                }
 766        }
 767        return 0;
 768}
 769
 770static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
 771                                   pid_t pid, pid_t tid, u64 ip)
 772{
 773        char msg[MAX_AUXTRACE_ERROR_MSG];
 774        union perf_event event;
 775        int err;
 776
 777        strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
 778        auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
 779                             code, cpu, pid, tid, ip, msg);
 780
 781        err = perf_session__deliver_synth_event(sf->session, &event, NULL);
 782        if (err)
 783                pr_err("s390 Auxiliary Trace: failed to deliver error event,"
 784                        "error %d\n", err);
 785        return err;
 786}
 787
 788static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
 789{
 790        return s390_cpumsf_synth_error(sf, 1, sample->cpu,
 791                                       sample->pid, sample->tid, 0);
 792}
 793
 794static int
 795s390_cpumsf_process_event(struct perf_session *session __maybe_unused,
 796                          union perf_event *event,
 797                          struct perf_sample *sample,
 798                          struct perf_tool *tool)
 799{
 800        struct s390_cpumsf *sf = container_of(session->auxtrace,
 801                                              struct s390_cpumsf,
 802                                              auxtrace);
 803        u64 timestamp = sample->time;
 804        int err = 0;
 805
 806        if (dump_trace)
 807                return 0;
 808
 809        if (!tool->ordered_events) {
 810                pr_err("s390 Auxiliary Trace requires ordered events\n");
 811                return -EINVAL;
 812        }
 813
 814        if (event->header.type == PERF_RECORD_AUX &&
 815            event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
 816                return s390_cpumsf_lost(sf, sample);
 817
 818        if (timestamp) {
 819                err = s390_cpumsf_update_queues(sf, timestamp);
 820                if (!err)
 821                        err = s390_cpumsf_process_queues(sf, timestamp);
 822        }
 823        return err;
 824}
 825
 826struct s390_cpumsf_synth {
 827        struct perf_tool cpumsf_tool;
 828        struct perf_session *session;
 829};
 830
 831static int
 832s390_cpumsf_process_auxtrace_event(struct perf_session *session,
 833                                   union perf_event *event __maybe_unused,
 834                                   struct perf_tool *tool __maybe_unused)
 835{
 836        struct s390_cpumsf *sf = container_of(session->auxtrace,
 837                                              struct s390_cpumsf,
 838                                              auxtrace);
 839
 840        int fd = perf_data__fd(session->data);
 841        struct auxtrace_buffer *buffer;
 842        off_t data_offset;
 843        int err;
 844
 845        if (sf->data_queued)
 846                return 0;
 847
 848        if (perf_data__is_pipe(session->data)) {
 849                data_offset = 0;
 850        } else {
 851                data_offset = lseek(fd, 0, SEEK_CUR);
 852                if (data_offset == -1)
 853                        return -errno;
 854        }
 855
 856        err = auxtrace_queues__add_event(&sf->queues, session, event,
 857                                         data_offset, &buffer);
 858        if (err)
 859                return err;
 860
 861        /* Dump here after copying piped trace out of the pipe */
 862        if (dump_trace) {
 863                if (auxtrace_buffer__get_data(buffer, fd)) {
 864                        s390_cpumsf_dump_event(sf, buffer->data,
 865                                               buffer->size);
 866                        auxtrace_buffer__put_data(buffer);
 867                }
 868        }
 869        return 0;
 870}
 871
 872static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
 873{
 874}
 875
 876static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
 877                             struct perf_tool *tool __maybe_unused)
 878{
 879        return 0;
 880}
 881
 882static void s390_cpumsf_free_queues(struct perf_session *session)
 883{
 884        struct s390_cpumsf *sf = container_of(session->auxtrace,
 885                                              struct s390_cpumsf,
 886                                              auxtrace);
 887        struct auxtrace_queues *queues = &sf->queues;
 888        unsigned int i;
 889
 890        for (i = 0; i < queues->nr_queues; i++) {
 891                struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
 892                                                queues->queue_array[i].priv;
 893
 894                if (sfq != NULL && sfq->logfile) {
 895                        fclose(sfq->logfile);
 896                        sfq->logfile = NULL;
 897                }
 898                zfree(&queues->queue_array[i].priv);
 899        }
 900        auxtrace_queues__free(queues);
 901}
 902
 903static void s390_cpumsf_free(struct perf_session *session)
 904{
 905        struct s390_cpumsf *sf = container_of(session->auxtrace,
 906                                              struct s390_cpumsf,
 907                                              auxtrace);
 908
 909        auxtrace_heap__free(&sf->heap);
 910        s390_cpumsf_free_queues(session);
 911        session->auxtrace = NULL;
 912        free(sf->logdir);
 913        free(sf);
 914}
 915
 916static int s390_cpumsf_get_type(const char *cpuid)
 917{
 918        int ret, family = 0;
 919
 920        ret = sscanf(cpuid, "%*[^,],%u", &family);
 921        return (ret == 1) ? family : 0;
 922}
 923
 924/* Check itrace options set on perf report command.
 925 * Return true, if none are set or all options specified can be
 926 * handled on s390 (currently only option 'd' for logging.
 927 * Return false otherwise.
 928 */
 929static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
 930{
 931        bool ison = false;
 932
 933        if (!itops || !itops->set)
 934                return true;
 935        ison = itops->inject || itops->instructions || itops->branches ||
 936                itops->transactions || itops->ptwrites ||
 937                itops->pwr_events || itops->errors ||
 938                itops->dont_decode || itops->calls || itops->returns ||
 939                itops->callchain || itops->thread_stack ||
 940                itops->last_branch;
 941        if (!ison)
 942                return true;
 943        pr_err("Unsupported --itrace options specified\n");
 944        return false;
 945}
 946
 947/* Check for AUXTRACE dump directory if it is needed.
 948 * On failure print an error message but continue.
 949 * Return 0 on wrong keyword in config file and 1 otherwise.
 950 */
 951static int s390_cpumsf__config(const char *var, const char *value, void *cb)
 952{
 953        struct s390_cpumsf *sf = cb;
 954        struct stat stbuf;
 955        int rc;
 956
 957        if (strcmp(var, "auxtrace.dumpdir"))
 958                return 0;
 959        sf->logdir = strdup(value);
 960        if (sf->logdir == NULL) {
 961                pr_err("Failed to find auxtrace log directory %s,"
 962                       " continue with current directory...\n", value);
 963                return 1;
 964        }
 965        rc = stat(sf->logdir, &stbuf);
 966        if (rc == -1 || !S_ISDIR(stbuf.st_mode)) {
 967                pr_err("Missing auxtrace log directory %s,"
 968                       " continue with current directory...\n", value);
 969                free(sf->logdir);
 970                sf->logdir = NULL;
 971        }
 972        return 1;
 973}
 974
 975int s390_cpumsf_process_auxtrace_info(union perf_event *event,
 976                                      struct perf_session *session)
 977{
 978        struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
 979        struct s390_cpumsf *sf;
 980        int err;
 981
 982        if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event))
 983                return -EINVAL;
 984
 985        sf = zalloc(sizeof(struct s390_cpumsf));
 986        if (sf == NULL)
 987                return -ENOMEM;
 988
 989        if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
 990                err = -EINVAL;
 991                goto err_free;
 992        }
 993        sf->use_logfile = session->itrace_synth_opts->log;
 994        if (sf->use_logfile)
 995                perf_config(s390_cpumsf__config, sf);
 996
 997        err = auxtrace_queues__init(&sf->queues);
 998        if (err)
 999                goto err_free;
1000
1001        sf->session = session;
1002        sf->machine = &session->machines.host; /* No kvm support */
1003        sf->auxtrace_type = auxtrace_info->type;
1004        sf->pmu_type = PERF_TYPE_RAW;
1005        sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
1006
1007        sf->auxtrace.process_event = s390_cpumsf_process_event;
1008        sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
1009        sf->auxtrace.flush_events = s390_cpumsf_flush;
1010        sf->auxtrace.free_events = s390_cpumsf_free_events;
1011        sf->auxtrace.free = s390_cpumsf_free;
1012        session->auxtrace = &sf->auxtrace;
1013
1014        if (dump_trace)
1015                return 0;
1016
1017        err = auxtrace_queues__process_index(&sf->queues, session);
1018        if (err)
1019                goto err_free_queues;
1020
1021        if (sf->queues.populated)
1022                sf->data_queued = true;
1023
1024        return 0;
1025
1026err_free_queues:
1027        auxtrace_queues__free(&sf->queues);
1028        session->auxtrace = NULL;
1029err_free:
1030        free(sf->logdir);
1031        free(sf);
1032        return err;
1033}
1034