linux/tools/perf/util/evlist.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/zalloc.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
                       struct thread_map *threads)
{
        int i;

        for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
                INIT_HLIST_HEAD(&evlist->heads[i]);
        INIT_LIST_HEAD(&evlist->entries);
        perf_evlist__set_maps(evlist, cpus, threads);
        fdarray__init(&evlist->pollfd, 64);
        evlist->workload.pid = -1;
        evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct perf_evlist *perf_evlist__new(void)
{
        struct perf_evlist *evlist = zalloc(sizeof(*evlist));

        if (evlist != NULL)
                perf_evlist__init(evlist, NULL, NULL);

        return evlist;
}

struct perf_evlist *perf_evlist__new_default(void)
{
        struct perf_evlist *evlist = perf_evlist__new();

        if (evlist && perf_evlist__add_default(evlist)) {
                perf_evlist__delete(evlist);
                evlist = NULL;
        }

        return evlist;
}

struct perf_evlist *perf_evlist__new_dummy(void)
{
        struct perf_evlist *evlist = perf_evlist__new();

        if (evlist && perf_evlist__add_dummy(evlist)) {
                perf_evlist__delete(evlist);
                evlist = NULL;
        }

        return evlist;
}
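
/*
 * Usage sketch (illustrative, not part of the original file): the
 * constructors above pair with perf_evlist__delete(), which closes,
 * munmaps and frees all evsels:
 *
 *      struct perf_evlist *evlist = perf_evlist__new_default();
 *
 *      if (evlist == NULL)
 *              return -ENOMEM;
 *      ...
 *      perf_evlist__delete(evlist);
 */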

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist);

        evlist->id_pos = first->id_pos;
        evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry(evlist, evsel)
                perf_evsel__calc_id_pos(evsel);

        perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
        struct perf_evsel *pos, *n;

        evlist__for_each_entry_safe(evlist, n, pos) {
                list_del_init(&pos->node);
                pos->evlist = NULL;
                perf_evsel__delete(pos);
        }

        evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
        zfree(&evlist->mmap);
        zfree(&evlist->overwrite_mmap);
        fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
        if (evlist == NULL)
                return;

        perf_evlist__munmap(evlist);
        perf_evlist__close(evlist);
        cpu_map__put(evlist->cpus);
        thread_map__put(evlist->threads);
        evlist->cpus = NULL;
        evlist->threads = NULL;
        perf_evlist__purge(evlist);
        perf_evlist__exit(evlist);
        free(evlist);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
                                          struct perf_evsel *evsel)
{
        /*
         * We already have cpus for evsel (via PMU sysfs) so
         * keep it, if there's no target cpu list defined.
         */
        if (!evsel->own_cpus || evlist->has_user_cpus) {
                cpu_map__put(evsel->cpus);
                evsel->cpus = cpu_map__get(evlist->cpus);
        } else if (evsel->cpus != evsel->own_cpus) {
                cpu_map__put(evsel->cpus);
                evsel->cpus = cpu_map__get(evsel->own_cpus);
        }

        thread_map__put(evsel->threads);
        evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry(evlist, evsel)
                __perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
        entry->evlist = evlist;
        list_add_tail(&entry->node, &evlist->entries);
        entry->idx = evlist->nr_entries;
        entry->tracking = !entry->idx;

        if (!evlist->nr_entries++)
                perf_evlist__set_id_pos(evlist);

        __perf_evlist__propagate_maps(evlist, entry);
}

void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
        evsel->evlist = NULL;
        list_del_init(&evsel->node);
        evlist->nr_entries -= 1;
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
                                   struct list_head *list)
{
        struct perf_evsel *evsel, *temp;

        __evlist__for_each_entry_safe(list, temp, evsel) {
                list_del_init(&evsel->node);
                perf_evlist__add(evlist, evsel);
        }
}

void __perf_evlist__set_leader(struct list_head *list)
{
        struct perf_evsel *evsel, *leader;

        leader = list_entry(list->next, struct perf_evsel, node);
        evsel = list_entry(list->prev, struct perf_evsel, node);

        leader->nr_members = evsel->idx - leader->idx + 1;

        __evlist__for_each_entry(list, evsel) {
                evsel->leader = leader;
        }
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
        if (evlist->nr_entries) {
                evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
                __perf_evlist__set_leader(&evlist->entries);
        }
}
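
/*
 * Illustrative note (added): with three evsels at idx 0..2 on the list,
 * __perf_evlist__set_leader() above picks the first entry as leader and
 * sets leader->nr_members = 2 - 0 + 1 = 3, i.e. the leader counts itself
 * among its members.
 */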

int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise)
{
        struct perf_evsel *evsel = perf_evsel__new_cycles(precise);

        if (evsel == NULL)
                return -ENOMEM;

        perf_evlist__add(evlist, evsel);
        return 0;
}

int perf_evlist__add_dummy(struct perf_evlist *evlist)
{
        struct perf_event_attr attr = {
                .type   = PERF_TYPE_SOFTWARE,
                .config = PERF_COUNT_SW_DUMMY,
                .size   = sizeof(attr), /* to capture ABI version */
        };
        struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries);

        if (evsel == NULL)
                return -ENOMEM;

        perf_evlist__add(evlist, evsel);
        return 0;
}

static int perf_evlist__add_attrs(struct perf_evlist *evlist,
                                  struct perf_event_attr *attrs, size_t nr_attrs)
{
        struct perf_evsel *evsel, *n;
        LIST_HEAD(head);
        size_t i;

        for (i = 0; i < nr_attrs; i++) {
                evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
                if (evsel == NULL)
                        goto out_delete_partial_list;
                list_add_tail(&evsel->node, &head);
        }

        perf_evlist__splice_list_tail(evlist, &head);

        return 0;

out_delete_partial_list:
        __evlist__for_each_entry_safe(&head, n, evsel)
                perf_evsel__delete(evsel);
        return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
                                     struct perf_event_attr *attrs, size_t nr_attrs)
{
        size_t i;

        for (i = 0; i < nr_attrs; i++)
                event_attr_init(attrs + i);

        return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel->attr.type   == PERF_TYPE_TRACEPOINT &&
                    (int)evsel->attr.config == id)
                        return evsel;
        }

        return NULL;
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
                                     const char *name)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
                    (strcmp(evsel->name, name) == 0))
                        return evsel;
        }

        return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
                           const char *sys, const char *name, void *handler)
{
        struct perf_evsel *evsel = perf_evsel__newtp(sys, name);

        if (IS_ERR(evsel))
                return -1;

        evsel->handler = handler;
        perf_evlist__add(evlist, evsel);
        return 0;
}
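
/*
 * Usage sketch (illustrative; "sched"/"sched_switch" is just an assumed,
 * commonly available tracepoint):
 *
 *      if (perf_evlist__add_newtp(evlist, "sched", "sched_switch", NULL))
 *              pr_err("failed to add sched:sched_switch\n");
 */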

static int perf_evlist__nr_threads(struct perf_evlist *evlist,
                                   struct perf_evsel *evsel)
{
        if (evsel->system_wide)
                return 1;
        else
                return thread_map__nr(evlist->threads);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
        struct perf_evsel *pos;

        evlist__for_each_entry(evlist, pos) {
                if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
                        continue;
                perf_evsel__disable(pos);
        }

        evlist->enabled = false;
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
        struct perf_evsel *pos;

        evlist__for_each_entry(evlist, pos) {
                if (!perf_evsel__is_group_leader(pos) || !pos->fd)
                        continue;
                perf_evsel__enable(pos);
        }

        evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
        (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
                                         struct perf_evsel *evsel, int cpu)
{
        int thread;
        int nr_threads = perf_evlist__nr_threads(evlist, evsel);

        if (!evsel->fd)
                return -EINVAL;

        for (thread = 0; thread < nr_threads; thread++) {
                int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
                if (err)
                        return err;
        }
        return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
                                            struct perf_evsel *evsel,
                                            int thread)
{
        int cpu;
        int nr_cpus = cpu_map__nr(evlist->cpus);

        if (!evsel->fd)
                return -EINVAL;

        for (cpu = 0; cpu < nr_cpus; cpu++) {
                int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
                if (err)
                        return err;
        }
        return 0;
}

int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
                                  struct perf_evsel *evsel, int idx)
{
        bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

        if (per_cpu_mmaps)
                return perf_evlist__enable_event_cpu(evlist, evsel, idx);
        else
                return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
        int nr_cpus = cpu_map__nr(evlist->cpus);
        int nr_threads = thread_map__nr(evlist->threads);
        int nfds = 0;
        struct perf_evsel *evsel;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel->system_wide)
                        nfds += nr_cpus;
                else
                        nfds += nr_cpus * nr_threads;
        }

        if (fdarray__available_entries(&evlist->pollfd) < nfds &&
            fdarray__grow(&evlist->pollfd, nfds) < 0)
                return -ENOMEM;

        return 0;
}

static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
                                     struct perf_mmap *map, short revent)
{
        int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
        /*
         * Save the idx so that when we filter out fds POLLHUP'ed we can
         * close the associated evlist->mmap[] entry.
         */
        if (pos >= 0) {
                evlist->pollfd.priv[pos].ptr = map;

                fcntl(fd, F_SETFL, O_NONBLOCK);
        }

        return pos;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
        return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
                                         void *arg __maybe_unused)
{
        struct perf_mmap *map = fda->priv[fd].ptr;

        if (map)
                perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
        return fdarray__filter(&evlist->pollfd, revents_and_mask,
                               perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
        return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct perf_evlist *evlist,
                                 struct perf_evsel *evsel,
                                 int cpu, int thread, u64 id)
{
        int hash;
        struct perf_sample_id *sid = SID(evsel, cpu, thread);

        sid->id = id;
        sid->evsel = evsel;
        hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
        hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
                         int cpu, int thread, u64 id)
{
        perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
        evsel->id[evsel->ids++] = id;
}

int perf_evlist__id_add_fd(struct perf_evlist *evlist,
                           struct perf_evsel *evsel,
                           int cpu, int thread, int fd)
{
        u64 read_data[4] = { 0, };
        int id_idx = 1; /* The first entry is the counter value */
        u64 id;
        int ret;

        ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
        if (!ret)
                goto add;

        if (errno != ENOTTY)
                return -1;

        /* Legacy way to get the event id. All hail to old kernels! */

        /*
         * This way does not work with group format read, so bail
         * out in that case.
         */
        if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
                return -1;

        if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
            read(fd, &read_data, sizeof(read_data)) == -1)
                return -1;

        if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
                ++id_idx;
        if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
                ++id_idx;

        id = read_data[id_idx];

 add:
        perf_evlist__id_add(evlist, evsel, cpu, thread, id);
        return 0;
}
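
/*
 * Illustrative note (added): on the legacy path above, a read() of the
 * event fd returns, per the perf_event_open(2) read_format layout:
 *
 *      { u64 value;
 *        u64 time_enabled;    (if PERF_FORMAT_TOTAL_TIME_ENABLED)
 *        u64 time_running;    (if PERF_FORMAT_TOTAL_TIME_RUNNING)
 *        u64 id; }            (if PERF_FORMAT_ID)
 *
 * hence id_idx starts at 1 and is bumped once per enabled time field
 * before indexing read_data[].
 */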

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
                                     struct perf_evsel *evsel, int idx, int cpu,
                                     int thread)
{
        struct perf_sample_id *sid = SID(evsel, cpu, thread);
        sid->idx = idx;
        if (evlist->cpus && cpu >= 0)
                sid->cpu = evlist->cpus->map[cpu];
        else
                sid->cpu = -1;
        if (!evsel->system_wide && evlist->threads && thread >= 0)
                sid->tid = thread_map__pid(evlist->threads, thread);
        else
                sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
        struct hlist_head *head;
        struct perf_sample_id *sid;
        int hash;

        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
        head = &evlist->heads[hash];

        hlist_for_each_entry(sid, head, node)
                if (sid->id == id)
                        return sid;

        return NULL;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
        struct perf_sample_id *sid;

        if (evlist->nr_entries == 1 || !id)
                return perf_evlist__first(evlist);

        sid = perf_evlist__id2sid(evlist, id);
        if (sid)
                return sid->evsel;

        if (!perf_evlist__sample_id_all(evlist))
                return perf_evlist__first(evlist);

        return NULL;
}

struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
                                                u64 id)
{
        struct perf_sample_id *sid;

        if (!id)
                return NULL;

        sid = perf_evlist__id2sid(evlist, id);
        if (sid)
                return sid->evsel;

        return NULL;
}

static int perf_evlist__event2id(struct perf_evlist *evlist,
                                 union perf_event *event, u64 *id)
{
        const u64 *array = event->sample.array;
        ssize_t n;

        n = (event->header.size - sizeof(event->header)) >> 3;

        if (event->header.type == PERF_RECORD_SAMPLE) {
                if (evlist->id_pos >= n)
                        return -1;
                *id = array[evlist->id_pos];
        } else {
                if (evlist->is_pos > n)
                        return -1;
                n -= evlist->is_pos;
                *id = array[n];
        }
        return 0;
}

struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
                                            union perf_event *event)
{
        struct perf_evsel *first = perf_evlist__first(evlist);
        struct hlist_head *head;
        struct perf_sample_id *sid;
        int hash;
        u64 id;

        if (evlist->nr_entries == 1)
                return first;

        if (!first->attr.sample_id_all &&
            event->header.type != PERF_RECORD_SAMPLE)
                return first;

        if (perf_evlist__event2id(evlist, event, &id))
                return NULL;

        /* Synthesized events have an id of zero */
        if (!id)
                return first;

        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
        head = &evlist->heads[hash];

        hlist_for_each_entry(sid, head, node) {
                if (sid->id == id)
                        return sid->evsel;
        }
        return NULL;
}

static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
        int i;

        if (!evlist->overwrite_mmap)
                return 0;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                int fd = evlist->overwrite_mmap[i].fd;
                int err;

                if (fd < 0)
                        continue;
                err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
                if (err)
                        return err;
        }
        return 0;
}

static int perf_evlist__pause(struct perf_evlist *evlist)
{
        return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct perf_evlist *evlist)
{
        return perf_evlist__set_paused(evlist, false);
}

static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
{
        int i;

        if (evlist->mmap)
                for (i = 0; i < evlist->nr_mmaps; i++)
                        perf_mmap__munmap(&evlist->mmap[i]);

        if (evlist->overwrite_mmap)
                for (i = 0; i < evlist->nr_mmaps; i++)
                        perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
        perf_evlist__munmap_nofree(evlist);
        zfree(&evlist->mmap);
        zfree(&evlist->overwrite_mmap);
}

static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist,
                                                 bool overwrite)
{
        int i;
        struct perf_mmap *map;

        evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
        if (cpu_map__empty(evlist->cpus))
                evlist->nr_mmaps = thread_map__nr(evlist->threads);
        map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
        if (!map)
                return NULL;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                map[i].fd = -1;
                map[i].overwrite = overwrite;
                /*
                 * When the perf_mmap() call is made we grab one refcount, plus
                 * one extra to let perf_mmap__consume() get the last
                 * events after all real references (perf_mmap__get()) are
                 * dropped.
                 *
                 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
                 * thus does perf_mmap__get() on it.
                 */
                refcount_set(&map[i].refcnt, 0);
        }
        return map;
}

static bool
perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
                         struct perf_evsel *evsel)
{
        if (evsel->attr.write_backward)
                return false;
        return true;
}

static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
                                       struct mmap_params *mp, int cpu_idx,
                                       int thread, int *_output, int *_output_overwrite)
{
        struct perf_evsel *evsel;
        int revent;
        int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);

        evlist__for_each_entry(evlist, evsel) {
                struct perf_mmap *maps = evlist->mmap;
                int *output = _output;
                int fd;
                int cpu;

                mp->prot = PROT_READ | PROT_WRITE;
                if (evsel->attr.write_backward) {
                        output = _output_overwrite;
                        maps = evlist->overwrite_mmap;

                        if (!maps) {
                                maps = perf_evlist__alloc_mmap(evlist, true);
                                if (!maps)
                                        return -1;
                                evlist->overwrite_mmap = maps;
                                if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
                                        perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
                        }
                        mp->prot &= ~PROT_WRITE;
                }

                if (evsel->system_wide && thread)
                        continue;

                cpu = cpu_map__idx(evsel->cpus, evlist_cpu);
                if (cpu == -1)
                        continue;

                fd = FD(evsel, cpu, thread);

                if (*output == -1) {
                        *output = fd;

                        if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
                                return -1;
                } else {
                        if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
                                return -1;

                        perf_mmap__get(&maps[idx]);
                }

                revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

                /*
                 * The system_wide flag causes a selected event to be opened
                 * always without a pid.  Consequently it will never get a
                 * POLLHUP, but it is used for tracking in combination with
                 * other events, so it should not need to be polled anyway.
                 * Therefore don't add it for polling.
                 */
                if (!evsel->system_wide &&
                    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
                        perf_mmap__put(&maps[idx]);
                        return -1;
                }

                if (evsel->attr.read_format & PERF_FORMAT_ID) {
                        if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
                                                   fd) < 0)
                                return -1;
                        perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
                                                 thread);
                }
        }

        return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
                                     struct mmap_params *mp)
{
        int cpu, thread;
        int nr_cpus = cpu_map__nr(evlist->cpus);
        int nr_threads = thread_map__nr(evlist->threads);

        pr_debug2("perf event ring buffer mmapped per cpu\n");
        for (cpu = 0; cpu < nr_cpus; cpu++) {
                int output = -1;
                int output_overwrite = -1;

                auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
                                              true);

                for (thread = 0; thread < nr_threads; thread++) {
                        if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
                                                        thread, &output, &output_overwrite))
                                goto out_unmap;
                }
        }

        return 0;

out_unmap:
        perf_evlist__munmap_nofree(evlist);
        return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
                                        struct mmap_params *mp)
{
        int thread;
        int nr_threads = thread_map__nr(evlist->threads);

        pr_debug2("perf event ring buffer mmapped per thread\n");
        for (thread = 0; thread < nr_threads; thread++) {
                int output = -1;
                int output_overwrite = -1;

                auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
                                              false);

                if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
                                                &output, &output_overwrite))
                        goto out_unmap;
        }

        return 0;

out_unmap:
        perf_evlist__munmap_nofree(evlist);
        return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
        unsigned long pages;
        int max;

        if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
                /*
                 * Pick a value that was considered good once upon a time.
                 * Things look strange if we can't read this sysctl, but
                 * let's not die just yet...
                 */
                max = 512;
        } else {
                max -= (page_size / 1024);
        }

        pages = (max * 1024) / page_size;
        if (!is_power_of_2(pages))
                pages = rounddown_pow_of_two(pages);

        return pages;
}

size_t perf_evlist__mmap_size(unsigned long pages)
{
        if (pages == UINT_MAX)
                pages = perf_event_mlock_kb_in_pages();
        else if (!is_power_of_2(pages))
                return 0;

        return (pages + 1) * page_size;
}
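
/*
 * Worked example (added; assumes a 4096-byte page_size and the 512 kB
 * fallback above): pages = (512 * 1024) / 4096 = 128, already a power of
 * 2, so perf_evlist__mmap_size(UINT_MAX) = (128 + 1) * 4096 = 528384
 * bytes: 128 data pages plus one page for the ring buffer control header.
 */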

static long parse_pages_arg(const char *str, unsigned long min,
                            unsigned long max)
{
        unsigned long pages, val;
        static struct parse_tag tags[] = {
                { .tag  = 'B', .mult = 1       },
                { .tag  = 'K', .mult = 1 << 10 },
                { .tag  = 'M', .mult = 1 << 20 },
                { .tag  = 'G', .mult = 1 << 30 },
                { .tag  = 0 },
        };

        if (str == NULL)
                return -EINVAL;

        val = parse_tag_value(str, tags);
        if (val != (unsigned long) -1) {
                /* we got file size value */
                pages = PERF_ALIGN(val, page_size) / page_size;
        } else {
                /* we got pages count value */
                char *eptr;
                pages = strtoul(str, &eptr, 10);
                if (*eptr != '\0')
                        return -EINVAL;
        }

        if (pages == 0 && min == 0) {
                /* leave number of pages at 0 */
        } else if (!is_power_of_2(pages)) {
                char buf[100];

                /* round pages up to next power of 2 */
                pages = roundup_pow_of_two(pages);
                if (!pages)
                        return -EINVAL;

                unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
                pr_info("rounding mmap pages size to %s (%lu pages)\n",
                        buf, pages);
        }

        if (pages > max)
                return -EINVAL;

        return pages;
}
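
/*
 * Illustrative examples (added; assume a 4096-byte page_size): "512K"
 * parses as a size via the tags above, giving 512 KiB / 4096 = 128 pages;
 * "100" parses as a page count and gets rounded up to the next power of
 * 2, i.e. 128 pages, with a pr_info() note about the rounding.
 */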

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
        unsigned long max = UINT_MAX;
        long pages;

        if (max > SIZE_MAX / page_size)
                max = SIZE_MAX / page_size;

        pages = parse_pages_arg(str, 1, max);
        if (pages < 0) {
                pr_err("Invalid argument for --mmap_pages/-m\n");
                return -1;
        }

        *mmap_pages = pages;
        return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
                                  int unset __maybe_unused)
{
        return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If the mmaps are not created in overwrite mode, the user needs to signal
 * event consumption using perf_mmap__write_tail().  Using
 * perf_evlist__mmap_read() does this automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
                         unsigned int auxtrace_pages,
                         bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
                         int comp_level)
{
        struct perf_evsel *evsel;
        const struct cpu_map *cpus = evlist->cpus;
        const struct thread_map *threads = evlist->threads;
        /*
         * Delay setting mp.prot: set it before calling perf_mmap__mmap.
         * Its value is decided by evsel's write_backward.
         * So &mp should not be passed through const pointer.
         */
        struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
                                  .comp_level = comp_level };

        if (!evlist->mmap)
                evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
        if (!evlist->mmap)
                return -ENOMEM;

        if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
                return -ENOMEM;

        evlist->mmap_len = perf_evlist__mmap_size(pages);
        pr_debug("mmap size %zuB\n", evlist->mmap_len);
        mp.mask = evlist->mmap_len - page_size - 1;

        auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
                                   auxtrace_pages, auxtrace_overwrite);

        evlist__for_each_entry(evlist, evsel) {
                if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
                    evsel->sample_id == NULL &&
                    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
                        return -ENOMEM;
        }

        if (cpu_map__empty(cpus))
                return perf_evlist__mmap_per_thread(evlist, &mp);

        return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
        return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}
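
/*
 * Usage sketch (illustrative): the usual sequence in the perf tools is to
 * open the events and then mmap the ring buffers; UINT_MAX requests the
 * perf_event_mlock_kb based default size computed above:
 *
 *      if (perf_evlist__open(evlist) < 0)
 *              goto out_error;
 *      if (perf_evlist__mmap(evlist, UINT_MAX) < 0)
 *              goto out_error;
 */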

int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
        bool all_threads = (target->per_thread && target->system_wide);
        struct cpu_map *cpus;
        struct thread_map *threads;

        /*
         * If both '-a' and '--per-thread' are passed to perf record, perf
         * record overrides '--per-thread', i.e. target->per_thread = false
         * and target->system_wide = true.
         *
         * If only '--per-thread' is passed to perf record, then
         * target->per_thread = true and target->system_wide = false.
         *
         * So for perf record target->per_thread && target->system_wide is
         * always false, thread_map__new_str doesn't call
         * thread_map__new_all_cpus, and perf record's current behavior is
         * kept.
         *
         * perf stat, however, allows both target->per_thread and
         * target->system_wide to be true, meaning: collect system-wide
         * per-thread data. In that case thread_map__new_str calls
         * thread_map__new_all_cpus to enumerate all threads.
         */
        threads = thread_map__new_str(target->pid, target->tid, target->uid,
                                      all_threads);

        if (!threads)
                return -1;

        if (target__uses_dummy_map(target))
                cpus = cpu_map__dummy_new();
        else
                cpus = cpu_map__new(target->cpu_list);

        if (!cpus)
                goto out_delete_threads;

        evlist->has_user_cpus = !!target->cpu_list;

        perf_evlist__set_maps(evlist, cpus, threads);

        return 0;

out_delete_threads:
        thread_map__put(threads);
        return -1;
}

void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
                           struct thread_map *threads)
{
        /*
         * Allow for the possibility that one or another of the maps isn't being
         * changed, i.e. don't put it.  Note we are assuming the maps that are
         * being applied are brand new and evlist is taking ownership of the
         * original reference count of 1.  If that is not the case it is up to
         * the caller to increase the reference count.
         */
        if (cpus != evlist->cpus) {
                cpu_map__put(evlist->cpus);
                evlist->cpus = cpu_map__get(cpus);
        }

        if (threads != evlist->threads) {
                thread_map__put(evlist->threads);
                evlist->threads = thread_map__get(threads);
        }

        perf_evlist__propagate_maps(evlist);
}

void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
                                   enum perf_event_sample_format bit)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry(evlist, evsel)
                __perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
                                     enum perf_event_sample_format bit)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry(evlist, evsel)
                __perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
        struct perf_evsel *evsel;
        int err = 0;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel->filter == NULL)
                        continue;

                /*
                 * Filters only work for tracepoint events, which don't have a
                 * cpu limit, so the evlist's and the evsel's cpu maps should
                 * always be the same.
                 */
                err = perf_evsel__apply_filter(evsel, evsel->filter);
                if (err) {
                        *err_evsel = evsel;
                        break;
                }
        }

        return err;
}

int perf_evlist__set_tp_filter(struct perf_evlist *evlist, const char *filter)
{
        struct perf_evsel *evsel;
        int err = 0;

        evlist__for_each_entry(evlist, evsel) {
                if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
                        continue;

                err = perf_evsel__set_filter(evsel, filter);
                if (err)
                        break;
        }

        return err;
}

int perf_evlist__set_tp_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
        char *filter = NULL;
        int ret = -1;
        size_t i;

        for (i = 0; i < npids; ++i) {
                if (i == 0) {
                        if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
                                return -1;
                } else {
                        char *tmp;

                        if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
                                goto out_free;

                        free(filter);
                        filter = tmp;
                }
        }

        ret = perf_evlist__set_tp_filter(evlist, filter);
out_free:
        free(filter);
        return ret;
}
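
/*
 * Illustrative note (added): for npids = 2 and pids = {1, 2} the loop
 * above builds the filter string
 *
 *      "common_pid != 1 && common_pid != 2"
 *
 * which is then applied to every tracepoint evsel in the list; callers
 * typically use this to keep perf's own threads out of a trace.
 */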

int perf_evlist__set_tp_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
        return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
        struct perf_evsel *pos;

        if (evlist->nr_entries == 1)
                return true;

        if (evlist->id_pos < 0 || evlist->is_pos < 0)
                return false;

        evlist__for_each_entry(evlist, pos) {
                if (pos->id_pos != evlist->id_pos ||
                    pos->is_pos != evlist->is_pos)
                        return false;
        }

        return true;
}

u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        if (evlist->combined_sample_type)
                return evlist->combined_sample_type;

        evlist__for_each_entry(evlist, evsel)
                evlist->combined_sample_type |= evsel->attr.sample_type;

        return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
        evlist->combined_sample_type = 0;
        return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;
        u64 branch_type = 0;

        evlist__for_each_entry(evlist, evsel)
                branch_type |= evsel->attr.branch_sample_type;
        return branch_type;
}

bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
        u64 read_format = first->attr.read_format;
        u64 sample_type = first->attr.sample_type;

        evlist__for_each_entry(evlist, pos) {
                if (read_format != pos->attr.read_format)
                        return false;
        }

        /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
        if ((sample_type & PERF_SAMPLE_READ) &&
            !(read_format & PERF_FORMAT_ID)) {
                return false;
        }

        return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist);
        return first->attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist);
        struct perf_sample *data;
        u64 sample_type;
        u16 size = 0;

        if (!first->attr.sample_id_all)
                goto out;

        sample_type = first->attr.sample_type;

        if (sample_type & PERF_SAMPLE_TID)
                size += sizeof(data->tid) * 2;

        if (sample_type & PERF_SAMPLE_TIME)
                size += sizeof(data->time);

        if (sample_type & PERF_SAMPLE_ID)
                size += sizeof(data->id);

        if (sample_type & PERF_SAMPLE_STREAM_ID)
                size += sizeof(data->stream_id);

        if (sample_type & PERF_SAMPLE_CPU)
                size += sizeof(data->cpu) * 2;

        if (sample_type & PERF_SAMPLE_IDENTIFIER)
                size += sizeof(data->id);
out:
        return size;
}

bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

        evlist__for_each_entry_continue(evlist, pos) {
                if (first->attr.sample_id_all != pos->attr.sample_id_all)
                        return false;
        }

        return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist);
        return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
                               struct perf_evsel *evsel)
{
        evlist->selected = evsel;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        evlist__for_each_entry_reverse(evlist, evsel)
                perf_evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
        struct cpu_map    *cpus;
        struct thread_map *threads;
        int err = -ENOMEM;

        /*
         * Try reading /sys/devices/system/cpu/online to get
         * an all cpus map.
         *
         * FIXME: -ENOMEM is the best we can do here, the cpu_map
         * code needs an overhaul to properly forward the
         * error, and we may not want to do that fallback to a
         * default cpu identity map :-\
         */
        cpus = cpu_map__new(NULL);
        if (!cpus)
                goto out;

        threads = thread_map__new_dummy();
        if (!threads)
                goto out_put;

        perf_evlist__set_maps(evlist, cpus, threads);
        err = 0;
out:
        return err;
out_put:
        cpu_map__put(cpus);
        goto out;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;
        int err;

        /*
         * Default: one fd per CPU, all threads, aka systemwide
         * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
         */
        if (evlist->threads == NULL && evlist->cpus == NULL) {
                err = perf_evlist__create_syswide_maps(evlist);
                if (err < 0)
                        goto out_err;
        }

        perf_evlist__update_id_pos(evlist);

        evlist__for_each_entry(evlist, evsel) {
                err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
                if (err < 0)
                        goto out_err;
        }

        return 0;
out_err:
        perf_evlist__close(evlist);
        errno = -err;
        return err;
}
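
/*
 * Usage sketch (illustrative): a caller wanting a readable message on
 * open failure can combine this with perf_evlist__strerror_open(),
 * defined further down in this file:
 *
 *      char errbuf[BUFSIZ];
 *
 *      if (perf_evlist__open(evlist) < 0) {
 *              perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
 *              pr_err("Couldn't open events: %s\n", errbuf);
 *      }
 */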

int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
                                  const char *argv[], bool pipe_output,
                                  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
        int child_ready_pipe[2], go_pipe[2];
        char bf;

        if (pipe(child_ready_pipe) < 0) {
                perror("failed to create 'ready' pipe");
                return -1;
        }

        if (pipe(go_pipe) < 0) {
                perror("failed to create 'go' pipe");
                goto out_close_ready_pipe;
        }

        evlist->workload.pid = fork();
        if (evlist->workload.pid < 0) {
                perror("failed to fork");
                goto out_close_pipes;
        }

        if (!evlist->workload.pid) {
                int ret;

                if (pipe_output)
                        dup2(2, 1);

                signal(SIGTERM, SIG_DFL);

                close(child_ready_pipe[0]);
                close(go_pipe[1]);
                fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

                /*
                 * Tell the parent we're ready to go
                 */
                close(child_ready_pipe[1]);

                /*
                 * Wait until the parent tells us to go.
                 */
                ret = read(go_pipe[0], &bf, 1);
                /*
                 * The parent will ask for the execvp() to be performed by
                 * writing exactly one byte, in workload.cork_fd, usually via
                 * perf_evlist__start_workload().
                 *
                 * For cancelling the workload without actually running it,
                 * the parent will just close workload.cork_fd, without writing
                 * anything, i.e. read will return zero and we just exit()
                 * here.
                 */
                if (ret != 1) {
                        if (ret == -1)
                                perror("unable to read pipe");
                        exit(ret);
                }

                execvp(argv[0], (char **)argv);

                if (exec_error) {
                        union sigval val;

                        val.sival_int = errno;
                        if (sigqueue(getppid(), SIGUSR1, val))
                                perror(argv[0]);
                } else
                        perror(argv[0]);
                exit(-1);
        }

        if (exec_error) {
                struct sigaction act = {
                        .sa_flags     = SA_SIGINFO,
                        .sa_sigaction = exec_error,
                };
                sigaction(SIGUSR1, &act, NULL);
        }

        if (target__none(target)) {
                if (evlist->threads == NULL) {
                        fprintf(stderr, "FATAL: evlist->threads needs to be set at this point (%s:%d).\n",
                                __func__, __LINE__);
                        goto out_close_pipes;
                }
                thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
        }

        close(child_ready_pipe[1]);
        close(go_pipe[0]);
        /*
         * wait for child to settle
         */
        if (read(child_ready_pipe[0], &bf, 1) == -1) {
                perror("unable to read pipe");
                goto out_close_pipes;
        }

        fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
        evlist->workload.cork_fd = go_pipe[1];
        close(child_ready_pipe[0]);
        return 0;

out_close_pipes:
        close(go_pipe[0]);
        close(go_pipe[1]);
out_close_ready_pipe:
        close(child_ready_pipe[0]);
        close(child_ready_pipe[1]);
        return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
        if (evlist->workload.cork_fd > 0) {
                char bf = 0;
                int ret;
                /*
                 * Remove the cork, let it rip!
                 */
                ret = write(evlist->workload.cork_fd, &bf, 1);
                if (ret < 0)
                        perror("unable to write to pipe");

                close(evlist->workload.cork_fd);
                return ret;
        }

        return 0;
}
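
/*
 * Usage sketch (illustrative; "sleep 1" stands in for any workload argv):
 *
 *      const char *argv[] = { "sleep", "1", NULL };
 *
 *      perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
 *      ... open events, mmap ring buffers ...
 *      perf_evlist__start_workload(evlist);
 *
 * prepare forks the corked child; start_workload() writes the single byte
 * that lets the child proceed to execvp().
 */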
1552
1553int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
1554                              struct perf_sample *sample)
1555{
1556        struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);
1557
1558        if (!evsel)
1559                return -EFAULT;
1560        return perf_evsel__parse_sample(evsel, event, sample);
1561}
1562
1563int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist,
1564                                        union perf_event *event,
1565                                        u64 *timestamp)
1566{
1567        struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);
1568
1569        if (!evsel)
1570                return -EFAULT;
1571        return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
1572}
1573
1574size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
1575{
1576        struct perf_evsel *evsel;
1577        size_t printed = 0;
1578
1579        evlist__for_each_entry(evlist, evsel) {
1580                printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
1581                                   perf_evsel__name(evsel));
1582        }
1583
1584        return printed + fprintf(fp, "\n");
1585}
1586
1587int perf_evlist__strerror_open(struct perf_evlist *evlist,
1588                               int err, char *buf, size_t size)
1589{
1590        int printed, value;
1591        char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
1592
1593        switch (err) {
1594        case EACCES:
1595        case EPERM:
1596                printed = scnprintf(buf, size,
1597                                    "Error:\t%s.\n"
1598                                    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);
1599
1600                value = perf_event_paranoid();
1601
1602                printed += scnprintf(buf + printed, size - printed, "\nHint:\t");
1603
1604                if (value >= 2) {
1605                        printed += scnprintf(buf + printed, size - printed,
1606                                             "For your workloads it needs to be <= 1\nHint:\t");
1607                }
1608                printed += scnprintf(buf + printed, size - printed,
1609                                     "For system wide tracing it needs to be set to -1.\n");
1610
1611                printed += scnprintf(buf + printed, size - printed,
1612                                    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
1613                                    "Hint:\tThe current value is %d.", value);
1614                break;
1615        case EINVAL: {
1616                struct perf_evsel *first = perf_evlist__first(evlist);
1617                int max_freq;
1618
1619                if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
1620                        goto out_default;
1621
1622                if (first->attr.sample_freq < (u64)max_freq)
1623                        goto out_default;
1624
1625                printed = scnprintf(buf, size,
1626                                    "Error:\t%s.\n"
1627                                    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
1628                                    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
1629                                    emsg, max_freq, first->attr.sample_freq);
1630                break;
1631        }
1632        default:
1633out_default:
1634                scnprintf(buf, size, "%s", emsg);
1635                break;
1636        }
1637
1638        return 0;
1639}
1640
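    /*
     * Format a human readable message for a failed ring buffer mmap.
     * Note that the sizes reported here are in kB (mmap_len / 1024),
     * not pages, despite the variable names.  A sketch of one possible
     * caller ('opts' is hypothetical):
     *
     *	char errbuf[BUFSIZ];
     *
     *	if (perf_evlist__mmap(evlist, opts->mmap_pages) < 0) {
     *		perf_evlist__strerror_mmap(evlist, errno, errbuf,
     *					   sizeof(errbuf));
     *		ui__error("%s\n", errbuf);
     *	}
     */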
1641int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
1642{
1643        char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
            /* Zero-init: the sysctl read below may fail and leave it unset. */
1644        int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user = 0, printed = 0;
1645
1646        switch (err) {
1647        case EPERM:
1648                sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
1649                printed += scnprintf(buf + printed, size - printed,
1650                                     "Error:\t%s.\n"
1651                                     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
1652                                     "Hint:\tTried using %zd kB.\n",
1653                                     emsg, pages_max_per_user, pages_attempted);
1654
1655                if (pages_attempted >= pages_max_per_user) {
1656                        printed += scnprintf(buf + printed, size - printed,
1657                                             "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
1658                                             pages_max_per_user + pages_attempted);
1659                }
1660
1661                printed += scnprintf(buf + printed, size - printed,
1662                                     "Hint:\tTry using a smaller -m/--mmap-pages value.");
1663                break;
1664        default:
1665                scnprintf(buf, size, "%s", emsg);
1666                break;
1667        }
1668
1669        return 0;
1670}
1671
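    /*
     * Move @move_evsel's whole group to the head of the evlist, keeping
     * the members' relative order.  No-op if @move_evsel is already the
     * first event.
     */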
1672void perf_evlist__to_front(struct perf_evlist *evlist,
1673                           struct perf_evsel *move_evsel)
1674{
1675        struct perf_evsel *evsel, *n;
1676        LIST_HEAD(move);
1677
1678        if (move_evsel == perf_evlist__first(evlist))
1679                return;
1680
1681        evlist__for_each_entry_safe(evlist, n, evsel) {
1682                if (evsel->leader == move_evsel->leader)
1683                        list_move_tail(&evsel->node, &move);
1684        }
1685
1686        list_splice(&move, &evlist->entries);
1687}
1688
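    /*
     * Make @tracking_evsel the event that carries the tracking meta
     * events (mmap, comm, task, ...) by clearing the tracking flag on
     * every other event in the evlist.
     */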
1689void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
1690                                     struct perf_evsel *tracking_evsel)
1691{
1692        struct perf_evsel *evsel;
1693
1694        if (tracking_evsel->tracking)
1695                return;
1696
1697        evlist__for_each_entry(evlist, evsel) {
1698                if (evsel != tracking_evsel)
1699                        evsel->tracking = false;
1700        }
1701
1702        tracking_evsel->tracking = true;
1703}
1704
1705struct perf_evsel *
1706perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
1707                               const char *str)
1708{
1709        struct perf_evsel *evsel;
1710
1711        evlist__for_each_entry(evlist, evsel) {
1712                if (!evsel->name)
1713                        continue;
1714                if (strcmp(str, evsel->name) == 0)
1715                        return evsel;
1716        }
1717
1718        return NULL;
1719}
1720
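    /*
     * Backward (overwrite) ring buffer state machine.  Legal
     * transitions:
     *
     *   NOTREADY -> RUNNING -> DATA_PENDING -> EMPTY -> RUNNING -> ...
     *
     * The RUNNING -> DATA_PENDING edge pauses the ring buffers,
     * EMPTY -> RUNNING resumes them; any other requested transition
     * leaves the state unchanged.  No-op when the evlist has no
     * overwritable mmaps.
     */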
1721void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
1722                                  enum bkw_mmap_state state)
1723{
1724        enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
1725        enum action {
1726                NONE,
1727                PAUSE,
1728                RESUME,
1729        } action = NONE;
1730
1731        if (!evlist->overwrite_mmap)
1732                return;
1733
1734        switch (old_state) {
1735        case BKW_MMAP_NOTREADY: {
1736                if (state != BKW_MMAP_RUNNING)
1737                        goto state_err;
1738                break;
1739        }
1740        case BKW_MMAP_RUNNING: {
1741                if (state != BKW_MMAP_DATA_PENDING)
1742                        goto state_err;
1743                action = PAUSE;
1744                break;
1745        }
1746        case BKW_MMAP_DATA_PENDING: {
1747                if (state != BKW_MMAP_EMPTY)
1748                        goto state_err;
1749                break;
1750        }
1751        case BKW_MMAP_EMPTY: {
1752                if (state != BKW_MMAP_RUNNING)
1753                        goto state_err;
1754                action = RESUME;
1755                break;
1756        }
1757        default:
1758                WARN_ONCE(1, "Shouldn't get here\n");
1759        }
1760
1761        evlist->bkw_mmap_state = state;
1762
1763        switch (action) {
1764        case PAUSE:
1765                perf_evlist__pause(evlist);
1766                break;
1767        case RESUME:
1768                perf_evlist__resume(evlist);
1769                break;
1770        case NONE:
1771        default:
1772                break;
1773        }
1774
1775state_err:
1776        return;
1777}
1778
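    /* True only when every event in the evlist excludes kernel samples. */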
1779bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
1780{
1781        struct perf_evsel *evsel;
1782
1783        evlist__for_each_entry(evlist, evsel) {
1784                if (!evsel->attr.exclude_kernel)
1785                        return false;
1786        }
1787
1788        return true;
1789}
1790
1791/*
1792 * Events in the data file are not collected in groups, but we still want
1793 * the group display. Set the artificial group and set the leader's
1794 * forced_leader flag to notify the display code.
1795 */
1796void perf_evlist__force_leader(struct perf_evlist *evlist)
1797{
1798        if (!evlist->nr_groups) {
1799                struct perf_evsel *leader = perf_evlist__first(evlist);
1800
1801                perf_evlist__set_leader(evlist);
1802                leader->forced_leader = true;
1803        }
1804}
1805
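    /*
     * Opening a member of a weak group failed: close the members that
     * were already opened and make each member its own leader, so the
     * caller can retry the events individually.  Returns the old group
     * leader.
     */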
1806struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list,
1807                                                 struct perf_evsel *evsel)
1808{
1809        struct perf_evsel *c2, *leader;
1810        bool is_open = true;
1811
1812        leader = evsel->leader;
1813        pr_debug("Weak group for %s/%d failed\n",
1814                        leader->name, leader->nr_members);
1815
1816        /*
1817         * for_each_group_member doesn't work here because it doesn't
1818         * include the first entry.
1819         */
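            /*
             * 'evsel' is the member whose open failed: members before it
             * in the list were opened and must be closed, members from
             * it onwards were never opened.
             */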
1820        evlist__for_each_entry(evsel_list, c2) {
1821                if (c2 == evsel)
1822                        is_open = false;
1823                if (c2->leader == leader) {
1824                        if (is_open)
1825                                perf_evsel__close(c2);
1826                        c2->leader = c2;
1827                        c2->nr_members = 0;
1828                }
1829        }
1830        return leader;
1831}
1832
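    /*
     * Add a side-band event to *@evlist, allocating the evlist if
     * needed.  A sketch of a hypothetical user - my_cb and my_data are
     * placeholders for the caller's handler and its cookie:
     *
     *	static void my_cb(union perf_event *event, void *data) { ... }
     *
     *	struct perf_evlist *sb_evlist = NULL;
     *	struct perf_event_attr attr = {
     *		.type          = PERF_TYPE_SOFTWARE,
     *		.config        = PERF_COUNT_SW_DUMMY,
     *		.sample_id_all = 1,
     *	};
     *
     *	if (perf_evlist__add_sb_event(&sb_evlist, &attr, my_cb, my_data))
     *		return -1;
     *	if (perf_evlist__start_sb_thread(sb_evlist, &target))
     *		return -1;
     *	...
     *	perf_evlist__stop_sb_thread(sb_evlist);
     */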
1833int perf_evlist__add_sb_event(struct perf_evlist **evlist,
1834                              struct perf_event_attr *attr,
1835                              perf_evsel__sb_cb_t cb,
1836                              void *data)
1837{
1838        struct perf_evsel *evsel;
1839        bool new_evlist = (*evlist) == NULL;
1840
1841        if (*evlist == NULL)
1842                *evlist = perf_evlist__new();
1843        if (*evlist == NULL)
1844                return -1;
1845
1846        if (!attr->sample_id_all) {
1847                pr_warning("enabling sample_id_all for all side-band events\n");
1848                attr->sample_id_all = 1;
1849        }
1850
1851        evsel = perf_evsel__new_idx(attr, (*evlist)->nr_entries);
1852        if (!evsel)
1853                goto out_err;
1854
1855        evsel->side_band.cb = cb;
1856        evsel->side_band.data = data;
1857        perf_evlist__add(*evlist, evsel);
1858        return 0;
1859
1860out_err:
1861        if (new_evlist) {
1862                perf_evlist__delete(*evlist);
1863                *evlist = NULL;
1864        }
1865        return -1;
1866}
1867
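    /*
     * Side-band poll loop: route every event to its evsel's
     * ->side_band.cb() handler, and keep draining after ->thread.done
     * is set until a full pass over the mmaps finds no more data.
     */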
1868static void *perf_evlist__poll_thread(void *arg)
1869{
1870        struct perf_evlist *evlist = arg;
1871        bool draining = false;
1872        int i, done = 0;
1873
1874        while (!done) {
1875                bool got_data = false;
1876
1877                if (evlist->thread.done)
1878                        draining = true;
1879
1880                if (!draining)
1881                        perf_evlist__poll(evlist, 1000);
1882
1883                for (i = 0; i < evlist->nr_mmaps; i++) {
1884                        struct perf_mmap *map = &evlist->mmap[i];
1885                        union perf_event *event;
1886
1887                        if (perf_mmap__read_init(map))
1888                                continue;
1889                        while ((event = perf_mmap__read_event(map)) != NULL) {
1890                                struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);
1891
1892                                if (evsel && evsel->side_band.cb)
1893                                        evsel->side_band.cb(event, evsel->side_band.data);
1894                                else
1895                                        pr_warning("cannot locate an evsel for the side-band event\n");
1896
1897                                perf_mmap__consume(map);
1898                                got_data = true;
1899                        }
1900                        perf_mmap__read_done(map);
1901                }
1902
1903                if (draining && !got_data)
1904                        break;
1905        }
1906        return NULL;
1907}
1908
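    /*
     * Open, mmap and enable every event in @evlist for @target, then
     * spawn the side-band poll thread.  On failure the evlist is
     * deleted, so the caller must not touch it again after an error.
     * Pair with perf_evlist__stop_sb_thread().
     */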
1909int perf_evlist__start_sb_thread(struct perf_evlist *evlist,
1910                                 struct target *target)
1911{
1912        struct perf_evsel *counter;
1913
1914        if (!evlist)
1915                return 0;
1916
1917        if (perf_evlist__create_maps(evlist, target))
1918                goto out_delete_evlist;
1919
1920        evlist__for_each_entry(evlist, counter) {
1921                if (perf_evsel__open(counter, evlist->cpus,
1922                                     evlist->threads) < 0)
1923                        goto out_delete_evlist;
1924        }
1925
1926        if (perf_evlist__mmap(evlist, UINT_MAX))
1927                goto out_delete_evlist;
1928
1929        evlist__for_each_entry(evlist, counter) {
1930                if (perf_evsel__enable(counter))
1931                        goto out_delete_evlist;
1932        }
1933
1934        evlist->thread.done = 0;
1935        if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
1936                goto out_delete_evlist;
1937
1938        return 0;
1939
1940out_delete_evlist:
1941        perf_evlist__delete(evlist);
1942        evlist = NULL;
1943        return -1;
1944}
1945
1946void perf_evlist__stop_sb_thread(struct perf_evlist *evlist)
1947{
1948        if (!evlist)
1949                return;
1950        evlist->thread.done = 1;
1951        pthread_join(evlist->thread.th, NULL);
1952        perf_evlist__delete(evlist);
1953}
1954