linux/tools/perf/util/evlist.c
   1/*
   2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
   3 *
   4 * Parts came from builtin-{top,stat,record}.c, see those files for further
   5 * copyright notes.
   6 *
   7 * Released under the GPL v2. (and only v2, not any later version)
   8 */
   9#include "util.h"
  10#include <api/fs/fs.h>
  11#include <poll.h>
  12#include "cpumap.h"
  13#include "thread_map.h"
  14#include "target.h"
  15#include "evlist.h"
  16#include "evsel.h"
  17#include "debug.h"
  18#include <unistd.h>
  19
  20#include "parse-events.h"
  21#include <subcmd/parse-options.h>
  22
  23#include <sys/mman.h>
  24
  25#include <linux/bitops.h>
  26#include <linux/hash.h>
  27#include <linux/log2.h>
  28#include <linux/err.h>
  29
  30static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
  31static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);
  32
  33#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
  34#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
  35
  36void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
  37                       struct thread_map *threads)
  38{
  39        int i;
  40
  41        for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
  42                INIT_HLIST_HEAD(&evlist->heads[i]);
  43        INIT_LIST_HEAD(&evlist->entries);
  44        perf_evlist__set_maps(evlist, cpus, threads);
  45        fdarray__init(&evlist->pollfd, 64);
  46        evlist->workload.pid = -1;
  47        evlist->backward = false;
  48}
  49
  50struct perf_evlist *perf_evlist__new(void)
  51{
  52        struct perf_evlist *evlist = zalloc(sizeof(*evlist));
  53
  54        if (evlist != NULL)
  55                perf_evlist__init(evlist, NULL, NULL);
  56
  57        return evlist;
  58}
  59
  60struct perf_evlist *perf_evlist__new_default(void)
  61{
  62        struct perf_evlist *evlist = perf_evlist__new();
  63
  64        if (evlist && perf_evlist__add_default(evlist)) {
  65                perf_evlist__delete(evlist);
  66                evlist = NULL;
  67        }
  68
  69        return evlist;
  70}
  71
  72struct perf_evlist *perf_evlist__new_dummy(void)
  73{
  74        struct perf_evlist *evlist = perf_evlist__new();
  75
  76        if (evlist && perf_evlist__add_dummy(evlist)) {
  77                perf_evlist__delete(evlist);
  78                evlist = NULL;
  79        }
  80
  81        return evlist;
  82}
  83
  84/**
  85 * perf_evlist__set_id_pos - set the positions of event ids.
  86 * @evlist: selected event list
  87 *
  88 * Events with compatible sample types all have the same id_pos
  89 * and is_pos.  For convenience, put a copy on evlist.
  90 */
  91void perf_evlist__set_id_pos(struct perf_evlist *evlist)
  92{
  93        struct perf_evsel *first = perf_evlist__first(evlist);
  94
  95        evlist->id_pos = first->id_pos;
  96        evlist->is_pos = first->is_pos;
  97}
  98
  99static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
 100{
 101        struct perf_evsel *evsel;
 102
 103        evlist__for_each(evlist, evsel)
 104                perf_evsel__calc_id_pos(evsel);
 105
 106        perf_evlist__set_id_pos(evlist);
 107}
 108
 109static void perf_evlist__purge(struct perf_evlist *evlist)
 110{
 111        struct perf_evsel *pos, *n;
 112
 113        evlist__for_each_safe(evlist, n, pos) {
 114                list_del_init(&pos->node);
 115                pos->evlist = NULL;
 116                perf_evsel__delete(pos);
 117        }
 118
 119        evlist->nr_entries = 0;
 120}
 121
 122void perf_evlist__exit(struct perf_evlist *evlist)
 123{
 124        zfree(&evlist->mmap);
 125        fdarray__exit(&evlist->pollfd);
 126}
 127
 128void perf_evlist__delete(struct perf_evlist *evlist)
 129{
 130        perf_evlist__munmap(evlist);
 131        perf_evlist__close(evlist);
 132        cpu_map__put(evlist->cpus);
 133        thread_map__put(evlist->threads);
 134        evlist->cpus = NULL;
 135        evlist->threads = NULL;
 136        perf_evlist__purge(evlist);
 137        perf_evlist__exit(evlist);
 138        free(evlist);
 139}
 140
 141static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
 142                                          struct perf_evsel *evsel)
 143{
 144        /*
  145         * evsel may already have cpus (set up via the PMU's sysfs entry),
  146         * so keep them, unless the user defined a target cpu list.
 147         */
 148        if (!evsel->own_cpus || evlist->has_user_cpus) {
 149                cpu_map__put(evsel->cpus);
 150                evsel->cpus = cpu_map__get(evlist->cpus);
 151        } else if (evsel->cpus != evsel->own_cpus) {
 152                cpu_map__put(evsel->cpus);
 153                evsel->cpus = cpu_map__get(evsel->own_cpus);
 154        }
 155
 156        thread_map__put(evsel->threads);
 157        evsel->threads = thread_map__get(evlist->threads);
 158}
 159
 160static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
 161{
 162        struct perf_evsel *evsel;
 163
 164        evlist__for_each(evlist, evsel)
 165                __perf_evlist__propagate_maps(evlist, evsel);
 166}
 167
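     /*
      * Append an evsel to the evlist: give it the next index, make the very
      * first entry the tracking event, and propagate the evlist's cpu/thread
      * maps to it.
      */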
 168void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
 169{
 170        entry->evlist = evlist;
 171        list_add_tail(&entry->node, &evlist->entries);
 172        entry->idx = evlist->nr_entries;
 173        entry->tracking = !entry->idx;
 174
 175        if (!evlist->nr_entries++)
 176                perf_evlist__set_id_pos(evlist);
 177
 178        __perf_evlist__propagate_maps(evlist, entry);
 179}
 180
 181void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
 182{
 183        evsel->evlist = NULL;
 184        list_del_init(&evsel->node);
 185        evlist->nr_entries -= 1;
 186}
 187
 188void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
 189                                   struct list_head *list)
 190{
 191        struct perf_evsel *evsel, *temp;
 192
 193        __evlist__for_each_safe(list, temp, evsel) {
 194                list_del_init(&evsel->node);
 195                perf_evlist__add(evlist, evsel);
 196        }
 197}
 198
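     /*
      * Make the first event on 'list' the group leader of every event on it
      * and record in the leader how many members the group has.
      */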
 199void __perf_evlist__set_leader(struct list_head *list)
 200{
 201        struct perf_evsel *evsel, *leader;
 202
 203        leader = list_entry(list->next, struct perf_evsel, node);
 204        evsel = list_entry(list->prev, struct perf_evsel, node);
 205
 206        leader->nr_members = evsel->idx - leader->idx + 1;
 207
 208        __evlist__for_each(list, evsel) {
 209                evsel->leader = leader;
 210        }
 211}
 212
 213void perf_evlist__set_leader(struct perf_evlist *evlist)
 214{
 215        if (evlist->nr_entries) {
 216                evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
 217                __perf_evlist__set_leader(&evlist->entries);
 218        }
 219}
 220
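     /*
      * Probe for the deepest precise_ip the running kernel/CPU accepts:
      * start at 3 (:ppp) and keep lowering it until sys_perf_event_open()
      * succeeds, leaving attr->precise_ip at the best supported level.
      */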
 221void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
 222{
 223        attr->precise_ip = 3;
 224
 225        while (attr->precise_ip != 0) {
 226                int fd = sys_perf_event_open(attr, 0, -1, -1, 0);
 227                if (fd != -1) {
 228                        close(fd);
 229                        break;
 230                }
 231                --attr->precise_ip;
 232        }
 233}
 234
 235int perf_evlist__add_default(struct perf_evlist *evlist)
 236{
 237        struct perf_event_attr attr = {
 238                .type = PERF_TYPE_HARDWARE,
 239                .config = PERF_COUNT_HW_CPU_CYCLES,
 240        };
 241        struct perf_evsel *evsel;
 242
 243        event_attr_init(&attr);
 244
 245        perf_event_attr__set_max_precise_ip(&attr);
 246
 247        evsel = perf_evsel__new(&attr);
 248        if (evsel == NULL)
 249                goto error;
 250
 251        /* use asprintf() because free(evsel) assumes name is allocated */
 252        if (asprintf(&evsel->name, "cycles%.*s",
 253                     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
 254                goto error_free;
 255
 256        perf_evlist__add(evlist, evsel);
 257        return 0;
 258error_free:
 259        perf_evsel__delete(evsel);
 260error:
 261        return -ENOMEM;
 262}
 263
 264int perf_evlist__add_dummy(struct perf_evlist *evlist)
 265{
 266        struct perf_event_attr attr = {
 267                .type   = PERF_TYPE_SOFTWARE,
 268                .config = PERF_COUNT_SW_DUMMY,
 269                .size   = sizeof(attr), /* to capture ABI version */
 270        };
 271        struct perf_evsel *evsel = perf_evsel__new(&attr);
 272
 273        if (evsel == NULL)
 274                return -ENOMEM;
 275
 276        perf_evlist__add(evlist, evsel);
 277        return 0;
 278}
 279
 280static int perf_evlist__add_attrs(struct perf_evlist *evlist,
 281                                  struct perf_event_attr *attrs, size_t nr_attrs)
 282{
 283        struct perf_evsel *evsel, *n;
 284        LIST_HEAD(head);
 285        size_t i;
 286
 287        for (i = 0; i < nr_attrs; i++) {
 288                evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
 289                if (evsel == NULL)
 290                        goto out_delete_partial_list;
 291                list_add_tail(&evsel->node, &head);
 292        }
 293
 294        perf_evlist__splice_list_tail(evlist, &head);
 295
 296        return 0;
 297
 298out_delete_partial_list:
 299        __evlist__for_each_safe(&head, n, evsel)
 300                perf_evsel__delete(evsel);
 301        return -1;
 302}
 303
 304int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
 305                                     struct perf_event_attr *attrs, size_t nr_attrs)
 306{
 307        size_t i;
 308
 309        for (i = 0; i < nr_attrs; i++)
 310                event_attr_init(attrs + i);
 311
 312        return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
 313}
 314
 315struct perf_evsel *
 316perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
 317{
 318        struct perf_evsel *evsel;
 319
 320        evlist__for_each(evlist, evsel) {
 321                if (evsel->attr.type   == PERF_TYPE_TRACEPOINT &&
 322                    (int)evsel->attr.config == id)
 323                        return evsel;
 324        }
 325
 326        return NULL;
 327}
 328
 329struct perf_evsel *
 330perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
 331                                     const char *name)
 332{
 333        struct perf_evsel *evsel;
 334
 335        evlist__for_each(evlist, evsel) {
 336                if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
 337                    (strcmp(evsel->name, name) == 0))
 338                        return evsel;
 339        }
 340
 341        return NULL;
 342}
 343
 344int perf_evlist__add_newtp(struct perf_evlist *evlist,
 345                           const char *sys, const char *name, void *handler)
 346{
 347        struct perf_evsel *evsel = perf_evsel__newtp(sys, name);
 348
 349        if (IS_ERR(evsel))
 350                return -1;
 351
 352        evsel->handler = handler;
 353        perf_evlist__add(evlist, evsel);
 354        return 0;
 355}
 356
 357static int perf_evlist__nr_threads(struct perf_evlist *evlist,
 358                                   struct perf_evsel *evsel)
 359{
 360        if (evsel->system_wide)
 361                return 1;
 362        else
 363                return thread_map__nr(evlist->threads);
 364}
 365
 366void perf_evlist__disable(struct perf_evlist *evlist)
 367{
 368        struct perf_evsel *pos;
 369
 370        evlist__for_each(evlist, pos) {
 371                if (!perf_evsel__is_group_leader(pos) || !pos->fd)
 372                        continue;
 373                perf_evsel__disable(pos);
 374        }
 375
 376        evlist->enabled = false;
 377}
 378
 379void perf_evlist__enable(struct perf_evlist *evlist)
 380{
 381        struct perf_evsel *pos;
 382
 383        evlist__for_each(evlist, pos) {
 384                if (!perf_evsel__is_group_leader(pos) || !pos->fd)
 385                        continue;
 386                perf_evsel__enable(pos);
 387        }
 388
 389        evlist->enabled = true;
 390}
 391
 392void perf_evlist__toggle_enable(struct perf_evlist *evlist)
 393{
 394        (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
 395}
 396
 397static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
 398                                         struct perf_evsel *evsel, int cpu)
 399{
 400        int thread, err;
 401        int nr_threads = perf_evlist__nr_threads(evlist, evsel);
 402
 403        if (!evsel->fd)
 404                return -EINVAL;
 405
 406        for (thread = 0; thread < nr_threads; thread++) {
 407                err = ioctl(FD(evsel, cpu, thread),
 408                            PERF_EVENT_IOC_ENABLE, 0);
 409                if (err)
 410                        return err;
 411        }
 412        return 0;
 413}
 414
 415static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
 416                                            struct perf_evsel *evsel,
 417                                            int thread)
 418{
 419        int cpu, err;
 420        int nr_cpus = cpu_map__nr(evlist->cpus);
 421
 422        if (!evsel->fd)
 423                return -EINVAL;
 424
 425        for (cpu = 0; cpu < nr_cpus; cpu++) {
 426                err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
 427                if (err)
 428                        return err;
 429        }
 430        return 0;
 431}
 432
 433int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
 434                                  struct perf_evsel *evsel, int idx)
 435{
 436        bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);
 437
 438        if (per_cpu_mmaps)
 439                return perf_evlist__enable_event_cpu(evlist, evsel, idx);
 440        else
 441                return perf_evlist__enable_event_thread(evlist, evsel, idx);
 442}
 443
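     /*
      * Size the pollfd array: system wide events contribute one fd per cpu,
      * all other events one fd per cpu per thread.
      */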
 444int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
 445{
 446        int nr_cpus = cpu_map__nr(evlist->cpus);
 447        int nr_threads = thread_map__nr(evlist->threads);
 448        int nfds = 0;
 449        struct perf_evsel *evsel;
 450
 451        evlist__for_each(evlist, evsel) {
 452                if (evsel->system_wide)
 453                        nfds += nr_cpus;
 454                else
 455                        nfds += nr_cpus * nr_threads;
 456        }
 457
 458        if (fdarray__available_entries(&evlist->pollfd) < nfds &&
 459            fdarray__grow(&evlist->pollfd, nfds) < 0)
 460                return -ENOMEM;
 461
 462        return 0;
 463}
 464
 465static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
 466{
 467        int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
 468        /*
 469         * Save the idx so that when we filter out fds POLLHUP'ed we can
 470         * close the associated evlist->mmap[] entry.
 471         */
 472        if (pos >= 0) {
 473                evlist->pollfd.priv[pos].idx = idx;
 474
 475                fcntl(fd, F_SETFL, O_NONBLOCK);
 476        }
 477
 478        return pos;
 479}
 480
 481int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
 482{
 483        return __perf_evlist__add_pollfd(evlist, fd, -1);
 484}
 485
 486static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
 487{
 488        struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);
 489
 490        perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
 491}
 492
 493int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
 494{
 495        return fdarray__filter(&evlist->pollfd, revents_and_mask,
 496                               perf_evlist__munmap_filtered);
 497}
 498
 499int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
 500{
 501        return fdarray__poll(&evlist->pollfd, timeout);
 502}
 503
 504static void perf_evlist__id_hash(struct perf_evlist *evlist,
 505                                 struct perf_evsel *evsel,
 506                                 int cpu, int thread, u64 id)
 507{
 508        int hash;
 509        struct perf_sample_id *sid = SID(evsel, cpu, thread);
 510
 511        sid->id = id;
 512        sid->evsel = evsel;
 513        hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
 514        hlist_add_head(&sid->node, &evlist->heads[hash]);
 515}
 516
 517void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
 518                         int cpu, int thread, u64 id)
 519{
 520        perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
 521        evsel->id[evsel->ids++] = id;
 522}
 523
 524int perf_evlist__id_add_fd(struct perf_evlist *evlist,
 525                           struct perf_evsel *evsel,
 526                           int cpu, int thread, int fd)
 527{
 528        u64 read_data[4] = { 0, };
 529        int id_idx = 1; /* The first entry is the counter value */
 530        u64 id;
 531        int ret;
 532
 533        ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
 534        if (!ret)
 535                goto add;
 536
 537        if (errno != ENOTTY)
 538                return -1;
 539
 540        /* Legacy way to get event id.. All hail to old kernels! */
 541
 542        /*
 543         * This way does not work with group format read, so bail
 544         * out in that case.
 545         */
 546        if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
 547                return -1;
 548
 549        if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
 550            read(fd, &read_data, sizeof(read_data)) == -1)
 551                return -1;
 552
 553        if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
 554                ++id_idx;
 555        if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
 556                ++id_idx;
 557
 558        id = read_data[id_idx];
 559
 560 add:
 561        perf_evlist__id_add(evlist, evsel, cpu, thread, id);
 562        return 0;
 563}
 564
 565static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
 566                                     struct perf_evsel *evsel, int idx, int cpu,
 567                                     int thread)
 568{
 569        struct perf_sample_id *sid = SID(evsel, cpu, thread);
 570        sid->idx = idx;
 571        if (evlist->cpus && cpu >= 0)
 572                sid->cpu = evlist->cpus->map[cpu];
 573        else
 574                sid->cpu = -1;
 575        if (!evsel->system_wide && evlist->threads && thread >= 0)
 576                sid->tid = thread_map__pid(evlist->threads, thread);
 577        else
 578                sid->tid = -1;
 579}
 580
 581struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
 582{
 583        struct hlist_head *head;
 584        struct perf_sample_id *sid;
 585        int hash;
 586
 587        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
 588        head = &evlist->heads[hash];
 589
 590        hlist_for_each_entry(sid, head, node)
 591                if (sid->id == id)
 592                        return sid;
 593
 594        return NULL;
 595}
 596
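     /*
      * Map a sample id back to the evsel that generated it. With a single
      * event or an id of 0 fall back to the first evsel, and likewise when
      * sample ids are not recorded at all.
      */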
 597struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
 598{
 599        struct perf_sample_id *sid;
 600
 601        if (evlist->nr_entries == 1 || !id)
 602                return perf_evlist__first(evlist);
 603
 604        sid = perf_evlist__id2sid(evlist, id);
 605        if (sid)
 606                return sid->evsel;
 607
 608        if (!perf_evlist__sample_id_all(evlist))
 609                return perf_evlist__first(evlist);
 610
 611        return NULL;
 612}
 613
 614struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
 615                                                u64 id)
 616{
 617        struct perf_sample_id *sid;
 618
 619        if (!id)
 620                return NULL;
 621
 622        sid = perf_evlist__id2sid(evlist, id);
 623        if (sid)
 624                return sid->evsel;
 625
 626        return NULL;
 627}
 628
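     /*
      * Extract the sample id from a raw event: for PERF_RECORD_SAMPLE the id
      * sits id_pos u64s from the start of the sample array, for other record
      * types it sits is_pos u64s from the end, in the sample_id trailer.
      */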
 629static int perf_evlist__event2id(struct perf_evlist *evlist,
 630                                 union perf_event *event, u64 *id)
 631{
 632        const u64 *array = event->sample.array;
 633        ssize_t n;
 634
 635        n = (event->header.size - sizeof(event->header)) >> 3;
 636
 637        if (event->header.type == PERF_RECORD_SAMPLE) {
 638                if (evlist->id_pos >= n)
 639                        return -1;
 640                *id = array[evlist->id_pos];
 641        } else {
 642                if (evlist->is_pos > n)
 643                        return -1;
 644                n -= evlist->is_pos;
 645                *id = array[n];
 646        }
 647        return 0;
 648}
 649
 650static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
 651                                                   union perf_event *event)
 652{
 653        struct perf_evsel *first = perf_evlist__first(evlist);
 654        struct hlist_head *head;
 655        struct perf_sample_id *sid;
 656        int hash;
 657        u64 id;
 658
 659        if (evlist->nr_entries == 1)
 660                return first;
 661
 662        if (!first->attr.sample_id_all &&
 663            event->header.type != PERF_RECORD_SAMPLE)
 664                return first;
 665
 666        if (perf_evlist__event2id(evlist, event, &id))
 667                return NULL;
 668
 669        /* Synthesized events have an id of zero */
 670        if (!id)
 671                return first;
 672
 673        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
 674        head = &evlist->heads[hash];
 675
 676        hlist_for_each_entry(sid, head, node) {
 677                if (sid->id == id)
 678                        return sid->evsel;
 679        }
 680        return NULL;
 681}
 682
 683static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
 684{
 685        int i;
 686
 687        for (i = 0; i < evlist->nr_mmaps; i++) {
 688                int fd = evlist->mmap[i].fd;
 689                int err;
 690
 691                if (fd < 0)
 692                        continue;
 693                err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
 694                if (err)
 695                        return err;
 696        }
 697        return 0;
 698}
 699
 700int perf_evlist__pause(struct perf_evlist *evlist)
 701{
 702        return perf_evlist__set_paused(evlist, true);
 703}
 704
 705int perf_evlist__resume(struct perf_evlist *evlist)
 706{
 707        return perf_evlist__set_paused(evlist, false);
 708}
 709
  710/* When check_messup is true, 'end' must point to a good entry */
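     /*
      * Returns the next event between 'start' and 'end' in the ring buffer,
      * copying it into md->event_copy when it wraps around the end of the
      * buffer, and advances *prev past it.  Returns NULL when no complete
      * event is available.
      */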
 711static union perf_event *
 712perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
 713                u64 end, u64 *prev)
 714{
 715        unsigned char *data = md->base + page_size;
 716        union perf_event *event = NULL;
 717        int diff = end - start;
 718
 719        if (check_messup) {
 720                /*
 721                 * If we're further behind than half the buffer, there's a chance
 722                 * the writer will bite our tail and mess up the samples under us.
 723                 *
 724                 * If we somehow ended up ahead of the 'end', we got messed up.
 725                 *
 726                 * In either case, truncate and restart at 'end'.
 727                 */
 728                if (diff > md->mask / 2 || diff < 0) {
 729                        fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");
 730
 731                        /*
 732                         * 'end' points to a known good entry, start there.
 733                         */
 734                        start = end;
 735                        diff = 0;
 736                }
 737        }
 738
 739        if (diff >= (int)sizeof(event->header)) {
 740                size_t size;
 741
 742                event = (union perf_event *)&data[start & md->mask];
 743                size = event->header.size;
 744
 745                if (size < sizeof(event->header) || diff < (int)size) {
 746                        event = NULL;
 747                        goto broken_event;
 748                }
 749
 750                /*
 751                 * Event straddles the mmap boundary -- header should always
 752                 * be inside due to u64 alignment of output.
 753                 */
 754                if ((start & md->mask) + size != ((start + size) & md->mask)) {
 755                        unsigned int offset = start;
 756                        unsigned int len = min(sizeof(*event), size), cpy;
 757                        void *dst = md->event_copy;
 758
 759                        do {
 760                                cpy = min(md->mask + 1 - (offset & md->mask), len);
 761                                memcpy(dst, &data[offset & md->mask], cpy);
 762                                offset += cpy;
 763                                dst += cpy;
 764                                len -= cpy;
 765                        } while (len);
 766
 767                        event = (union perf_event *) md->event_copy;
 768                }
 769
 770                start += size;
 771        }
 772
 773broken_event:
 774        if (prev)
 775                *prev = start;
 776
 777        return event;
 778}
 779
 780union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 781{
 782        struct perf_mmap *md = &evlist->mmap[idx];
 783        u64 head;
 784        u64 old = md->prev;
 785
 786        /*
 787         * Check if event was unmapped due to a POLLHUP/POLLERR.
 788         */
 789        if (!atomic_read(&md->refcnt))
 790                return NULL;
 791
 792        head = perf_mmap__read_head(md);
 793
 794        return perf_mmap__read(md, evlist->overwrite, old, head, &md->prev);
 795}
 796
 797union perf_event *
 798perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
 799{
 800        struct perf_mmap *md = &evlist->mmap[idx];
 801        u64 head, end;
 802        u64 start = md->prev;
 803
 804        /*
 805         * Check if event was unmapped due to a POLLHUP/POLLERR.
 806         */
 807        if (!atomic_read(&md->refcnt))
 808                return NULL;
 809
 810        head = perf_mmap__read_head(md);
 811        if (!head)
 812                return NULL;
 813
 814        /*
  815         * The 'head' pointer starts at 0 and the kernel subtracts
  816         * sizeof(record) from it on each write, so in fact 'head' is
  817         * negative. The 'end' pointer is made manually by adding the size
  818         * of the ring buffer to 'head', which means the valid data we can
  819         * read is the whole ring buffer. If 'end' is positive, the ring
  820         * buffer has not been completely filled, so we must clamp 'end' to 0.
  821         *
  822         * However, since both 'head' and 'end' are unsigned, we can't
  823         * simply compare 'end' against 0. Instead we compare '-head' with
  824         * the size of the ring buffer, where '-head' is the number of
  825         * bytes the kernel has written to the ring buffer.
 826         */
 827        if (-head < (u64)(md->mask + 1))
 828                end = 0;
 829        else
 830                end = head + md->mask + 1;
 831
 832        return perf_mmap__read(md, false, start, end, &md->prev);
 833}
 834
 835void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
 836{
 837        struct perf_mmap *md = &evlist->mmap[idx];
 838        u64 head;
 839
 840        if (!atomic_read(&md->refcnt))
 841                return;
 842
 843        head = perf_mmap__read_head(md);
 844        md->prev = head;
 845}
 846
 847static bool perf_mmap__empty(struct perf_mmap *md)
 848{
 849        return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
 850}
 851
 852static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
 853{
 854        atomic_inc(&evlist->mmap[idx].refcnt);
 855}
 856
 857static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
 858{
 859        BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);
 860
 861        if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
 862                __perf_evlist__munmap(evlist, idx);
 863}
 864
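     /*
      * Tell the kernel the event just read has been consumed by updating the
      * tail pointer (only needed when not in overwrite mode), and drop the
      * mmap reference once the buffer is empty and only the ring buffer's
      * own reference remains.
      */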
 865void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
 866{
 867        struct perf_mmap *md = &evlist->mmap[idx];
 868
 869        if (!evlist->overwrite) {
 870                u64 old = md->prev;
 871
 872                perf_mmap__write_tail(md, old);
 873        }
 874
 875        if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
 876                perf_evlist__mmap_put(evlist, idx);
 877}
 878
 879int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
 880                               struct auxtrace_mmap_params *mp __maybe_unused,
 881                               void *userpg __maybe_unused,
 882                               int fd __maybe_unused)
 883{
 884        return 0;
 885}
 886
 887void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
 888{
 889}
 890
 891void __weak auxtrace_mmap_params__init(
 892                        struct auxtrace_mmap_params *mp __maybe_unused,
 893                        off_t auxtrace_offset __maybe_unused,
 894                        unsigned int auxtrace_pages __maybe_unused,
 895                        bool auxtrace_overwrite __maybe_unused)
 896{
 897}
 898
 899void __weak auxtrace_mmap_params__set_idx(
 900                        struct auxtrace_mmap_params *mp __maybe_unused,
 901                        struct perf_evlist *evlist __maybe_unused,
 902                        int idx __maybe_unused,
 903                        bool per_cpu __maybe_unused)
 904{
 905}
 906
 907static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
 908{
 909        if (evlist->mmap[idx].base != NULL) {
 910                munmap(evlist->mmap[idx].base, evlist->mmap_len);
 911                evlist->mmap[idx].base = NULL;
 912                evlist->mmap[idx].fd = -1;
 913                atomic_set(&evlist->mmap[idx].refcnt, 0);
 914        }
 915        auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
 916}
 917
 918void perf_evlist__munmap(struct perf_evlist *evlist)
 919{
 920        int i;
 921
 922        if (evlist->mmap == NULL)
 923                return;
 924
 925        for (i = 0; i < evlist->nr_mmaps; i++)
 926                __perf_evlist__munmap(evlist, i);
 927
 928        zfree(&evlist->mmap);
 929}
 930
 931static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
 932{
 933        int i;
 934
 935        evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
 936        if (cpu_map__empty(evlist->cpus))
 937                evlist->nr_mmaps = thread_map__nr(evlist->threads);
 938        evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
 939        for (i = 0; i < evlist->nr_mmaps; i++)
 940                evlist->mmap[i].fd = -1;
 941        return evlist->mmap != NULL ? 0 : -ENOMEM;
 942}
 943
 944struct mmap_params {
 945        int prot;
 946        int mask;
 947        struct auxtrace_mmap_params auxtrace_mp;
 948};
 949
 950static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
 951                               struct mmap_params *mp, int fd)
 952{
 953        /*
 954         * The last one will be done at perf_evlist__mmap_consume(), so that we
 955         * make sure we don't prevent tools from consuming every last event in
 956         * the ring buffer.
 957         *
 958         * I.e. we can get the POLLHUP meaning that the fd doesn't exist
 959         * anymore, but the last events for it are still in the ring buffer,
 960         * waiting to be consumed.
 961         *
  962         * Tools can choose to ignore this at their own discretion, but the
 963         * evlist layer can't just drop it when filtering events in
 964         * perf_evlist__filter_pollfd().
 965         */
 966        atomic_set(&evlist->mmap[idx].refcnt, 2);
 967        evlist->mmap[idx].prev = 0;
 968        evlist->mmap[idx].mask = mp->mask;
 969        evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
 970                                      MAP_SHARED, fd, 0);
 971        if (evlist->mmap[idx].base == MAP_FAILED) {
 972                pr_debug2("failed to mmap perf event ring buffer, error %d\n",
 973                          errno);
 974                evlist->mmap[idx].base = NULL;
 975                return -1;
 976        }
 977        evlist->mmap[idx].fd = fd;
 978
 979        if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
 980                                &mp->auxtrace_mp, evlist->mmap[idx].base, fd))
 981                return -1;
 982
 983        return 0;
 984}
 985
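     /*
      * All events for a given cpu/thread share one ring buffer: the first fd
      * gets mmapped, the others are redirected to it with
      * PERF_EVENT_IOC_SET_OUTPUT and just take a reference on that mmap.
      */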
 986static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
 987                                       struct mmap_params *mp, int cpu,
 988                                       int thread, int *output)
 989{
 990        struct perf_evsel *evsel;
 991
 992        evlist__for_each(evlist, evsel) {
 993                int fd;
 994
 995                if (evsel->system_wide && thread)
 996                        continue;
 997
 998                fd = FD(evsel, cpu, thread);
 999
1000                if (*output == -1) {
1001                        *output = fd;
1002                        if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
1003                                return -1;
1004                } else {
1005                        if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
1006                                return -1;
1007
1008                        perf_evlist__mmap_get(evlist, idx);
1009                }
1010
1011                /*
 1012                 * The system_wide flag causes a selected event to always be
 1013                 * opened without a pid.  Consequently it will never get a
1014                 * POLLHUP, but it is used for tracking in combination with
1015                 * other events, so it should not need to be polled anyway.
1016                 * Therefore don't add it for polling.
1017                 */
1018                if (!evsel->system_wide &&
1019                    __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
1020                        perf_evlist__mmap_put(evlist, idx);
1021                        return -1;
1022                }
1023
1024                if (evsel->attr.read_format & PERF_FORMAT_ID) {
1025                        if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
1026                                                   fd) < 0)
1027                                return -1;
1028                        perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
1029                                                 thread);
1030                }
1031        }
1032
1033        return 0;
1034}
1035
1036static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
1037                                     struct mmap_params *mp)
1038{
1039        int cpu, thread;
1040        int nr_cpus = cpu_map__nr(evlist->cpus);
1041        int nr_threads = thread_map__nr(evlist->threads);
1042
1043        pr_debug2("perf event ring buffer mmapped per cpu\n");
1044        for (cpu = 0; cpu < nr_cpus; cpu++) {
1045                int output = -1;
1046
1047                auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
1048                                              true);
1049
1050                for (thread = 0; thread < nr_threads; thread++) {
1051                        if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
1052                                                        thread, &output))
1053                                goto out_unmap;
1054                }
1055        }
1056
1057        return 0;
1058
1059out_unmap:
1060        for (cpu = 0; cpu < nr_cpus; cpu++)
1061                __perf_evlist__munmap(evlist, cpu);
1062        return -1;
1063}
1064
1065static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
1066                                        struct mmap_params *mp)
1067{
1068        int thread;
1069        int nr_threads = thread_map__nr(evlist->threads);
1070
1071        pr_debug2("perf event ring buffer mmapped per thread\n");
1072        for (thread = 0; thread < nr_threads; thread++) {
1073                int output = -1;
1074
1075                auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
1076                                              false);
1077
1078                if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
1079                                                &output))
1080                        goto out_unmap;
1081        }
1082
1083        return 0;
1084
1085out_unmap:
1086        for (thread = 0; thread < nr_threads; thread++)
1087                __perf_evlist__munmap(evlist, thread);
1088        return -1;
1089}
1090
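     /*
      * Derive the default number of ring buffer data pages from the
      * kernel.perf_event_mlock_kb sysctl: subtract one page for the control
      * page, round down to a power of two pages, and fall back to 512 kB
      * worth of pages when the sysctl can't be read.
      */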
1091unsigned long perf_event_mlock_kb_in_pages(void)
1092{
1093        unsigned long pages;
1094        int max;
1095
1096        if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
1097                /*
 1098                 * Pick a value that was considered good once upon a time;
 1099                 * things look strange since we can't read the sysctl value,
 1100                 * but let's not die just yet...
1101                 */
1102                max = 512;
1103        } else {
1104                max -= (page_size / 1024);
1105        }
1106
1107        pages = (max * 1024) / page_size;
1108        if (!is_power_of_2(pages))
1109                pages = rounddown_pow_of_two(pages);
1110
1111        return pages;
1112}
1113
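     /*
      * Convert the requested number of data pages into the mmap length in
      * bytes, adding one extra page for the control/header page the kernel
      * places in front of the ring buffer.  UINT_MAX means "use the
      * perf_event_mlock_kb derived default".
      */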
1114static size_t perf_evlist__mmap_size(unsigned long pages)
1115{
1116        if (pages == UINT_MAX)
1117                pages = perf_event_mlock_kb_in_pages();
1118        else if (!is_power_of_2(pages))
1119                return 0;
1120
1121        return (pages + 1) * page_size;
1122}
1123
1124static long parse_pages_arg(const char *str, unsigned long min,
1125                            unsigned long max)
1126{
1127        unsigned long pages, val;
1128        static struct parse_tag tags[] = {
1129                { .tag  = 'B', .mult = 1       },
1130                { .tag  = 'K', .mult = 1 << 10 },
1131                { .tag  = 'M', .mult = 1 << 20 },
1132                { .tag  = 'G', .mult = 1 << 30 },
1133                { .tag  = 0 },
1134        };
1135
1136        if (str == NULL)
1137                return -EINVAL;
1138
1139        val = parse_tag_value(str, tags);
1140        if (val != (unsigned long) -1) {
1141                /* we got file size value */
1142                pages = PERF_ALIGN(val, page_size) / page_size;
1143        } else {
1144                /* we got pages count value */
1145                char *eptr;
1146                pages = strtoul(str, &eptr, 10);
1147                if (*eptr != '\0')
1148                        return -EINVAL;
1149        }
1150
1151        if (pages == 0 && min == 0) {
1152                /* leave number of pages at 0 */
1153        } else if (!is_power_of_2(pages)) {
1154                /* round pages up to next power of 2 */
1155                pages = roundup_pow_of_two(pages);
1156                if (!pages)
1157                        return -EINVAL;
1158                pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
1159                        pages * page_size, pages);
1160        }
1161
1162        if (pages > max)
1163                return -EINVAL;
1164
1165        return pages;
1166}
1167
1168int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
1169{
1170        unsigned long max = UINT_MAX;
1171        long pages;
1172
1173        if (max > SIZE_MAX / page_size)
1174                max = SIZE_MAX / page_size;
1175
1176        pages = parse_pages_arg(str, 1, max);
1177        if (pages < 0) {
1178                pr_err("Invalid argument for --mmap_pages/-m\n");
1179                return -1;
1180        }
1181
1182        *mmap_pages = pages;
1183        return 0;
1184}
1185
1186int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
1187                                  int unset __maybe_unused)
1188{
1189        return __perf_evlist__parse_mmap_pages(opt->value, str);
1190}
1191
1192/**
1193 * perf_evlist__mmap_ex - Create mmaps to receive events.
1194 * @evlist: list of events
1195 * @pages: map length in pages
1196 * @overwrite: overwrite older events?
1197 * @auxtrace_pages - auxtrace map length in pages
1198 * @auxtrace_overwrite - overwrite older auxtrace data?
1199 *
1200 * If @overwrite is %false the user needs to signal event consumption using
1201 * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
1202 * automatically.
1203 *
1204 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
1205 * consumption using auxtrace_mmap__write_tail().
1206 *
1207 * Return: %0 on success, negative error code otherwise.
1208 */
1209int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
1210                         bool overwrite, unsigned int auxtrace_pages,
1211                         bool auxtrace_overwrite)
1212{
1213        struct perf_evsel *evsel;
1214        const struct cpu_map *cpus = evlist->cpus;
1215        const struct thread_map *threads = evlist->threads;
1216        struct mmap_params mp = {
1217                .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
1218        };
1219
1220        if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
1221                return -ENOMEM;
1222
1223        if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
1224                return -ENOMEM;
1225
1226        evlist->overwrite = overwrite;
1227        evlist->mmap_len = perf_evlist__mmap_size(pages);
1228        pr_debug("mmap size %zuB\n", evlist->mmap_len);
1229        mp.mask = evlist->mmap_len - page_size - 1;
1230
1231        auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
1232                                   auxtrace_pages, auxtrace_overwrite);
1233
1234        evlist__for_each(evlist, evsel) {
1235                if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
1236                    evsel->sample_id == NULL &&
1237                    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
1238                        return -ENOMEM;
1239        }
1240
1241        if (cpu_map__empty(cpus))
1242                return perf_evlist__mmap_per_thread(evlist, &mp);
1243
1244        return perf_evlist__mmap_per_cpu(evlist, &mp);
1245}
1246
1247int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
1248                      bool overwrite)
1249{
1250        return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
1251}
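
     /*
      * Rough usage sketch for the read side (an illustration only, assuming
      * the evlist has already been created, its events opened with
      * perf_evlist__open(), and 'idx' is a valid mmap index):
      *
      *    if (perf_evlist__mmap(evlist, UINT_MAX, false) < 0)
      *            return -1;
      *    while ((event = perf_evlist__mmap_read(evlist, idx)) != NULL) {
      *            ... process 'event' ...
      *            perf_evlist__mmap_consume(evlist, idx);
      *    }
      */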
1252
1253int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
1254{
1255        struct cpu_map *cpus;
1256        struct thread_map *threads;
1257
1258        threads = thread_map__new_str(target->pid, target->tid, target->uid);
1259
1260        if (!threads)
1261                return -1;
1262
1263        if (target__uses_dummy_map(target))
1264                cpus = cpu_map__dummy_new();
1265        else
1266                cpus = cpu_map__new(target->cpu_list);
1267
1268        if (!cpus)
1269                goto out_delete_threads;
1270
1271        evlist->has_user_cpus = !!target->cpu_list;
1272
1273        perf_evlist__set_maps(evlist, cpus, threads);
1274
1275        return 0;
1276
1277out_delete_threads:
1278        thread_map__put(threads);
1279        return -1;
1280}
1281
1282void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
1283                           struct thread_map *threads)
1284{
1285        /*
1286         * Allow for the possibility that one or another of the maps isn't being
1287         * changed i.e. don't put it.  Note we are assuming the maps that are
1288         * being applied are brand new and evlist is taking ownership of the
1289         * original reference count of 1.  If that is not the case it is up to
1290         * the caller to increase the reference count.
1291         */
1292        if (cpus != evlist->cpus) {
1293                cpu_map__put(evlist->cpus);
1294                evlist->cpus = cpu_map__get(cpus);
1295        }
1296
1297        if (threads != evlist->threads) {
1298                thread_map__put(evlist->threads);
1299                evlist->threads = thread_map__get(threads);
1300        }
1301
1302        perf_evlist__propagate_maps(evlist);
1303}
1304
1305void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
1306                                   enum perf_event_sample_format bit)
1307{
1308        struct perf_evsel *evsel;
1309
1310        evlist__for_each(evlist, evsel)
1311                __perf_evsel__set_sample_bit(evsel, bit);
1312}
1313
1314void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
1315                                     enum perf_event_sample_format bit)
1316{
1317        struct perf_evsel *evsel;
1318
1319        evlist__for_each(evlist, evsel)
1320                __perf_evsel__reset_sample_bit(evsel, bit);
1321}
1322
1323int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
1324{
1325        struct perf_evsel *evsel;
1326        int err = 0;
1327        const int ncpus = cpu_map__nr(evlist->cpus),
1328                  nthreads = thread_map__nr(evlist->threads);
1329
1330        evlist__for_each(evlist, evsel) {
1331                if (evsel->filter == NULL)
1332                        continue;
1333
1334                /*
 1335                 * Filters only work for tracepoint events, which don't have a
 1336                 * cpu limit, so the evlist and evsel maps should always be the same.
1337                 */
1338                err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
1339                if (err) {
1340                        *err_evsel = evsel;
1341                        break;
1342                }
1343        }
1344
1345        return err;
1346}
1347
1348int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
1349{
1350        struct perf_evsel *evsel;
1351        int err = 0;
1352
1353        evlist__for_each(evlist, evsel) {
1354                if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
1355                        continue;
1356
1357                err = perf_evsel__set_filter(evsel, filter);
1358                if (err)
1359                        break;
1360        }
1361
1362        return err;
1363}
1364
1365int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
1366{
1367        char *filter;
1368        int ret = -1;
1369        size_t i;
1370
1371        for (i = 0; i < npids; ++i) {
1372                if (i == 0) {
1373                        if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
1374                                return -1;
1375                } else {
1376                        char *tmp;
1377
1378                        if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
1379                                goto out_free;
1380
1381                        free(filter);
1382                        filter = tmp;
1383                }
1384        }
1385
1386        ret = perf_evlist__set_filter(evlist, filter);
1387out_free:
1388        free(filter);
1389        return ret;
1390}
1391
1392int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
1393{
1394        return perf_evlist__set_filter_pids(evlist, 1, &pid);
1395}
1396
1397bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
1398{
1399        struct perf_evsel *pos;
1400
1401        if (evlist->nr_entries == 1)
1402                return true;
1403
1404        if (evlist->id_pos < 0 || evlist->is_pos < 0)
1405                return false;
1406
1407        evlist__for_each(evlist, pos) {
1408                if (pos->id_pos != evlist->id_pos ||
1409                    pos->is_pos != evlist->is_pos)
1410                        return false;
1411        }
1412
1413        return true;
1414}
1415
1416u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
1417{
1418        struct perf_evsel *evsel;
1419
1420        if (evlist->combined_sample_type)
1421                return evlist->combined_sample_type;
1422
1423        evlist__for_each(evlist, evsel)
1424                evlist->combined_sample_type |= evsel->attr.sample_type;
1425
1426        return evlist->combined_sample_type;
1427}
1428
1429u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
1430{
1431        evlist->combined_sample_type = 0;
1432        return __perf_evlist__combined_sample_type(evlist);
1433}
1434
1435u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
1436{
1437        struct perf_evsel *evsel;
1438        u64 branch_type = 0;
1439
1440        evlist__for_each(evlist, evsel)
1441                branch_type |= evsel->attr.branch_sample_type;
1442        return branch_type;
1443}
1444
1445bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
1446{
1447        struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
1448        u64 read_format = first->attr.read_format;
1449        u64 sample_type = first->attr.sample_type;
1450
1451        evlist__for_each(evlist, pos) {
1452                if (read_format != pos->attr.read_format)
1453                        return false;
1454        }
1455
 1456        /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
1457        if ((sample_type & PERF_SAMPLE_READ) &&
1458            !(read_format & PERF_FORMAT_ID)) {
1459                return false;
1460        }
1461
1462        return true;
1463}
1464
1465u64 perf_evlist__read_format(struct perf_evlist *evlist)
1466{
1467        struct perf_evsel *first = perf_evlist__first(evlist);
1468        return first->attr.read_format;
1469}
1470
1471u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
1472{
1473        struct perf_evsel *first = perf_evlist__first(evlist);
1474        struct perf_sample *data;
1475        u64 sample_type;
1476        u16 size = 0;
1477
1478        if (!first->attr.sample_id_all)
1479                goto out;
1480
1481        sample_type = first->attr.sample_type;
1482
1483        if (sample_type & PERF_SAMPLE_TID)
1484                size += sizeof(data->tid) * 2;
1485
 1486        if (sample_type & PERF_SAMPLE_TIME)
1487                size += sizeof(data->time);
1488
1489        if (sample_type & PERF_SAMPLE_ID)
1490                size += sizeof(data->id);
1491
1492        if (sample_type & PERF_SAMPLE_STREAM_ID)
1493                size += sizeof(data->stream_id);
1494
1495        if (sample_type & PERF_SAMPLE_CPU)
1496                size += sizeof(data->cpu) * 2;
1497
1498        if (sample_type & PERF_SAMPLE_IDENTIFIER)
1499                size += sizeof(data->id);
1500out:
1501        return size;
1502}
1503
1504bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
1505{
1506        struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
1507
1508        evlist__for_each_continue(evlist, pos) {
1509                if (first->attr.sample_id_all != pos->attr.sample_id_all)
1510                        return false;
1511        }
1512
1513        return true;
1514}
1515
1516bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
1517{
1518        struct perf_evsel *first = perf_evlist__first(evlist);
1519        return first->attr.sample_id_all;
1520}
1521
1522void perf_evlist__set_selected(struct perf_evlist *evlist,
1523                               struct perf_evsel *evsel)
1524{
1525        evlist->selected = evsel;
1526}
1527
1528void perf_evlist__close(struct perf_evlist *evlist)
1529{
1530        struct perf_evsel *evsel;
1531        int ncpus = cpu_map__nr(evlist->cpus);
1532        int nthreads = thread_map__nr(evlist->threads);
1533        int n;
1534
1535        evlist__for_each_reverse(evlist, evsel) {
1536                n = evsel->cpus ? evsel->cpus->nr : ncpus;
1537                perf_evsel__close(evsel, n, nthreads);
1538        }
1539}
1540
1541static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
1542{
1543        struct cpu_map    *cpus;
1544        struct thread_map *threads;
1545        int err = -ENOMEM;
1546
1547        /*
1548         * Try reading /sys/devices/system/cpu/online to get
1549         * an all cpus map.
1550         *
1551         * FIXME: -ENOMEM is the best we can do here, the cpu_map
1552         * code needs an overhaul to properly forward the
1553         * error, and we may not want to do that fallback to a
1554         * default cpu identity map :-\
1555         */
1556        cpus = cpu_map__new(NULL);
1557        if (!cpus)
1558                goto out;
1559
1560        threads = thread_map__new_dummy();
1561        if (!threads)
1562                goto out_put;
1563
 1564        perf_evlist__set_maps(evlist, cpus, threads);
             err = 0;
1565out:
1566        return err;
1567out_put:
1568        cpu_map__put(cpus);
1569        goto out;
1570}
1571
1572int perf_evlist__open(struct perf_evlist *evlist)
1573{
1574        struct perf_evsel *evsel;
1575        int err;
1576
1577        /*
1578         * Default: one fd per CPU, all threads, aka systemwide
1579         * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
1580         */
1581        if (evlist->threads == NULL && evlist->cpus == NULL) {
1582                err = perf_evlist__create_syswide_maps(evlist);
1583                if (err < 0)
1584                        goto out_err;
1585        }
1586
1587        perf_evlist__update_id_pos(evlist);
1588
1589        evlist__for_each(evlist, evsel) {
1590                err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
1591                if (err < 0)
1592                        goto out_err;
1593        }
1594
1595        return 0;
1596out_err:
1597        perf_evlist__close(evlist);
1598        errno = -err;
1599        return err;
1600}
1601
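     /*
      * Fork the workload but keep it corked: the child signals readiness over
      * child_ready_pipe and then blocks reading go_pipe until
      * perf_evlist__start_workload() writes to it, so events can be set up
      * before the command starts executing.
      */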
1602int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
1603                                  const char *argv[], bool pipe_output,
1604                                  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
1605{
1606        int child_ready_pipe[2], go_pipe[2];
1607        char bf;
1608
1609        if (pipe(child_ready_pipe) < 0) {
1610                perror("failed to create 'ready' pipe");
1611                return -1;
1612        }
1613
1614        if (pipe(go_pipe) < 0) {
1615                perror("failed to create 'go' pipe");
1616                goto out_close_ready_pipe;
1617        }
1618
1619        evlist->workload.pid = fork();
1620        if (evlist->workload.pid < 0) {
1621                perror("failed to fork");
1622                goto out_close_pipes;
1623        }
1624
1625        if (!evlist->workload.pid) {
1626                int ret;
1627
1628                if (pipe_output)
1629                        dup2(2, 1);
1630
1631                signal(SIGTERM, SIG_DFL);
1632
1633                close(child_ready_pipe[0]);
1634                close(go_pipe[1]);
1635                fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
1636
1637                /*
1638                 * Tell the parent we're ready to go
1639                 */
1640                close(child_ready_pipe[1]);
1641
1642                /*
1643                 * Wait until the parent tells us to go.
1644                 */
1645                ret = read(go_pipe[0], &bf, 1);
1646                /*
1647                 * The parent will ask for the execvp() to be performed by
1648                 * writing exactly one byte, in workload.cork_fd, usually via
1649                 * perf_evlist__start_workload().
1650                 *
1651                 * For cancelling the workload without actually running it,
1652                 * the parent will just close workload.cork_fd, without writing
1653                 * anything, i.e. read will return zero and we just exit()
1654                 * here.
1655                 */
1656                if (ret != 1) {
1657                        if (ret == -1)
1658                                perror("unable to read pipe");
1659                        exit(ret);
1660                }
1661
1662                execvp(argv[0], (char **)argv);
1663
1664                if (exec_error) {
1665                        union sigval val;
1666
1667                        val.sival_int = errno;
1668                        if (sigqueue(getppid(), SIGUSR1, val))
1669                                perror(argv[0]);
1670                } else
1671                        perror(argv[0]);
1672                exit(-1);
1673        }
1674
1675        if (exec_error) {
1676                struct sigaction act = {
1677                        .sa_flags     = SA_SIGINFO,
1678                        .sa_sigaction = exec_error,
1679                };
1680                sigaction(SIGUSR1, &act, NULL);
1681        }
1682
1683        if (target__none(target)) {
1684                if (evlist->threads == NULL) {
1685                        fprintf(stderr, "FATAL: evlist->threads needs to be set at this point (%s:%d).\n",
1686                                __func__, __LINE__);
1687                        goto out_close_pipes;
1688                }
1689                thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
1690        }
1691
1692        close(child_ready_pipe[1]);
1693        close(go_pipe[0]);
1694        /*
1695         * wait for child to settle
1696         */
1697        if (read(child_ready_pipe[0], &bf, 1) == -1) {
1698                perror("unable to read pipe");
1699                goto out_close_pipes;
1700        }
1701
1702        fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
1703        evlist->workload.cork_fd = go_pipe[1];
1704        close(child_ready_pipe[0]);
1705        return 0;
1706
1707out_close_pipes:
1708        close(go_pipe[0]);
1709        close(go_pipe[1]);
1710out_close_ready_pipe:
1711        close(child_ready_pipe[0]);
1712        close(child_ready_pipe[1]);
1713        return -1;
1714}
1715
1716int perf_evlist__start_workload(struct perf_evlist *evlist)
1717{
1718        if (evlist->workload.cork_fd > 0) {
1719                char bf = 0;
1720                int ret;
1721                /*
1722                 * Remove the cork, let it rip!
1723                 */
1724                ret = write(evlist->workload.cork_fd, &bf, 1);
1725                if (ret < 0)
1726                        perror("unable to write to pipe");
1727
1728                close(evlist->workload.cork_fd);
1729                return ret;
1730        }
1731
1732        return 0;
1733}
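
/*
 * Illustrative sketch, not part of this file (kept under #if 0): the
 * intended call order around the "cork" handshake above, as a caller such
 * as a record-style tool would use it.  perf_evlist__create_maps() and
 * perf_evlist__enable() are assumed to be available from evlist.h; the
 * exec_error callback and pipe output are elided.
 */
#if 0
static int example_run_workload(struct perf_evlist *evlist,
                                struct target *target, const char *argv[])
{
        /* cpu/thread maps must exist before the target__none() fixup above */
        if (perf_evlist__create_maps(evlist, target) < 0)
                return -1;

        /* Fork the child; it blocks in read(go_pipe[0]) until uncorked. */
        if (perf_evlist__prepare_workload(evlist, target, argv, false, NULL) < 0)
                return -1;

        /* Counters can now be opened and enabled against workload.pid. */
        if (perf_evlist__open(evlist) < 0)
                return -1;
        perf_evlist__enable(evlist);

        /* Write the single byte that lets the child execvp() the workload. */
        return perf_evlist__start_workload(evlist);
}
#endif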
1734
1735int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
1736                              struct perf_sample *sample)
1737{
1738        struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);
1739
1740        if (!evsel)
1741                return -EFAULT;
1742        return perf_evsel__parse_sample(evsel, event, sample);
1743}
1744
1745size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
1746{
1747        struct perf_evsel *evsel;
1748        size_t printed = 0;
1749
1750        evlist__for_each(evlist, evsel) {
1751                printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
1752                                   perf_evsel__name(evsel));
1753        }
1754
1755        return printed + fprintf(fp, "\n");
1756}
1757
1758int perf_evlist__strerror_open(struct perf_evlist *evlist,
1759                               int err, char *buf, size_t size)
1760{
1761        int printed, value;
1762        char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
1763
1764        switch (err) {
1765        case EACCES:
1766        case EPERM:
1767                printed = scnprintf(buf, size,
1768                                    "Error:\t%s.\n"
1769                                    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);
1770
1771                value = perf_event_paranoid();
1772
1773                printed += scnprintf(buf + printed, size - printed, "\nHint:\t");
1774
1775                if (value >= 2) {
1776                        printed += scnprintf(buf + printed, size - printed,
1777                                             "For your workloads it needs to be <= 1\nHint:\t");
1778                }
1779                printed += scnprintf(buf + printed, size - printed,
1780                                     "For system wide tracing it needs to be set to -1.\n");
1781
1782                printed += scnprintf(buf + printed, size - printed,
1783                                    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
1784                                    "Hint:\tThe current value is %d.", value);
1785                break;
1786        case EINVAL: {
1787                struct perf_evsel *first = perf_evlist__first(evlist);
1788                int max_freq;
1789
1790                if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
1791                        goto out_default;
1792
1793                if (first->attr.sample_freq < (u64)max_freq)
1794                        goto out_default;
1795
1796                printed = scnprintf(buf, size,
1797                                    "Error:\t%s.\n"
1798                                    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
1799                                    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
1800                                    emsg, max_freq, first->attr.sample_freq);
1801                break;
1802        }
1803        default:
1804out_default:
1805                scnprintf(buf, size, "%s", emsg);
1806                break;
1807        }
1808
1809        return 0;
1810}
1811
1812int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
1813{
1814        char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
1815        int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;
1816
1817        switch (err) {
1818        case EPERM:
1819                sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
1820                printed += scnprintf(buf + printed, size - printed,
1821                                     "Error:\t%s.\n"
1822                                     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
1823                                     "Hint:\tTried using %d kB.\n",
1824                                     emsg, pages_max_per_user, pages_attempted);
1825
1826                if (pages_attempted >= pages_max_per_user) {
1827                        printed += scnprintf(buf + printed, size - printed,
1828                                             "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
1829                                             pages_max_per_user + pages_attempted);
1830                }
1831
1832                printed += scnprintf(buf + printed, size - printed,
1833                                     "Hint:\tTry using a smaller -m/--mmap-pages value.");
1834                break;
1835        default:
1836                scnprintf(buf, size, "%s", emsg);
1837                break;
1838        }
1839
1840        return 0;
1841}
1842
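/*
 * Move move_evsel's entire event group (every evsel sharing its group
 * leader) to the front of the evlist, preserving the relative order of
 * the remaining entries.
 */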
1843void perf_evlist__to_front(struct perf_evlist *evlist,
1844                           struct perf_evsel *move_evsel)
1845{
1846        struct perf_evsel *evsel, *n;
1847        LIST_HEAD(move);
1848
1849        if (move_evsel == perf_evlist__first(evlist))
1850                return;
1851
1852        evlist__for_each_safe(evlist, n, evsel) {
1853                if (evsel->leader == move_evsel->leader)
1854                        list_move_tail(&evsel->node, &move);
1855        }
1856
1857        list_splice(&move, &evlist->entries);
1858}
1859
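/*
 * Make tracking_evsel the one event that carries the side-band tracking
 * records (mmap, comm, etc.): set its tracking flag and clear the flag on
 * every other event in the list.
 */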
1860void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
1861                                     struct perf_evsel *tracking_evsel)
1862{
1863        struct perf_evsel *evsel;
1864
1865        if (tracking_evsel->tracking)
1866                return;
1867
1868        evlist__for_each(evlist, evsel) {
1869                if (evsel != tracking_evsel)
1870                        evsel->tracking = false;
1871        }
1872
1873        tracking_evsel->tracking = true;
1874}
1875
1876struct perf_evsel *
1877perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
1878                               const char *str)
1879{
1880        struct perf_evsel *evsel;
1881
1882        evlist__for_each(evlist, evsel) {
1883                if (!evsel->name)
1884                        continue;
1885                if (strcmp(str, evsel->name) == 0)
1886                        return evsel;
1887        }
1888
1889        return NULL;
1890}
1891
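/*
 * Illustrative sketch, not part of this file (kept under #if 0): looking
 * up an event by the exact name it was parsed with and marking it as the
 * tracking event, using only the two helpers defined above.  The event
 * name "cycles" is just an example.
 */
#if 0
static void example_track_cycles(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        evsel = perf_evlist__find_evsel_by_str(evlist, "cycles");
        if (evsel)
                perf_evlist__set_tracking_event(evlist, evsel);
}
#endif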