linux/tools/perf/util/thread-stack.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * thread-stack.c: Synthesize a thread's stack using call / return events
   4 * Copyright (c) 2014, Intel Corporation.
   5 */
   6
   7#include <linux/rbtree.h>
   8#include <linux/list.h>
   9#include <linux/log2.h>
  10#include <linux/zalloc.h>
  11#include <errno.h>
  12#include <stdlib.h>
  13#include <string.h>
  14#include "thread.h"
  15#include "event.h"
  16#include "machine.h"
  17#include "env.h"
  18#include "debug.h"
  19#include "symbol.h"
  20#include "comm.h"
  21#include "call-path.h"
  22#include "thread-stack.h"
  23
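/* The call stack array grows in increments of STACK_GROWTH entries; see thread_stack__grow() */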
  24#define STACK_GROWTH 2048
  25
  26/*
  27 * State of retpoline detection.
  28 *
  29 * RETPOLINE_NONE: no retpoline detection
  30 * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
  31 * X86_RETPOLINE_DETECTED: x86 retpoline detected
  32 */
  33enum retpoline_state_t {
  34        RETPOLINE_NONE,
  35        X86_RETPOLINE_POSSIBLE,
  36        X86_RETPOLINE_DETECTED,
  37};
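/*
 * In outline: thread_stack__init() sets X86_RETPOLINE_POSSIBLE on x86;
 * thread_stack__process() moves to X86_RETPOLINE_DETECTED when a 'call' lands
 * in the middle of the same symbol it came from, and drops back to
 * X86_RETPOLINE_POSSIBLE on the next sample. While detected, a 'return' that
 * does not match the top-of-stack return address is handled by
 * thread_stack__x86_retpoline().
 */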
  38
  39/**
  40 * struct thread_stack_entry - thread stack entry.
  41 * @ret_addr: return address
  42 * @timestamp: timestamp (if known)
  43 * @ref: external reference (e.g. db_id of sample)
  44 * @branch_count: the branch count when the entry was created
  45 * @insn_count: the instruction count when the entry was created
   46 * @cyc_count: the cycle count when the entry was created
  47 * @db_id: id used for db-export
  48 * @cp: call path
  49 * @no_call: a 'call' was not seen
  50 * @trace_end: a 'call' but trace ended
  51 * @non_call: a branch but not a 'call' to the start of a different symbol
  52 */
  53struct thread_stack_entry {
  54        u64 ret_addr;
  55        u64 timestamp;
  56        u64 ref;
  57        u64 branch_count;
  58        u64 insn_count;
  59        u64 cyc_count;
  60        u64 db_id;
  61        struct call_path *cp;
  62        bool no_call;
  63        bool trace_end;
  64        bool non_call;
  65};
  66
  67/**
  68 * struct thread_stack - thread stack constructed from 'call' and 'return'
  69 *                       branch samples.
  70 * @stack: array that holds the stack
  71 * @cnt: number of entries in the stack
  72 * @sz: current maximum stack size
  73 * @trace_nr: current trace number
  74 * @branch_count: running branch count
   75 * @insn_count: running instruction count
   76 * @cyc_count: running cycle count
  77 * @kernel_start: kernel start address
  78 * @last_time: last timestamp
  79 * @crp: call/return processor
  80 * @comm: current comm
  81 * @arr_sz: size of array if this is the first element of an array
  82 * @rstate: used to detect retpolines
  83 * @br_stack_rb: branch stack (ring buffer)
  84 * @br_stack_sz: maximum branch stack size
  85 * @br_stack_pos: current position in @br_stack_rb
  86 * @mispred_all: mark all branches as mispredicted
  87 */
  88struct thread_stack {
  89        struct thread_stack_entry *stack;
  90        size_t cnt;
  91        size_t sz;
  92        u64 trace_nr;
  93        u64 branch_count;
  94        u64 insn_count;
  95        u64 cyc_count;
  96        u64 kernel_start;
  97        u64 last_time;
  98        struct call_return_processor *crp;
  99        struct comm *comm;
 100        unsigned int arr_sz;
 101        enum retpoline_state_t rstate;
 102        struct branch_stack *br_stack_rb;
 103        unsigned int br_stack_sz;
 104        unsigned int br_stack_pos;
 105        bool mispred_all;
 106};
 107
 108/*
 109 * Assume pid == tid == 0 identifies the idle task as defined by
 110 * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
 111 * and therefore requires a stack for each cpu.
 112 */
 113static inline bool thread_stack__per_cpu(struct thread *thread)
 114{
 115        return !(thread->tid || thread->pid_);
 116}
 117
 118static int thread_stack__grow(struct thread_stack *ts)
 119{
 120        struct thread_stack_entry *new_stack;
 121        size_t sz, new_sz;
 122
 123        new_sz = ts->sz + STACK_GROWTH;
 124        sz = new_sz * sizeof(struct thread_stack_entry);
 125
 126        new_stack = realloc(ts->stack, sz);
 127        if (!new_stack)
 128                return -ENOMEM;
 129
 130        ts->stack = new_stack;
 131        ts->sz = new_sz;
 132
 133        return 0;
 134}
 135
 136static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
 137                              struct call_return_processor *crp,
 138                              bool callstack, unsigned int br_stack_sz)
 139{
 140        int err;
 141
 142        if (callstack) {
 143                err = thread_stack__grow(ts);
 144                if (err)
 145                        return err;
 146        }
 147
 148        if (br_stack_sz) {
 149                size_t sz = sizeof(struct branch_stack);
 150
 151                sz += br_stack_sz * sizeof(struct branch_entry);
 152                ts->br_stack_rb = zalloc(sz);
 153                if (!ts->br_stack_rb)
 154                        return -ENOMEM;
 155                ts->br_stack_sz = br_stack_sz;
 156        }
 157
 158        if (thread->maps && thread->maps->machine) {
 159                struct machine *machine = thread->maps->machine;
 160                const char *arch = perf_env__arch(machine->env);
 161
 162                ts->kernel_start = machine__kernel_start(machine);
 163                if (!strcmp(arch, "x86"))
 164                        ts->rstate = X86_RETPOLINE_POSSIBLE;
 165        } else {
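                /* No machine info, so assume addresses with the top bit set are kernel */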
 166                ts->kernel_start = 1ULL << 63;
 167        }
 168        ts->crp = crp;
 169
 170        return 0;
 171}
 172
 173static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
 174                                              struct call_return_processor *crp,
 175                                              bool callstack,
 176                                              unsigned int br_stack_sz)
 177{
 178        struct thread_stack *ts = thread->ts, *new_ts;
 179        unsigned int old_sz = ts ? ts->arr_sz : 0;
 180        unsigned int new_sz = 1;
 181
 182        if (thread_stack__per_cpu(thread) && cpu > 0)
 183                new_sz = roundup_pow_of_two(cpu + 1);
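        /*
         * For example (illustrative values): for the idle task on cpu 5, the
         * array is sized roundup_pow_of_two(6) = 8 entries.
         */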
 184
 185        if (!ts || new_sz > old_sz) {
 186                new_ts = calloc(new_sz, sizeof(*ts));
 187                if (!new_ts)
 188                        return NULL;
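                /* Preserve any per-cpu stacks that were already initialized */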
 189                if (ts)
 190                        memcpy(new_ts, ts, old_sz * sizeof(*ts));
 191                new_ts->arr_sz = new_sz;
 192                zfree(&thread->ts);
 193                thread->ts = new_ts;
 194                ts = new_ts;
 195        }
 196
 197        if (thread_stack__per_cpu(thread) && cpu > 0 &&
 198            (unsigned int)cpu < ts->arr_sz)
 199                ts += cpu;
 200
 201        if (!ts->stack &&
 202            thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
 203                return NULL;
 204
 205        return ts;
 206}
 207
 208static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
 209{
 210        struct thread_stack *ts = thread->ts;
 211
 212        if (cpu < 0)
 213                cpu = 0;
 214
 215        if (!ts || (unsigned int)cpu >= ts->arr_sz)
 216                return NULL;
 217
 218        ts += cpu;
 219
 220        if (!ts->stack)
 221                return NULL;
 222
 223        return ts;
 224}
 225
 226static inline struct thread_stack *thread__stack(struct thread *thread,
 227                                                    int cpu)
 228{
 229        if (!thread)
 230                return NULL;
 231
 232        if (thread_stack__per_cpu(thread))
 233                return thread__cpu_stack(thread, cpu);
 234
 235        return thread->ts;
 236}
 237
 238static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
 239                              bool trace_end)
 240{
 241        int err = 0;
 242
 243        if (ts->cnt == ts->sz) {
 244                err = thread_stack__grow(ts);
 245                if (err) {
 246                        pr_warning("Out of memory: discarding thread stack\n");
 247                        ts->cnt = 0;
 248                }
 249        }
 250
 251        ts->stack[ts->cnt].trace_end = trace_end;
 252        ts->stack[ts->cnt++].ret_addr = ret_addr;
 253
 254        return err;
 255}
 256
 257static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
 258{
 259        size_t i;
 260
 261        /*
 262         * In some cases there may be functions which are not seen to return.
 263         * For example when setjmp / longjmp has been used.  Or the perf context
 264         * switch in the kernel which doesn't stop and start tracing in exactly
 265         * the same code path.  When that happens the return address will be
 266         * further down the stack.  If the return address is not found at all,
 267         * we assume the opposite (i.e. this is a return for a call that wasn't
 268         * seen for some reason) and leave the stack alone.
 269         */
 270        for (i = ts->cnt; i; ) {
 271                if (ts->stack[--i].ret_addr == ret_addr) {
 272                        ts->cnt = i;
 273                        return;
 274                }
 275        }
 276}
 277
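/*
 * Pop all consecutive 'trace end' entries from the top of the stack. A call
 * made when the trace ended is not expected to be seen to return, so such
 * entries are discarded when tracing begins again.
 */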
 278static void thread_stack__pop_trace_end(struct thread_stack *ts)
 279{
 280        size_t i;
 281
 282        for (i = ts->cnt; i; ) {
 283                if (ts->stack[--i].trace_end)
 284                        ts->cnt = i;
 285                else
 286                        return;
 287        }
 288}
 289
 290static bool thread_stack__in_kernel(struct thread_stack *ts)
 291{
 292        if (!ts->cnt)
 293                return false;
 294
 295        return ts->stack[ts->cnt - 1].cp->in_kernel;
 296}
 297
 298static int thread_stack__call_return(struct thread *thread,
 299                                     struct thread_stack *ts, size_t idx,
 300                                     u64 timestamp, u64 ref, bool no_return)
 301{
 302        struct call_return_processor *crp = ts->crp;
 303        struct thread_stack_entry *tse;
 304        struct call_return cr = {
 305                .thread = thread,
 306                .comm = ts->comm,
 307                .db_id = 0,
 308        };
 309        u64 *parent_db_id;
 310
 311        tse = &ts->stack[idx];
 312        cr.cp = tse->cp;
 313        cr.call_time = tse->timestamp;
 314        cr.return_time = timestamp;
 315        cr.branch_count = ts->branch_count - tse->branch_count;
 316        cr.insn_count = ts->insn_count - tse->insn_count;
 317        cr.cyc_count = ts->cyc_count - tse->cyc_count;
 318        cr.db_id = tse->db_id;
 319        cr.call_ref = tse->ref;
 320        cr.return_ref = ref;
 321        if (tse->no_call)
 322                cr.flags |= CALL_RETURN_NO_CALL;
 323        if (no_return)
 324                cr.flags |= CALL_RETURN_NO_RETURN;
 325        if (tse->non_call)
 326                cr.flags |= CALL_RETURN_NON_CALL;
 327
 328        /*
 329         * The parent db_id must be assigned before exporting the child. Note
 330         * it is not possible to export the parent first because its information
  331         * is not complete until its 'return' has been processed.
 332         */
 333        parent_db_id = idx ? &(tse - 1)->db_id : NULL;
 334
 335        return crp->process(&cr, parent_db_id, crp->data);
 336}
 337
 338static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
 339{
 340        struct call_return_processor *crp = ts->crp;
 341        int err;
 342
 343        if (!crp) {
 344                ts->cnt = 0;
 345                ts->br_stack_pos = 0;
 346                if (ts->br_stack_rb)
 347                        ts->br_stack_rb->nr = 0;
 348                return 0;
 349        }
 350
 351        while (ts->cnt) {
 352                err = thread_stack__call_return(thread, ts, --ts->cnt,
 353                                                ts->last_time, 0, true);
 354                if (err) {
 355                        pr_err("Error flushing thread stack!\n");
 356                        ts->cnt = 0;
 357                        return err;
 358                }
 359        }
 360
 361        return 0;
 362}
 363
 364int thread_stack__flush(struct thread *thread)
 365{
 366        struct thread_stack *ts = thread->ts;
 367        unsigned int pos;
 368        int err = 0;
 369
 370        if (ts) {
 371                for (pos = 0; pos < ts->arr_sz; pos++) {
 372                        int ret = __thread_stack__flush(thread, ts + pos);
 373
 374                        if (ret)
 375                                err = ret;
 376                }
 377        }
 378
 379        return err;
 380}
 381
 382static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
 383                                          u64 from_ip, u64 to_ip)
 384{
 385        struct branch_stack *bs = ts->br_stack_rb;
 386        struct branch_entry *be;
 387
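        /*
         * The ring buffer is filled from the highest index downwards, wrapping
         * from 0 back to br_stack_sz - 1, so the newest entry is always at
         * br_stack_pos, and entries[br_stack_pos..br_stack_sz-1] followed by
         * entries[0..br_stack_pos-1] run from newest to oldest. That is the
         * order in which thread_stack__br_sample() copies them out.
         */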
 388        if (!ts->br_stack_pos)
 389                ts->br_stack_pos = ts->br_stack_sz;
 390
 391        ts->br_stack_pos -= 1;
 392
 393        be              = &bs->entries[ts->br_stack_pos];
 394        be->from        = from_ip;
 395        be->to          = to_ip;
 396        be->flags.value = 0;
 397        be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
 398        be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
 399        /* No support for mispredict */
 400        be->flags.mispred = ts->mispred_all;
 401
 402        if (bs->nr < ts->br_stack_sz)
 403                bs->nr += 1;
 404}
 405
 406int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
 407                        u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
 408                        unsigned int br_stack_sz, bool mispred_all)
 409{
 410        struct thread_stack *ts = thread__stack(thread, cpu);
 411
 412        if (!thread)
 413                return -EINVAL;
 414
 415        if (!ts) {
 416                ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
 417                if (!ts) {
 418                        pr_warning("Out of memory: no thread stack\n");
 419                        return -ENOMEM;
 420                }
 421                ts->trace_nr = trace_nr;
 422                ts->mispred_all = mispred_all;
 423        }
 424
 425        /*
 426         * When the trace is discontinuous, the trace_nr changes.  In that case
 427         * the stack might be completely invalid.  Better to report nothing than
 428         * to report something misleading, so flush the stack.
 429         */
 430        if (trace_nr != ts->trace_nr) {
 431                if (ts->trace_nr)
 432                        __thread_stack__flush(thread, ts);
 433                ts->trace_nr = trace_nr;
 434        }
 435
 436        if (br_stack_sz)
 437                thread_stack__update_br_stack(ts, flags, from_ip, to_ip);
 438
 439        /*
  440         * Stop here if thread_stack__process() is in use, or if the call stack is
  441         * not being recorded.
 442         */
 443        if (ts->crp || !callstack)
 444                return 0;
 445
 446        if (flags & PERF_IP_FLAG_CALL) {
 447                u64 ret_addr;
 448
 449                if (!to_ip)
 450                        return 0;
 451                ret_addr = from_ip + insn_len;
 452                if (ret_addr == to_ip)
 453                        return 0; /* Zero-length calls are excluded */
 454                return thread_stack__push(ts, ret_addr,
 455                                          flags & PERF_IP_FLAG_TRACE_END);
 456        } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
 457                /*
 458                 * If the caller did not change the trace number (which would
 459                 * have flushed the stack) then try to make sense of the stack.
 460                 * Possibly, tracing began after returning to the current
  461                 * address, so try to pop that. Also, a call made when the trace
  462                 * ended is not expected to return, so pop that too.
 463                 */
 464                thread_stack__pop(ts, to_ip);
 465                thread_stack__pop_trace_end(ts);
 466        } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
 467                thread_stack__pop(ts, to_ip);
 468        }
 469
 470        return 0;
 471}
 472
 473void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
 474{
 475        struct thread_stack *ts = thread__stack(thread, cpu);
 476
 477        if (!ts)
 478                return;
 479
 480        if (trace_nr != ts->trace_nr) {
 481                if (ts->trace_nr)
 482                        __thread_stack__flush(thread, ts);
 483                ts->trace_nr = trace_nr;
 484        }
 485}
 486
 487static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
 488{
 489        __thread_stack__flush(thread, ts);
 490        zfree(&ts->stack);
 491        zfree(&ts->br_stack_rb);
 492}
 493
 494static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
 495{
 496        unsigned int arr_sz = ts->arr_sz;
 497
 498        __thread_stack__free(thread, ts);
 499        memset(ts, 0, sizeof(*ts));
 500        ts->arr_sz = arr_sz;
 501}
 502
 503void thread_stack__free(struct thread *thread)
 504{
 505        struct thread_stack *ts = thread->ts;
 506        unsigned int pos;
 507
 508        if (ts) {
 509                for (pos = 0; pos < ts->arr_sz; pos++)
 510                        __thread_stack__free(thread, ts + pos);
 511                zfree(&thread->ts);
 512        }
 513}
 514
 515static inline u64 callchain_context(u64 ip, u64 kernel_start)
 516{
 517        return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
 518}
 519
 520void thread_stack__sample(struct thread *thread, int cpu,
 521                          struct ip_callchain *chain,
 522                          size_t sz, u64 ip, u64 kernel_start)
 523{
 524        struct thread_stack *ts = thread__stack(thread, cpu);
 525        u64 context = callchain_context(ip, kernel_start);
 526        u64 last_context;
 527        size_t i, j;
 528
 529        if (sz < 2) {
 530                chain->nr = 0;
 531                return;
 532        }
 533
 534        chain->ips[0] = context;
 535        chain->ips[1] = ip;
 536
 537        if (!ts) {
 538                chain->nr = 2;
 539                return;
 540        }
 541
 542        last_context = context;
 543
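        /*
         * Walk the thread stack starting from the most recent entry, inserting
         * a context marker whenever consecutive addresses fall on different
         * sides of kernel_start. For example (illustrative values), the result
         * might be:
         *   { PERF_CONTEXT_KERNEL, ip, kret1, PERF_CONTEXT_USER, uret1, uret2 }
         */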
 544        for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
 545                ip = ts->stack[ts->cnt - j].ret_addr;
 546                context = callchain_context(ip, kernel_start);
 547                if (context != last_context) {
 548                        if (i >= sz - 1)
 549                                break;
 550                        chain->ips[i++] = context;
 551                        last_context = context;
 552                }
 553                chain->ips[i] = ip;
 554        }
 555
 556        chain->nr = i;
 557}
 558
 559/*
 560 * Hardware sample records, created some time after the event occurred, need to
 561 * have subsequent addresses removed from the call chain.
 562 */
 563void thread_stack__sample_late(struct thread *thread, int cpu,
 564                               struct ip_callchain *chain, size_t sz,
 565                               u64 sample_ip, u64 kernel_start)
 566{
 567        struct thread_stack *ts = thread__stack(thread, cpu);
 568        u64 sample_context = callchain_context(sample_ip, kernel_start);
 569        u64 last_context, context, ip;
 570        size_t nr = 0, j;
 571
 572        if (sz < 2) {
 573                chain->nr = 0;
 574                return;
 575        }
 576
 577        if (!ts)
 578                goto out;
 579
 580        /*
 581         * When tracing kernel space, kernel addresses occur at the top of the
 582         * call chain after the event occurred but before tracing stopped.
 583         * Skip them.
 584         */
 585        for (j = 1; j <= ts->cnt; j++) {
 586                ip = ts->stack[ts->cnt - j].ret_addr;
 587                context = callchain_context(ip, kernel_start);
 588                if (context == PERF_CONTEXT_USER ||
 589                    (context == sample_context && ip == sample_ip))
 590                        break;
 591        }
 592
 593        last_context = sample_ip; /* Use sample_ip as an invalid context */
 594
 595        for (; nr < sz && j <= ts->cnt; nr++, j++) {
 596                ip = ts->stack[ts->cnt - j].ret_addr;
 597                context = callchain_context(ip, kernel_start);
 598                if (context != last_context) {
 599                        if (nr >= sz - 1)
 600                                break;
 601                        chain->ips[nr++] = context;
 602                        last_context = context;
 603                }
 604                chain->ips[nr] = ip;
 605        }
 606out:
 607        if (nr) {
 608                chain->nr = nr;
 609        } else {
 610                chain->ips[0] = sample_context;
 611                chain->ips[1] = sample_ip;
 612                chain->nr = 2;
 613        }
 614}
 615
 616void thread_stack__br_sample(struct thread *thread, int cpu,
 617                             struct branch_stack *dst, unsigned int sz)
 618{
 619        struct thread_stack *ts = thread__stack(thread, cpu);
 620        const size_t bsz = sizeof(struct branch_entry);
 621        struct branch_stack *src;
 622        struct branch_entry *be;
 623        unsigned int nr;
 624
 625        dst->nr = 0;
 626
 627        if (!ts)
 628                return;
 629
 630        src = ts->br_stack_rb;
 631        if (!src->nr)
 632                return;
 633
 634        dst->nr = min((unsigned int)src->nr, sz);
 635
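        /*
         * Copy newest to oldest: first the entries from br_stack_pos to the end
         * of the ring buffer, then, if the buffer has wrapped around, the
         * entries from the start of the buffer up to br_stack_pos.
         */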
 636        be = &dst->entries[0];
 637        nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
 638        memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);
 639
 640        if (src->nr >= ts->br_stack_sz) {
 641                sz -= nr;
 642                be = &dst->entries[nr];
 643                nr = min(ts->br_stack_pos, sz);
  644                memcpy(be, &src->entries[0], bsz * nr);
 645        }
 646}
 647
 648/* Start of user space branch entries */
 649static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
 650{
 651        if (!*start)
 652                *start = be->to && be->to < kernel_start;
 653
 654        return *start;
 655}
 656
 657/*
 658 * Start of branch entries after the ip fell in between 2 branches, or user
 659 * space branch entries.
 660 */
 661static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
 662                     bool *start, struct branch_entry *nb)
 663{
 664        if (!*start) {
 665                *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
 666                         be->from < kernel_start ||
 667                         (be->to && be->to < kernel_start);
 668        }
 669
 670        return *start;
 671}
 672
 673/*
 674 * Hardware sample records, created some time after the event occurred, need to
 675 * have subsequent addresses removed from the branch stack.
 676 */
 677void thread_stack__br_sample_late(struct thread *thread, int cpu,
 678                                  struct branch_stack *dst, unsigned int sz,
 679                                  u64 ip, u64 kernel_start)
 680{
 681        struct thread_stack *ts = thread__stack(thread, cpu);
 682        struct branch_entry *d, *s, *spos, *ssz;
 683        struct branch_stack *src;
 684        unsigned int nr = 0;
 685        bool start = false;
 686
 687        dst->nr = 0;
 688
 689        if (!ts)
 690                return;
 691
 692        src = ts->br_stack_rb;
 693        if (!src->nr)
 694                return;
 695
 696        spos = &src->entries[ts->br_stack_pos];
 697        ssz  = &src->entries[ts->br_stack_sz];
 698
 699        d = &dst->entries[0];
 700        s = spos;
 701
 702        if (ip < kernel_start) {
 703                /*
 704                 * User space sample: start copying branch entries when the
 705                 * branch is in user space.
 706                 */
 707                for (s = spos; s < ssz && nr < sz; s++) {
 708                        if (us_start(s, kernel_start, &start)) {
 709                                *d++ = *s;
 710                                nr += 1;
 711                        }
 712                }
 713
 714                if (src->nr >= ts->br_stack_sz) {
 715                        for (s = &src->entries[0]; s < spos && nr < sz; s++) {
 716                                if (us_start(s, kernel_start, &start)) {
 717                                        *d++ = *s;
 718                                        nr += 1;
 719                                }
 720                        }
 721                }
 722        } else {
 723                struct branch_entry *nb = NULL;
 724
 725                /*
 726                 * Kernel space sample: start copying branch entries when the ip
 727                 * falls in between 2 branches (or the branch is in user space
 728                 * because then the start must have been missed).
 729                 */
 730                for (s = spos; s < ssz && nr < sz; s++) {
 731                        if (ks_start(s, ip, kernel_start, &start, nb)) {
 732                                *d++ = *s;
 733                                nr += 1;
 734                        }
 735                        nb = s;
 736                }
 737
 738                if (src->nr >= ts->br_stack_sz) {
 739                        for (s = &src->entries[0]; s < spos && nr < sz; s++) {
 740                                if (ks_start(s, ip, kernel_start, &start, nb)) {
 741                                        *d++ = *s;
 742                                        nr += 1;
 743                                }
 744                                nb = s;
 745                        }
 746                }
 747        }
 748
 749        dst->nr = nr;
 750}
 751
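/*
 * Illustrative usage only (my_process and my_data are hypothetical): a tool
 * that wants call/return events allocates a processor with a callback and
 * passes it to thread_stack__process() for each branch sample:
 *
 *        static int my_process(struct call_return *cr, u64 *parent_db_id, void *data)
 *        {
 *                ... consume cr->cp, cr->call_time, cr->return_time, cr->flags ...
 *                return 0;
 *        }
 *
 *        crp = call_return_processor__new(my_process, my_data);
 *        ...
 *        err = thread_stack__process(thread, comm, sample, from_al, to_al, ref, crp);
 *        ...
 *        call_return_processor__free(crp);
 */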
 752struct call_return_processor *
 753call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
 754                           void *data)
 755{
 756        struct call_return_processor *crp;
 757
 758        crp = zalloc(sizeof(struct call_return_processor));
 759        if (!crp)
 760                return NULL;
 761        crp->cpr = call_path_root__new();
 762        if (!crp->cpr)
 763                goto out_free;
 764        crp->process = process;
 765        crp->data = data;
 766        return crp;
 767
 768out_free:
 769        free(crp);
 770        return NULL;
 771}
 772
 773void call_return_processor__free(struct call_return_processor *crp)
 774{
 775        if (crp) {
 776                call_path_root__free(crp->cpr);
 777                free(crp);
 778        }
 779}
 780
 781static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
 782                                 u64 timestamp, u64 ref, struct call_path *cp,
 783                                 bool no_call, bool trace_end)
 784{
 785        struct thread_stack_entry *tse;
 786        int err;
 787
 788        if (!cp)
 789                return -ENOMEM;
 790
 791        if (ts->cnt == ts->sz) {
 792                err = thread_stack__grow(ts);
 793                if (err)
 794                        return err;
 795        }
 796
 797        tse = &ts->stack[ts->cnt++];
 798        tse->ret_addr = ret_addr;
 799        tse->timestamp = timestamp;
 800        tse->ref = ref;
 801        tse->branch_count = ts->branch_count;
 802        tse->insn_count = ts->insn_count;
 803        tse->cyc_count = ts->cyc_count;
 804        tse->cp = cp;
 805        tse->no_call = no_call;
 806        tse->trace_end = trace_end;
 807        tse->non_call = false;
 808        tse->db_id = 0;
 809
 810        return 0;
 811}
 812
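/*
 * Pop the entry whose return address matches ret_addr (or, for a stack of
 * depth 1, the entry whose symbol matches sym). Entries above the match are
 * flushed first as 'no return' call/returns. Returns 1 if no match was found
 * (the caller then falls back to thread_stack__no_call_return()), 0 on
 * success, or a negative error code.
 */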
 813static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
 814                                u64 ret_addr, u64 timestamp, u64 ref,
 815                                struct symbol *sym)
 816{
 817        int err;
 818
 819        if (!ts->cnt)
 820                return 1;
 821
 822        if (ts->cnt == 1) {
 823                struct thread_stack_entry *tse = &ts->stack[0];
 824
 825                if (tse->cp->sym == sym)
 826                        return thread_stack__call_return(thread, ts, --ts->cnt,
 827                                                         timestamp, ref, false);
 828        }
 829
 830        if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
 831            !ts->stack[ts->cnt - 1].non_call) {
 832                return thread_stack__call_return(thread, ts, --ts->cnt,
 833                                                 timestamp, ref, false);
 834        } else {
 835                size_t i = ts->cnt - 1;
 836
 837                while (i--) {
 838                        if (ts->stack[i].ret_addr != ret_addr ||
 839                            ts->stack[i].non_call)
 840                                continue;
 841                        i += 1;
 842                        while (ts->cnt > i) {
 843                                err = thread_stack__call_return(thread, ts,
 844                                                                --ts->cnt,
 845                                                                timestamp, ref,
 846                                                                true);
 847                                if (err)
 848                                        return err;
 849                        }
 850                        return thread_stack__call_return(thread, ts, --ts->cnt,
 851                                                         timestamp, ref, false);
 852                }
 853        }
 854
 855        return 1;
 856}
 857
 858static int thread_stack__bottom(struct thread_stack *ts,
 859                                struct perf_sample *sample,
 860                                struct addr_location *from_al,
 861                                struct addr_location *to_al, u64 ref)
 862{
 863        struct call_path_root *cpr = ts->crp->cpr;
 864        struct call_path *cp;
 865        struct symbol *sym;
 866        u64 ip;
 867
 868        if (sample->ip) {
 869                ip = sample->ip;
 870                sym = from_al->sym;
 871        } else if (sample->addr) {
 872                ip = sample->addr;
 873                sym = to_al->sym;
 874        } else {
 875                return 0;
 876        }
 877
 878        cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
 879                                ts->kernel_start);
 880
 881        return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
 882                                     true, false);
 883}
 884
 885static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
 886                                struct perf_sample *sample, u64 ref)
 887{
 888        u64 tm = sample->time;
 889        int err;
 890
 891        /* Return to userspace, so pop all kernel addresses */
 892        while (thread_stack__in_kernel(ts)) {
 893                err = thread_stack__call_return(thread, ts, --ts->cnt,
 894                                                tm, ref, true);
 895                if (err)
 896                        return err;
 897        }
 898
 899        return 0;
 900}
 901
 902static int thread_stack__no_call_return(struct thread *thread,
 903                                        struct thread_stack *ts,
 904                                        struct perf_sample *sample,
 905                                        struct addr_location *from_al,
 906                                        struct addr_location *to_al, u64 ref)
 907{
 908        struct call_path_root *cpr = ts->crp->cpr;
 909        struct call_path *root = &cpr->call_path;
 910        struct symbol *fsym = from_al->sym;
 911        struct symbol *tsym = to_al->sym;
 912        struct call_path *cp, *parent;
 913        u64 ks = ts->kernel_start;
 914        u64 addr = sample->addr;
 915        u64 tm = sample->time;
 916        u64 ip = sample->ip;
 917        int err;
 918
 919        if (ip >= ks && addr < ks) {
 920                /* Return to userspace, so pop all kernel addresses */
 921                err = thread_stack__pop_ks(thread, ts, sample, ref);
 922                if (err)
 923                        return err;
 924
 925                /* If the stack is empty, push the userspace address */
 926                if (!ts->cnt) {
 927                        cp = call_path__findnew(cpr, root, tsym, addr, ks);
 928                        return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
 929                                                     false);
 930                }
 931        } else if (thread_stack__in_kernel(ts) && ip < ks) {
 932                /* Return to userspace, so pop all kernel addresses */
 933                err = thread_stack__pop_ks(thread, ts, sample, ref);
 934                if (err)
 935                        return err;
 936        }
 937
 938        if (ts->cnt)
 939                parent = ts->stack[ts->cnt - 1].cp;
 940        else
 941                parent = root;
 942
 943        if (parent->sym == from_al->sym) {
 944                /*
 945                 * At the bottom of the stack, assume the missing 'call' was
 946                 * before the trace started. So, pop the current symbol and push
 947                 * the 'to' symbol.
 948                 */
 949                if (ts->cnt == 1) {
 950                        err = thread_stack__call_return(thread, ts, --ts->cnt,
 951                                                        tm, ref, false);
 952                        if (err)
 953                                return err;
 954                }
 955
 956                if (!ts->cnt) {
 957                        cp = call_path__findnew(cpr, root, tsym, addr, ks);
 958
 959                        return thread_stack__push_cp(ts, addr, tm, ref, cp,
 960                                                     true, false);
 961                }
 962
 963                /*
 964                 * Otherwise assume the 'return' is being used as a jump (e.g.
 965                 * retpoline) and just push the 'to' symbol.
 966                 */
 967                cp = call_path__findnew(cpr, parent, tsym, addr, ks);
 968
 969                err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
 970                if (!err)
 971                        ts->stack[ts->cnt - 1].non_call = true;
 972
 973                return err;
 974        }
 975
 976        /*
 977         * Assume 'parent' has not yet returned, so push 'to', and then push and
 978         * pop 'from'.
 979         */
 980
 981        cp = call_path__findnew(cpr, parent, tsym, addr, ks);
 982
 983        err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
 984        if (err)
 985                return err;
 986
 987        cp = call_path__findnew(cpr, cp, fsym, ip, ks);
 988
 989        err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
 990        if (err)
 991                return err;
 992
 993        return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
 994}
 995
 996static int thread_stack__trace_begin(struct thread *thread,
 997                                     struct thread_stack *ts, u64 timestamp,
 998                                     u64 ref)
 999{
1000        struct thread_stack_entry *tse;
1001        int err;
1002
1003        if (!ts->cnt)
1004                return 0;
1005
1006        /* Pop trace end */
1007        tse = &ts->stack[ts->cnt - 1];
1008        if (tse->trace_end) {
1009                err = thread_stack__call_return(thread, ts, --ts->cnt,
1010                                                timestamp, ref, false);
1011                if (err)
1012                        return err;
1013        }
1014
1015        return 0;
1016}
1017
1018static int thread_stack__trace_end(struct thread_stack *ts,
1019                                   struct perf_sample *sample, u64 ref)
1020{
1021        struct call_path_root *cpr = ts->crp->cpr;
1022        struct call_path *cp;
1023        u64 ret_addr;
1024
1025        /* No point having 'trace end' on the bottom of the stack */
1026        if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
1027                return 0;
1028
1029        cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
1030                                ts->kernel_start);
1031
1032        ret_addr = sample->ip + sample->insn_len;
1033
1034        return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
1035                                     false, true);
1036}
1037
1038static bool is_x86_retpoline(const char *name)
1039{
1040        const char *p = strstr(name, "__x86_indirect_thunk_");
1041
1042        return p == name || !strcmp(name, "__indirect_thunk_start");
1043}
1044
1045/*
1046 * x86 retpoline functions pollute the call graph. This function removes them.
1047 * This does not handle function return thunks, nor is there any improvement
1048 * for the handling of inline thunks or extern thunks.
1049 */
1050static int thread_stack__x86_retpoline(struct thread_stack *ts,
1051                                       struct perf_sample *sample,
1052                                       struct addr_location *to_al)
1053{
1054        struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
1055        struct call_path_root *cpr = ts->crp->cpr;
1056        struct symbol *sym = tse->cp->sym;
1057        struct symbol *tsym = to_al->sym;
1058        struct call_path *cp;
1059
1060        if (sym && is_x86_retpoline(sym->name)) {
1061                /*
 1062                 * This is an x86 retpoline fn. It pollutes the call graph by
1063                 * showing up everywhere there is an indirect branch, but does
1064                 * not itself mean anything. Here the top-of-stack is removed,
1065                 * by decrementing the stack count, and then further down, the
1066                 * resulting top-of-stack is replaced with the actual target.
1067                 * The result is that the retpoline functions will no longer
1068                 * appear in the call graph. Note this only affects the call
1069                 * graph, since all the original branches are left unchanged.
1070                 */
1071                ts->cnt -= 1;
1072                sym = ts->stack[ts->cnt - 2].cp->sym;
1073                if (sym && sym == tsym && to_al->addr != tsym->start) {
1074                        /*
1075                         * Target is back to the middle of the symbol we came
1076                         * from so assume it is an indirect jmp and forget it
1077                         * altogether.
1078                         */
1079                        ts->cnt -= 1;
1080                        return 0;
1081                }
1082        } else if (sym && sym == tsym) {
1083                /*
1084                 * Target is back to the symbol we came from so assume it is an
1085                 * indirect jmp and forget it altogether.
1086                 */
1087                ts->cnt -= 1;
1088                return 0;
1089        }
1090
1091        cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
1092                                sample->addr, ts->kernel_start);
1093        if (!cp)
1094                return -ENOMEM;
1095
1096        /* Replace the top-of-stack with the actual target */
1097        ts->stack[ts->cnt - 1].cp = cp;
1098
1099        return 0;
1100}
1101
1102int thread_stack__process(struct thread *thread, struct comm *comm,
1103                          struct perf_sample *sample,
1104                          struct addr_location *from_al,
1105                          struct addr_location *to_al, u64 ref,
1106                          struct call_return_processor *crp)
1107{
1108        struct thread_stack *ts = thread__stack(thread, sample->cpu);
1109        enum retpoline_state_t rstate;
1110        int err = 0;
1111
1112        if (ts && !ts->crp) {
1113                /* Supersede thread_stack__event() */
1114                thread_stack__reset(thread, ts);
1115                ts = NULL;
1116        }
1117
1118        if (!ts) {
1119                ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
1120                if (!ts)
1121                        return -ENOMEM;
1122                ts->comm = comm;
1123        }
1124
1125        rstate = ts->rstate;
1126        if (rstate == X86_RETPOLINE_DETECTED)
1127                ts->rstate = X86_RETPOLINE_POSSIBLE;
1128
1129        /* Flush stack on exec */
1130        if (ts->comm != comm && thread->pid_ == thread->tid) {
1131                err = __thread_stack__flush(thread, ts);
1132                if (err)
1133                        return err;
1134                ts->comm = comm;
1135        }
1136
1137        /* If the stack is empty, put the current symbol on the stack */
1138        if (!ts->cnt) {
1139                err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
1140                if (err)
1141                        return err;
1142        }
1143
1144        ts->branch_count += 1;
1145        ts->insn_count += sample->insn_cnt;
1146        ts->cyc_count += sample->cyc_cnt;
1147        ts->last_time = sample->time;
1148
1149        if (sample->flags & PERF_IP_FLAG_CALL) {
1150                bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
1151                struct call_path_root *cpr = ts->crp->cpr;
1152                struct call_path *cp;
1153                u64 ret_addr;
1154
1155                if (!sample->ip || !sample->addr)
1156                        return 0;
1157
1158                ret_addr = sample->ip + sample->insn_len;
1159                if (ret_addr == sample->addr)
1160                        return 0; /* Zero-length calls are excluded */
1161
1162                cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
1163                                        to_al->sym, sample->addr,
1164                                        ts->kernel_start);
1165                err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
1166                                            cp, false, trace_end);
1167
1168                /*
 1169                 * A call to the same symbol, but not to the start of the symbol,
 1170                 * may be the start of an x86 retpoline.
1171                 */
1172                if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
1173                    from_al->sym == to_al->sym &&
1174                    to_al->addr != to_al->sym->start)
1175                        ts->rstate = X86_RETPOLINE_DETECTED;
1176
1177        } else if (sample->flags & PERF_IP_FLAG_RETURN) {
1178                if (!sample->addr) {
1179                        u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
1180                                                 PERF_IP_FLAG_INTERRUPT;
1181
1182                        if (!(sample->flags & return_from_kernel))
1183                                return 0;
1184
1185                        /* Pop kernel stack */
1186                        return thread_stack__pop_ks(thread, ts, sample, ref);
1187                }
1188
1189                if (!sample->ip)
1190                        return 0;
1191
1192                /* x86 retpoline 'return' doesn't match the stack */
1193                if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
1194                    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
1195                        return thread_stack__x86_retpoline(ts, sample, to_al);
1196
1197                err = thread_stack__pop_cp(thread, ts, sample->addr,
1198                                           sample->time, ref, from_al->sym);
1199                if (err) {
1200                        if (err < 0)
1201                                return err;
1202                        err = thread_stack__no_call_return(thread, ts, sample,
1203                                                           from_al, to_al, ref);
1204                }
1205        } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
1206                err = thread_stack__trace_begin(thread, ts, sample->time, ref);
1207        } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
1208                err = thread_stack__trace_end(ts, sample, ref);
1209        } else if (sample->flags & PERF_IP_FLAG_BRANCH &&
1210                   from_al->sym != to_al->sym && to_al->sym &&
1211                   to_al->addr == to_al->sym->start) {
1212                struct call_path_root *cpr = ts->crp->cpr;
1213                struct call_path *cp;
1214
1215                /*
1216                 * The compiler might optimize a call/ret combination by making
1217                 * it a jmp. Make that visible by recording on the stack a
1218                 * branch to the start of a different symbol. Note, that means
1219                 * when a ret pops the stack, all jmps must be popped off first.
1220                 */
1221                cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
1222                                        to_al->sym, sample->addr,
1223                                        ts->kernel_start);
1224                err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
1225                                            false);
1226                if (!err)
1227                        ts->stack[ts->cnt - 1].non_call = true;
1228        }
1229
1230        return err;
1231}
1232
1233size_t thread_stack__depth(struct thread *thread, int cpu)
1234{
1235        struct thread_stack *ts = thread__stack(thread, cpu);
1236
1237        if (!ts)
1238                return 0;
1239        return ts->cnt;
1240}
1241