linux/tools/perf/util/thread-stack.c
/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	struct call_path *cp;
	bool no_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
};

static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

static struct thread_stack *thread_stack__new(struct thread *thread,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts;

	ts = zalloc(sizeof(struct thread_stack));
	if (!ts)
		return NULL;

	if (thread_stack__grow(ts)) {
		free(ts);
		return NULL;
	}

	if (thread->mg && thread->mg->machine)
		ts->kernel_start = machine__kernel_start(thread->mg->machine);
	else
		ts->kernel_start = 1ULL << 63;
	ts->crp = crp;

	return ts;
}

static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

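	/*
	 * On allocation failure the stack was discarded (cnt reset to zero)
	 * above, so this write always lands within the existing buffer.
	 */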
	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used.  Or the perf context
	 * switch in the kernel which doesn't stop and start tracing in exactly
	 * the same code path.  When that happens the return address will be
	 * further down the stack.  If the return address is not found at all,
	 * we assume the opposite (i.e. this is a return for a call that wasn't
	 * seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;

	return crp->process(&cr, crp->data);
}

static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	if (thread->ts)
		return __thread_stack__flush(thread, thread->ts);

	return 0;
}

int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	if (!thread)
		return -EINVAL;

	if (!thread->ts) {
		thread->ts = thread_stack__new(thread, NULL);
		if (!thread->ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		thread->ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes.  In that case
	 * the stack might be completely invalid.  Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			__thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (thread->ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(thread->ts, ret_addr);
	} else if (flags & PERF_IP_FLAG_RETURN) {
		if (!from_ip)
			return 0;
		thread_stack__pop(thread->ts, to_ip);
	}

	return 0;
}
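
/*
 * Illustrative sketch (not lifted from a specific caller): a branch decoder
 * such as the intel-pt or intel-bts code can feed each decoded branch to
 * thread_stack__event(), e.g.:
 *
 *	thread_stack__event(thread, PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL,
 *			    from_ip, to_ip, insn_len, trace_nr);
 *
 * A 'call' pushes its return address (from_ip + insn_len); a 'return' pops
 * the matching entry, so the stack can later be read back with
 * thread_stack__sample().
 */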

void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
{
	if (!thread || !thread->ts)
		return;

	if (trace_nr != thread->ts->trace_nr) {
		if (thread->ts->trace_nr)
			__thread_stack__flush(thread, thread->ts);
		thread->ts->trace_nr = trace_nr;
	}
}

void thread_stack__free(struct thread *thread)
{
	if (thread->ts) {
		__thread_stack__flush(thread, thread->ts);
		zfree(&thread->ts->stack);
		zfree(&thread->ts);
	}
}

void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
			  size_t sz, u64 ip)
{
	size_t i;

	if (!thread || !thread->ts)
		chain->nr = 1;
	else
		chain->nr = min(sz, thread->ts->cnt + 1);

	chain->ips[0] = ip;

	for (i = 1; i < chain->nr; i++)
		chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
}
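
/*
 * Layout of the synthesized chain (illustrative): ips[0] is the sampled ip
 * and the return addresses follow innermost-first, i.e.
 *
 *	ips[1] = stack[cnt - 1].ret_addr
 *	ips[2] = stack[cnt - 2].ret_addr
 *	...
 */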

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}
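
/*
 * A minimal usage sketch (the callback and data names are hypothetical):
 *
 *	static int my_process(struct call_return *cr, void *data)
 *	{
 *		...consume one matched call/return pair...
 *		return 0;
 *	}
 *
 *	crp = call_return_processor__new(my_process, my_data);
 *
 * The same crp is then passed to thread_stack__process() for every branch
 * sample, and freed with call_return_processor__free() when done.
 */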

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}

static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call)
{
	struct thread_stack_entry *tse;
	int err;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->cp = cp;
	tse->no_call = no_call;

	return 0;
}

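/*
 * Pop entries down to the one matching ret_addr (or, when the stack holds a
 * single entry, matching sym).  Returns 1 if no match is found, which the
 * caller treats as a return without a corresponding call.
 */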
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

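		/*
		 * The return address matches deeper in the stack: pop the
		 * entries above it as 'no return', then the matching entry
		 * itself as a normal return.
		 */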
		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp, true);
}

static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	int err;

	if (sample->ip >= ks && sample->addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, &cpr->call_path,
						to_al->sym, sample->addr,
						ts->kernel_start);
			if (!cp)
				return -ENOMEM;
			return thread_stack__push_cp(ts, 0, sample->time, ref,
						     cp, true);
		}
	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = &cpr->call_path;

	/* This 'return' had no 'call', so push and pop top of stack */
	cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
				    true);
	if (err)
		return err;

	return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
				    to_al->sym);
}

static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->cp->sym == NULL && tse->cp->ip == 0) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

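/*
 * A 'trace end' is represented by a stack entry whose call path has a NULL
 * symbol and zero ip (pushed below); thread_stack__trace_begin() above pops
 * exactly that sentinel when tracing resumes.
 */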
static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false);
}

int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts;
	int err = 0;

	if (ts) {
		if (!ts->crp) {
			/* Supersede thread_stack__event() */
			thread_stack__free(thread);
			thread->ts = thread_stack__new(thread, crp);
			if (!thread->ts)
				return -ENOMEM;
			ts = thread->ts;
			ts->comm = comm;
		}
	} else {
		thread->ts = thread_stack__new(thread, crp);
		if (!thread->ts)
			return -ENOMEM;
		ts = thread->ts;
		ts->comm = comm;
	}

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
					   ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		if (!cp)
			return -ENOMEM;
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false);
	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	}

	return err;
}
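
/*
 * Illustrative call sequence (a sketch, not lifted from a particular tool):
 * a consumer resolves from_al/to_al for each branch sample and then drives
 * the call/return processor created earlier, e.g.:
 *
 *	err = thread_stack__process(thread, comm, sample, &from_al, &to_al,
 *				    db_id, crp);
 *
 * where db_id is whatever external reference the crp->process() callback
 * wants echoed back in call_ref/return_ref.
 */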

size_t thread_stack__depth(struct thread *thread)
{
	if (!thread->ts)
		return 0;
	return thread->ts->cnt;
}