/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 */
#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	struct call_path *cp;
	bool no_call;
	bool trace_end;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 * @arr_sz: size of array if this is the first element of an array
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
	unsigned int arr_sz;
};

/*
 * Assume pid == tid == 0 identifies the idle task. The idle task is really 1
 * task per cpu, so a thread stack is allocated per cpu.
 */
static inline bool thread_stack__per_cpu(struct thread *thread)
{
	return !(thread->tid || thread->pid_);
}

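/* Enlarge the stack array by a fixed increment (STACK_GROWTH entries) */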
static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

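/*
 * Allocate the initial stack entries and record the kernel start address,
 * which is used to decide whether an address is a kernel or user address.
 */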
static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
			      struct call_return_processor *crp)
{
	int err;

	err = thread_stack__grow(ts);
	if (err)
		return err;

	if (thread->mg && thread->mg->machine)
		ts->kernel_start = machine__kernel_start(thread->mg->machine);
	else
		ts->kernel_start = 1ULL << 63;
	ts->crp = crp;

	return 0;
}

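/*
 * Get the thread stack for 'cpu', growing the per-cpu array for the idle
 * task if necessary, and initializing the stack on first use.
 */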
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
					      struct call_return_processor *crp)
{
	struct thread_stack *ts = thread->ts, *new_ts;
	unsigned int old_sz = ts ? ts->arr_sz : 0;
	unsigned int new_sz = 1;

	if (thread_stack__per_cpu(thread) && cpu > 0)
		new_sz = roundup_pow_of_two(cpu + 1);

	if (!ts || new_sz > old_sz) {
		new_ts = calloc(new_sz, sizeof(*ts));
		if (!new_ts)
			return NULL;
		if (ts)
			memcpy(new_ts, ts, old_sz * sizeof(*ts));
		new_ts->arr_sz = new_sz;
		zfree(&thread->ts);
		thread->ts = new_ts;
		ts = new_ts;
	}

	if (thread_stack__per_cpu(thread) && cpu > 0 &&
	    (unsigned int)cpu < ts->arr_sz)
		ts += cpu;

	if (!ts->stack &&
	    thread_stack__init(ts, thread, crp))
		return NULL;

	return ts;
}

static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread->ts;

	if (cpu < 0)
		cpu = 0;

	if (!ts || (unsigned int)cpu >= ts->arr_sz)
		return NULL;

	ts += cpu;

	if (!ts->stack)
		return NULL;

	return ts;
}

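/*
 * Select the thread stack to use: the per-cpu stack for the idle task,
 * otherwise the single stack belonging to the thread.
 */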
static inline struct thread_stack *thread__stack(struct thread *thread,
						 int cpu)
{
	if (!thread)
		return NULL;

	if (thread_stack__per_cpu(thread))
		return thread__cpu_stack(thread, cpu);

	return thread->ts;
}

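/*
 * Push a return address. If growing the stack fails, the existing entries
 * are discarded and the push still proceeds so that tracking can continue.
 */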
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
			      bool trace_end)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt].trace_end = trace_end;
	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used.  Search the stack
	 * for a matching return address and pop entries up to and including
	 * it.  If no match is found, leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

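/* Pop any 'trace end' entries from the top of the stack */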
static void thread_stack__pop_trace_end(struct thread_stack *ts)
{
	size_t i;

	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].trace_end)
			ts->cnt = i;
		else
			return;
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

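/*
 * Report a completed call/return pair for the entry at 'idx' to the
 * call/return processor.
 */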
static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;

	return crp->process(&cr, crp->data);
}

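/*
 * Empty the stack. With a call/return processor, each remaining entry is
 * reported as a call with no seen return; otherwise the entries are simply
 * dropped.
 */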
static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;
	int err = 0;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++) {
			int ret = __thread_stack__flush(thread, ts + pos);

			if (ret)
				err = ret;
		}
	}

	return err;
}

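/*
 * Update the stack from a branch event: push on a call, pop on a return.
 * This path is used only when no call/return processor is in use, i.e. when
 * the stack is consumed via thread_stack__sample().
 */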
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!thread)
		return -EINVAL;

	if (!ts) {
		ts = thread_stack__new(thread, cpu, NULL);
		if (!ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		ts->trace_nr = trace_nr;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes.  In that case
	 * the stack might be wrong, so flush it before recording the new
	 * trace number.
	 */
	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}

	/* Stop here if thread_stack__process() is in use */
	if (ts->crp)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(ts, ret_addr,
					  flags & PERF_IP_FLAG_TRACE_END);
	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
		/*
		 * If the caller did not change the trace number (which would
		 * have flushed the stack) then try to make sense of the stack.
		 * Possibly, tracing began after returning to the original
		 * address, so try to pop that. Also, do not expect a call made
		 * when the trace ended, to return, so pop that.
		 */
		thread_stack__pop(ts, to_ip);
		thread_stack__pop_trace_end(ts);
	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
		thread_stack__pop(ts, to_ip);
	}

	return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return;

	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}
}

static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
	__thread_stack__flush(thread, ts);
	zfree(&ts->stack);
}

static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
	unsigned int arr_sz = ts->arr_sz;

	__thread_stack__free(thread, ts);
	memset(ts, 0, sizeof(*ts));
	ts->arr_sz = arr_sz;
}

void thread_stack__free(struct thread *thread)
{
	struct thread_stack *ts = thread->ts;
	unsigned int pos;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++)
			__thread_stack__free(thread, ts + pos);
		zfree(&thread->ts);
	}
}

static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}

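/*
 * Synthesize a callchain for a sample: the sample ip first, then the return
 * addresses on the thread stack from youngest to oldest, inserting a context
 * marker whenever the addresses switch between user and kernel space.
 */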
void thread_stack__sample(struct thread *thread, int cpu,
			  struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 context = callchain_context(ip, kernel_start);
	u64 last_context;
	size_t i, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	chain->ips[0] = context;
	chain->ips[1] = ip;

	if (!ts) {
		chain->nr = 2;
		return;
	}

	last_context = context;

	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (i >= sz - 1)
				break;
			chain->ips[i++] = context;
			last_context = context;
		}
		chain->ips[i] = ip;
	}

	chain->nr = i;
}

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}

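/* Push an entry that also records timestamp, reference and call path */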
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call, bool trace_end)
{
	struct thread_stack_entry *tse;
	int err;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->cp = cp;
	tse->no_call = no_call;
	tse->trace_end = trace_end;

	return 0;
}

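/*
 * Pop entries until one matching 'ret_addr' (or, for the last entry, 'sym')
 * is found, reporting a call/return for each popped entry. Entries skipped
 * over on the way to a match are reported as calls with no seen return.
 * Returns 1 if no matching entry was found.
 */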
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

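/*
 * Create the bottom entry of the stack from the first sample, using its ip
 * (or, failing that, its addr) as the starting call path.
 */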
static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true, false);
}

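/* Handle a 'return' for which no matching 'call' was seen on the stack */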
static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	int err;

	if (sample->ip >= ks && sample->addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, &cpr->call_path,
						to_al->sym, sample->addr,
						ts->kernel_start);
			if (!cp)
				return -ENOMEM;
			return thread_stack__push_cp(ts, 0, sample->time, ref,
						     cp, true, false);
		}
	} else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		while (thread_stack__in_kernel(ts)) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							sample->time, ref,
							true);
			if (err)
				return err;
		}
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = &cpr->call_path;

	/* This 'return' had no 'call', so push and pop top of stack */
	cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
				    true, false);
	if (err)
		return err;

	return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
				    to_al->sym);
}

static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->trace_end) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false, true);
}

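/*
 * Update the stack from a branch sample and pass completed call/return pairs
 * to the call/return processor. This supersedes thread_stack__event() when a
 * processor is in use.
 */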
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread__stack(thread, sample->cpu);
	int err = 0;

	if (ts && !ts->crp) {
		/* Supersede thread_stack__event() */
		thread_stack__reset(thread, ts);
		ts = NULL;
	}

	if (!ts) {
		ts = thread_stack__new(thread, sample->cpu, crp);
		if (!ts)
			return -ENOMEM;
		ts->comm = comm;
	}

	/* Flush stack on exec */
	if (ts->comm != comm && thread->pid_ == thread->tid) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		if (!cp)
			return -ENOMEM;
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false, trace_end);
	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->ip || !sample->addr)
			return 0;

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	}

	return err;
}

size_t thread_stack__depth(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return 0;
	return ts->cnt;
}