linux/arch/powerpc/perf/callchain.c
<<
>>
Prefs
   1/*
   2 * Performance counter callchain support - powerpc architecture code
   3 *
   4 * Copyright © 2009 Paul Mackerras, IBM Corporation.
   5 *
   6 * This program is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU General Public License
   8 * as published by the Free Software Foundation; either version
   9 * 2 of the License, or (at your option) any later version.
  10 */
  11#include <linux/kernel.h>
  12#include <linux/sched.h>
  13#include <linux/perf_event.h>
  14#include <linux/percpu.h>
  15#include <linux/uaccess.h>
  16#include <linux/mm.h>
  17#include <asm/ptrace.h>
  18#include <asm/pgtable.h>
  19#include <asm/sigcontext.h>
  20#include <asm/ucontext.h>
  21#include <asm/vdso.h>
  22#ifdef CONFIG_PPC64
  23#include "../kernel/ppc32.h"
  24#endif
  25
  26
  27/*
  28 * Is sp valid as the address of the next kernel stack frame after prev_sp?
  29 * The next frame may be in a different stack area but should not go
  30 * back down in the same stack area.
  31 */
  32static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
  33{
  34        if (sp & 0xf)
  35                return 0;               /* must be 16-byte aligned */
  36        if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
  37                return 0;
  38        if (sp >= prev_sp + STACK_FRAME_MIN_SIZE)
  39                return 1;
  40        /*
  41         * sp could decrease when we jump off an interrupt stack
  42         * back to the regular process stack.
  43         */
  44        if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1)))
  45                return 1;
  46        return 0;
  47}
  48
  49void
  50perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
  51{
  52        unsigned long sp, next_sp;
  53        unsigned long next_ip;
  54        unsigned long lr;
  55        long level = 0;
  56        unsigned long *fp;
  57
  58        lr = regs->link;
  59        sp = regs->gpr[1];
  60        perf_callchain_store(entry, perf_instruction_pointer(regs));
  61
  62        if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
  63                return;
  64
  65        for (;;) {
  66                fp = (unsigned long *) sp;
  67                next_sp = fp[0];
  68
  69                if (next_sp == sp + STACK_INT_FRAME_SIZE &&
  70                    fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
  71                        /*
  72                         * This looks like an interrupt frame for an
  73                         * interrupt that occurred in the kernel
  74                         */
  75                        regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
  76                        next_ip = regs->nip;
  77                        lr = regs->link;
  78                        level = 0;
  79                        perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
  80
  81                } else {
  82                        if (level == 0)
  83                                next_ip = lr;
  84                        else
  85                                next_ip = fp[STACK_FRAME_LR_SAVE];
  86
  87                        /*
  88                         * We can't tell which of the first two addresses
  89                         * we get are valid, but we can filter out the
  90                         * obviously bogus ones here.  We replace them
  91                         * with 0 rather than removing them entirely so
  92                         * that userspace can tell which is which.
  93                         */
  94                        if ((level == 1 && next_ip == lr) ||
  95                            (level <= 1 && !kernel_text_address(next_ip)))
  96                                next_ip = 0;
  97
  98                        ++level;
  99                }
 100
 101                perf_callchain_store(entry, next_ip);
 102                if (!valid_next_sp(next_sp, sp))
 103                        return;
 104                sp = next_sp;
 105        }
 106}
 107
 108#ifdef CONFIG_PPC64
 109/*
 110 * On 64-bit we don't want to invoke hash_page on user addresses from
 111 * interrupt context, so if the access faults, we read the page tables
 112 * to find which page (if any) is mapped and access it directly.
 113 */
 114static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
 115{
 116        int ret = -EFAULT;
 117        pgd_t *pgdir;
 118        pte_t *ptep, pte;
 119        unsigned shift;
 120        unsigned long addr = (unsigned long) ptr;
 121        unsigned long offset;
 122        unsigned long pfn, flags;
 123        void *kaddr;
 124
 125        pgdir = current->mm->pgd;
 126        if (!pgdir)
 127                return -EFAULT;
 128
 129        local_irq_save(flags);
 130        ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
 131        if (!ptep)
 132                goto err_out;
 133        if (!shift)
 134                shift = PAGE_SHIFT;
 135
 136        /* align address to page boundary */
 137        offset = addr & ((1UL << shift) - 1);
 138
 139        pte = READ_ONCE(*ptep);
 140        if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
 141                goto err_out;
 142        pfn = pte_pfn(pte);
 143        if (!page_is_ram(pfn))
 144                goto err_out;
 145
 146        /* no highmem to worry about here */
 147        kaddr = pfn_to_kaddr(pfn);
 148        memcpy(buf, kaddr + offset, nb);
 149        ret = 0;
 150err_out:
 151        local_irq_restore(flags);
 152        return ret;
 153}
 154
 155static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
 156{
 157        if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
 158            ((unsigned long)ptr & 7))
 159                return -EFAULT;
 160
 161        pagefault_disable();
 162        if (!__get_user_inatomic(*ret, ptr)) {
 163                pagefault_enable();
 164                return 0;
 165        }
 166        pagefault_enable();
 167
 168        return read_user_stack_slow(ptr, ret, 8);
 169}
 170
 171static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
 172{
 173        if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
 174            ((unsigned long)ptr & 3))
 175                return -EFAULT;
 176
 177        pagefault_disable();
 178        if (!__get_user_inatomic(*ret, ptr)) {
 179                pagefault_enable();
 180                return 0;
 181        }
 182        pagefault_enable();
 183
 184        return read_user_stack_slow(ptr, ret, 4);
 185}
 186
 187static inline int valid_user_sp(unsigned long sp, int is_64)
 188{
 189        if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32)
 190                return 0;
 191        return 1;
 192}
 193
/*
 * 64-bit user processes use the same stack frame for RT and non-RT signals.
 *
 * This is the layout the user stack walker in this file expects to find
 * at the stack pointer of a signal frame; the field order must not change.
 */
struct signal_frame_64 {
	char            dummy[__SIGNAL_FRAMESIZE];
	struct ucontext uc;		/* saved registers are read from uc.uc_mcontext.gp_regs */
	unsigned long   unused[2];
	unsigned int    tramp[6];	/* its address is accepted as a sigreturn address */
	struct siginfo  *pinfo;		/* checked to point at info below */
	void            *puc;		/* checked to point at uc above */
	struct siginfo  info;
	char            abigap[288];
};
 207
 208static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
 209{
 210        if (nip == fp + offsetof(struct signal_frame_64, tramp))
 211                return 1;
 212        if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
 213            nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
 214                return 1;
 215        return 0;
 216}
 217
 218/*
 219 * Do some sanity checking on the signal frame pointed to by sp.
 220 * We check the pinfo and puc pointers in the frame.
 221 */
 222static int sane_signal_64_frame(unsigned long sp)
 223{
 224        struct signal_frame_64 __user *sf;
 225        unsigned long pinfo, puc;
 226
 227        sf = (struct signal_frame_64 __user *) sp;
 228        if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
 229            read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
 230                return 0;
 231        return pinfo == (unsigned long) &sf->info &&
 232                puc == (unsigned long) &sf->uc;
 233}
 234
 235static void perf_callchain_user_64(struct perf_callchain_entry *entry,
 236                                   struct pt_regs *regs)
 237{
 238        unsigned long sp, next_sp;
 239        unsigned long next_ip;
 240        unsigned long lr;
 241        long level = 0;
 242        struct signal_frame_64 __user *sigframe;
 243        unsigned long __user *fp, *uregs;
 244
 245        next_ip = perf_instruction_pointer(regs);
 246        lr = regs->link;
 247        sp = regs->gpr[1];
 248        perf_callchain_store(entry, next_ip);
 249
 250        while (entry->nr < PERF_MAX_STACK_DEPTH) {
 251                fp = (unsigned long __user *) sp;
 252                if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
 253                        return;
 254                if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
 255                        return;
 256
 257                /*
 258                 * Note: the next_sp - sp >= signal frame size check
 259                 * is true when next_sp < sp, which can happen when
 260                 * transitioning from an alternate signal stack to the
 261                 * normal stack.
 262                 */
 263                if (next_sp - sp >= sizeof(struct signal_frame_64) &&
 264                    (is_sigreturn_64_address(next_ip, sp) ||
 265                     (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
 266                    sane_signal_64_frame(sp)) {
 267                        /*
 268                         * This looks like an signal frame
 269                         */
 270                        sigframe = (struct signal_frame_64 __user *) sp;
 271                        uregs = sigframe->uc.uc_mcontext.gp_regs;
 272                        if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
 273                            read_user_stack_64(&uregs[PT_LNK], &lr) ||
 274                            read_user_stack_64(&uregs[PT_R1], &sp))
 275                                return;
 276                        level = 0;
 277                        perf_callchain_store(entry, PERF_CONTEXT_USER);
 278                        perf_callchain_store(entry, next_ip);
 279                        continue;
 280                }
 281
 282                if (level == 0)
 283                        next_ip = lr;
 284                perf_callchain_store(entry, next_ip);
 285                ++level;
 286                sp = next_sp;
 287        }
 288}
 289
 290static inline int current_is_64bit(void)
 291{
 292        /*
 293         * We can't use test_thread_flag() here because we may be on an
 294         * interrupt stack, and the thread flags don't get copied over
 295         * from the thread_info on the main stack to the interrupt stack.
 296         */
 297        return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
 298}
 299
 300#else  /* CONFIG_PPC64 */
 301/*
 302 * On 32-bit we just access the address and let hash_page create a
 303 * HPTE if necessary, so there is no need to fall back to reading
 304 * the page tables.  Since this is called at interrupt level,
 305 * do_page_fault() won't treat a DSI as a page fault.
 306 */
 307static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
 308{
 309        int rc;
 310
 311        if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
 312            ((unsigned long)ptr & 3))
 313                return -EFAULT;
 314
 315        pagefault_disable();
 316        rc = __get_user_inatomic(*ret, ptr);
 317        pagefault_enable();
 318
 319        return rc;
 320}
 321
/*
 * Empty stand-in for builds without CONFIG_PPC64.  It only exists so that
 * perf_callchain_user() compiles; current_is_64bit() returns 0 in this
 * configuration, so the 64-bit path is never taken.
 */
static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
					  struct pt_regs *regs)
{
}
 326
/*
 * Variant for builds without CONFIG_PPC64: always reports a 32-bit task.
 */
static inline int current_is_64bit(void)
{
	return 0;
}
 331
 332static inline int valid_user_sp(unsigned long sp, int is_64)
 333{
 334        if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
 335                return 0;
 336        return 1;
 337}
 338
/*
 * Without CONFIG_PPC64, map the "32" names used by the signal frame code
 * below onto the native signal structures and frame size.
 */
#define __SIGNAL_FRAMESIZE32    __SIGNAL_FRAMESIZE
#define sigcontext32            sigcontext
#define mcontext32              mcontext
#define ucontext32              ucontext
#define compat_siginfo_t        struct siginfo
 344
 345#endif /* CONFIG_PPC64 */
 346
/*
 * Layout for non-RT signal frames
 *
 * This is what the 32-bit user stack walker in this file expects to find
 * at the stack pointer of a non-RT signal frame; the field order must not
 * change.
 */
struct signal_frame_32 {
	char                    dummy[__SIGNAL_FRAMESIZE32];
	struct sigcontext32     sctx;	/* sctx.regs is checked to point at mctx */
	struct mcontext32       mctx;	/* saved registers are read from mctx.mc_gregs */
	int                     abigap[56];
};
 356
/*
 * Layout for RT signal frames
 *
 * This is what the 32-bit user stack walker in this file expects to find
 * at the stack pointer of an RT signal frame; the field order must not
 * change.
 */
struct rt_signal_frame_32 {
	char                    dummy[__SIGNAL_FRAMESIZE32 + 16];
	compat_siginfo_t        info;
	struct ucontext32       uc;	/* uc.uc_regs is checked to point at uc.uc_mcontext */
	int                     abigap[56];
};
 366
 367static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
 368{
 369        if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
 370                return 1;
 371        if (vdso32_sigtramp && current->mm->context.vdso_base &&
 372            nip == current->mm->context.vdso_base + vdso32_sigtramp)
 373                return 1;
 374        return 0;
 375}
 376
 377static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
 378{
 379        if (nip == fp + offsetof(struct rt_signal_frame_32,
 380                                 uc.uc_mcontext.mc_pad))
 381                return 1;
 382        if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
 383            nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
 384                return 1;
 385        return 0;
 386}
 387
 388static int sane_signal_32_frame(unsigned int sp)
 389{
 390        struct signal_frame_32 __user *sf;
 391        unsigned int regs;
 392
 393        sf = (struct signal_frame_32 __user *) (unsigned long) sp;
 394        if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
 395                return 0;
 396        return regs == (unsigned long) &sf->mctx;
 397}
 398
 399static int sane_rt_signal_32_frame(unsigned int sp)
 400{
 401        struct rt_signal_frame_32 __user *sf;
 402        unsigned int regs;
 403
 404        sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
 405        if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
 406                return 0;
 407        return regs == (unsigned long) &sf->uc.uc_mcontext;
 408}
 409
 410static unsigned int __user *signal_frame_32_regs(unsigned int sp,
 411                                unsigned int next_sp, unsigned int next_ip)
 412{
 413        struct mcontext32 __user *mctx = NULL;
 414        struct signal_frame_32 __user *sf;
 415        struct rt_signal_frame_32 __user *rt_sf;
 416
 417        /*
 418         * Note: the next_sp - sp >= signal frame size check
 419         * is true when next_sp < sp, for example, when
 420         * transitioning from an alternate signal stack to the
 421         * normal stack.
 422         */
 423        if (next_sp - sp >= sizeof(struct signal_frame_32) &&
 424            is_sigreturn_32_address(next_ip, sp) &&
 425            sane_signal_32_frame(sp)) {
 426                sf = (struct signal_frame_32 __user *) (unsigned long) sp;
 427                mctx = &sf->mctx;
 428        }
 429
 430        if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
 431            is_rt_sigreturn_32_address(next_ip, sp) &&
 432            sane_rt_signal_32_frame(sp)) {
 433                rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
 434                mctx = &rt_sf->uc.uc_mcontext;
 435        }
 436
 437        if (!mctx)
 438                return NULL;
 439        return mctx->mc_gregs;
 440}
 441
 442static void perf_callchain_user_32(struct perf_callchain_entry *entry,
 443                                   struct pt_regs *regs)
 444{
 445        unsigned int sp, next_sp;
 446        unsigned int next_ip;
 447        unsigned int lr;
 448        long level = 0;
 449        unsigned int __user *fp, *uregs;
 450
 451        next_ip = perf_instruction_pointer(regs);
 452        lr = regs->link;
 453        sp = regs->gpr[1];
 454        perf_callchain_store(entry, next_ip);
 455
 456        while (entry->nr < PERF_MAX_STACK_DEPTH) {
 457                fp = (unsigned int __user *) (unsigned long) sp;
 458                if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
 459                        return;
 460                if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
 461                        return;
 462
 463                uregs = signal_frame_32_regs(sp, next_sp, next_ip);
 464                if (!uregs && level <= 1)
 465                        uregs = signal_frame_32_regs(sp, next_sp, lr);
 466                if (uregs) {
 467                        /*
 468                         * This looks like an signal frame, so restart
 469                         * the stack trace with the values in it.
 470                         */
 471                        if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
 472                            read_user_stack_32(&uregs[PT_LNK], &lr) ||
 473                            read_user_stack_32(&uregs[PT_R1], &sp))
 474                                return;
 475                        level = 0;
 476                        perf_callchain_store(entry, PERF_CONTEXT_USER);
 477                        perf_callchain_store(entry, next_ip);
 478                        continue;
 479                }
 480
 481                if (level == 0)
 482                        next_ip = lr;
 483                perf_callchain_store(entry, next_ip);
 484                ++level;
 485                sp = next_sp;
 486        }
 487}
 488
 489void
 490perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 491{
 492        if (current_is_64bit())
 493                perf_callchain_user_64(entry, regs);
 494        else
 495                perf_callchain_user_32(entry, regs);
 496}
 497