linux/arch/x86/kernel/dumpstack.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 1991, 1992  Linus Torvalds
   3 *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
   4 */
   5#include <linux/kallsyms.h>
   6#include <linux/kprobes.h>
   7#include <linux/uaccess.h>
   8#include <linux/utsname.h>
   9#include <linux/hardirq.h>
  10#include <linux/kdebug.h>
  11#include <linux/module.h>
  12#include <linux/ptrace.h>
  13#include <linux/sched/debug.h>
  14#include <linux/sched/task_stack.h>
  15#include <linux/ftrace.h>
  16#include <linux/kexec.h>
  17#include <linux/bug.h>
  18#include <linux/nmi.h>
  19#include <linux/sysfs.h>
  20
  21#include <asm/stacktrace.h>
  22#include <asm/unwind.h>
  23
  24int panic_on_unrecovered_nmi;
  25int panic_on_io_nmi;
  26unsigned int code_bytes = 64;
  27static int die_counter;
  28
  29bool in_task_stack(unsigned long *stack, struct task_struct *task,
  30                   struct stack_info *info)
  31{
  32        unsigned long *begin = task_stack_page(task);
  33        unsigned long *end   = task_stack_page(task) + THREAD_SIZE;
  34
  35        if (stack < begin || stack >= end)
  36                return false;
  37
  38        info->type      = STACK_TYPE_TASK;
  39        info->begin     = begin;
  40        info->end       = end;
  41        info->next_sp   = NULL;
  42
  43        return true;
  44}
  45
  46static void printk_stack_address(unsigned long address, int reliable,
  47                                 char *log_lvl)
  48{
  49        touch_nmi_watchdog();
  50        printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
  51}
  52
  53void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
  54                        unsigned long *stack, char *log_lvl)
  55{
  56        struct unwind_state state;
  57        struct stack_info stack_info = {0};
  58        unsigned long visit_mask = 0;
  59        int graph_idx = 0;
  60
  61        printk("%sCall Trace:\n", log_lvl);
  62
  63        unwind_start(&state, task, regs, stack);
  64        stack = stack ? : get_stack_pointer(task, regs);
  65
  66        /*
  67         * Iterate through the stacks, starting with the current stack pointer.
  68         * Each stack has a pointer to the next one.
  69         *
  70         * x86-64 can have several stacks:
  71         * - task stack
  72         * - interrupt stack
  73         * - HW exception stacks (double fault, nmi, debug, mce)
  74         *
  75         * x86-32 can have up to three stacks:
  76         * - task stack
  77         * - softirq stack
  78         * - hardirq stack
  79         */
  80        for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
  81                const char *stack_name;
  82
  83                /*
  84                 * If we overflowed the task stack into a guard page, jump back
  85                 * to the bottom of the usable stack.
  86                 */
  87                if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
  88                        stack = task_stack_page(task);
  89
  90                if (get_stack_info(stack, task, &stack_info, &visit_mask))
  91                        break;
  92
  93                stack_name = stack_type_name(stack_info.type);
  94                if (stack_name)
  95                        printk("%s <%s>\n", log_lvl, stack_name);
  96
  97                /*
  98                 * Scan the stack, printing any text addresses we find.  At the
  99                 * same time, follow proper stack frames with the unwinder.
 100                 *
 101                 * Addresses found during the scan which are not reported by
 102                 * the unwinder are considered to be additional clues which are
 103                 * sometimes useful for debugging and are prefixed with '?'.
 104                 * This also serves as a failsafe option in case the unwinder
 105                 * goes off in the weeds.
 106                 */
 107                for (; stack < stack_info.end; stack++) {
 108                        unsigned long real_addr;
 109                        int reliable = 0;
 110                        unsigned long addr = READ_ONCE_NOCHECK(*stack);
 111                        unsigned long *ret_addr_p =
 112                                unwind_get_return_address_ptr(&state);
 113
 114                        if (!__kernel_text_address(addr))
 115                                continue;
 116
 117                        /*
 118                         * Don't print regs->ip again if it was already printed
 119                         * by __show_regs() below.
 120                         */
 121                        if (regs && stack == &regs->ip) {
 122                                unwind_next_frame(&state);
 123                                continue;
 124                        }
 125
 126                        if (stack == ret_addr_p)
 127                                reliable = 1;
 128
 129                        /*
 130                         * When function graph tracing is enabled for a
 131                         * function, its return address on the stack is
 132                         * replaced with the address of an ftrace handler
 133                         * (return_to_handler).  In that case, before printing
 134                         * the "real" address, we want to print the handler
 135                         * address as an "unreliable" hint that function graph
 136                         * tracing was involved.
 137                         */
 138                        real_addr = ftrace_graph_ret_addr(task, &graph_idx,
 139                                                          addr, stack);
 140                        if (real_addr != addr)
 141                                printk_stack_address(addr, 0, log_lvl);
 142                        printk_stack_address(real_addr, reliable, log_lvl);
 143
 144                        if (!reliable)
 145                                continue;
 146
 147                        /*
 148                         * Get the next frame from the unwinder.  No need to
 149                         * check for an error: if anything goes wrong, the rest
 150                         * of the addresses will just be printed as unreliable.
 151                         */
 152                        unwind_next_frame(&state);
 153
 154                        /* if the frame has entry regs, print them */
 155                        regs = unwind_get_entry_regs(&state);
 156                        if (regs)
 157                                __show_regs(regs, 0);
 158                }
 159
 160                if (stack_name)
 161                        printk("%s </%s>\n", log_lvl, stack_name);
 162        }
 163}
 164
 165void show_stack(struct task_struct *task, unsigned long *sp)
 166{
 167        task = task ? : current;
 168
 169        /*
 170         * Stack frames below this one aren't interesting.  Don't show them
 171         * if we're printing for %current.
 172         */
 173        if (!sp && task == current)
 174                sp = get_stack_pointer(current, NULL);
 175
 176        show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT);
 177}
 178
 179void show_stack_regs(struct pt_regs *regs)
 180{
 181        show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
 182}
 183
 184static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 185static int die_owner = -1;
 186static unsigned int die_nest_count;
 187
 188unsigned long oops_begin(void)
 189{
 190        int cpu;
 191        unsigned long flags;
 192
 193        oops_enter();
 194
 195        /* racy, but better than risking deadlock. */
 196        raw_local_irq_save(flags);
 197        cpu = smp_processor_id();
 198        if (!arch_spin_trylock(&die_lock)) {
 199                if (cpu == die_owner)
 200                        /* nested oops. should stop eventually */;
 201                else
 202                        arch_spin_lock(&die_lock);
 203        }
 204        die_nest_count++;
 205        die_owner = cpu;
 206        console_verbose();
 207        bust_spinlocks(1);
 208        return flags;
 209}
 210EXPORT_SYMBOL_GPL(oops_begin);
 211NOKPROBE_SYMBOL(oops_begin);
 212
 213void __noreturn rewind_stack_do_exit(int signr);
 214
 215void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 216{
 217        if (regs && kexec_should_crash(current))
 218                crash_kexec(regs);
 219
 220        bust_spinlocks(0);
 221        die_owner = -1;
 222        add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 223        die_nest_count--;
 224        if (!die_nest_count)
 225                /* Nest count reaches zero, release the lock. */
 226                arch_spin_unlock(&die_lock);
 227        raw_local_irq_restore(flags);
 228        oops_exit();
 229
 230        if (!signr)
 231                return;
 232        if (in_interrupt())
 233                panic("Fatal exception in interrupt");
 234        if (panic_on_oops)
 235                panic("Fatal exception");
 236
 237        /*
 238         * We're not going to return, but we might be on an IST stack or
 239         * have very little stack space left.  Rewind the stack and kill
 240         * the task.
 241         */
 242        rewind_stack_do_exit(signr);
 243}
 244NOKPROBE_SYMBOL(oops_end);
 245
 246int __die(const char *str, struct pt_regs *regs, long err)
 247{
 248#ifdef CONFIG_X86_32
 249        unsigned short ss;
 250        unsigned long sp;
 251#endif
 252        printk(KERN_DEFAULT
 253               "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
 254               IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT"         : "",
 255               IS_ENABLED(CONFIG_SMP)     ? " SMP"             : "",
 256               debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
 257               IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "");
 258
 259        if (notify_die(DIE_OOPS, str, regs, err,
 260                        current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
 261                return 1;
 262
 263        print_modules();
 264        show_regs(regs);
 265#ifdef CONFIG_X86_32
 266        if (user_mode(regs)) {
 267                sp = regs->sp;
 268                ss = regs->ss & 0xffff;
 269        } else {
 270                sp = kernel_stack_pointer(regs);
 271                savesegment(ss, ss);
 272        }
 273        printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n",
 274               (void *)regs->ip, ss, sp);
 275#else
 276        /* Executive summary in case the oops scrolled away */
 277        printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp);
 278#endif
 279        return 0;
 280}
 281NOKPROBE_SYMBOL(__die);
 282
 283/*
 284 * This is gone through when something in the kernel has done something bad
 285 * and is about to be terminated:
 286 */
 287void die(const char *str, struct pt_regs *regs, long err)
 288{
 289        unsigned long flags = oops_begin();
 290        int sig = SIGSEGV;
 291
 292        if (__die(str, regs, err))
 293                sig = 0;
 294        oops_end(flags, regs, sig);
 295}
 296
 297static int __init code_bytes_setup(char *s)
 298{
 299        ssize_t ret;
 300        unsigned long val;
 301
 302        if (!s)
 303                return -EINVAL;
 304
 305        ret = kstrtoul(s, 0, &val);
 306        if (ret)
 307                return ret;
 308
 309        code_bytes = val;
 310        if (code_bytes > 8192)
 311                code_bytes = 8192;
 312
 313        return 1;
 314}
 315__setup("code_bytes=", code_bytes_setup);
 316