linux/arch/x86/kernel/dumpstack.c
<<
>>
Prefs
   1/*
   2 *  Copyright (C) 1991, 1992  Linus Torvalds
   3 *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
   4 */
   5#include <linux/kallsyms.h>
   6#include <linux/kprobes.h>
   7#include <linux/uaccess.h>
   8#include <linux/utsname.h>
   9#include <linux/hardirq.h>
  10#include <linux/kdebug.h>
  11#include <linux/module.h>
  12#include <linux/ptrace.h>
  13#include <linux/sched/debug.h>
  14#include <linux/sched/task_stack.h>
  15#include <linux/ftrace.h>
  16#include <linux/kexec.h>
  17#include <linux/bug.h>
  18#include <linux/nmi.h>
  19#include <linux/sysfs.h>
  20
  21#include <asm/stacktrace.h>
  22#include <asm/unwind.h>
  23
/*
 * NMI panic policy flags.  They are only defined here and are not referenced
 * by the code visible in this file; presumably the NMI handling code
 * elsewhere consumes them -- confirm against their users.
 */
int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
/*
 * Defaults to 64; the "code_bytes=" boot parameter overrides it and
 * code_bytes_setup() caps it at 8192.  Not read in the code visible here;
 * presumably the number of instruction bytes shown in a register dump --
 * confirm against its users.
 */
unsigned int code_bytes = 64;
/* Bumped by __die() for every report and printed there as the "[#N]" tag. */
static int die_counter;
  28
  29bool in_task_stack(unsigned long *stack, struct task_struct *task,
  30                   struct stack_info *info)
  31{
  32        unsigned long *begin = task_stack_page(task);
  33        unsigned long *end   = task_stack_page(task) + THREAD_SIZE;
  34
  35        if (stack < begin || stack >= end)
  36                return false;
  37
  38        info->type      = STACK_TYPE_TASK;
  39        info->begin     = begin;
  40        info->end       = end;
  41        info->next_sp   = NULL;
  42
  43        return true;
  44}
  45
  46static void printk_stack_address(unsigned long address, int reliable,
  47                                 char *log_lvl)
  48{
  49        touch_nmi_watchdog();
  50        printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
  51}
  52
/*
 * Print a "Call Trace:" for @task.
 *
 * @task:    task whose stack(s) are walked
 * @regs:    register state handed to unwind_start() and get_stack_pointer();
 *           the parameter is reset to NULL before the walk starts and from
 *           then on only holds entry regs reported by the unwinder
 * @stack:   address to start scanning from; when NULL, the start address is
 *           taken from get_stack_pointer(task, regs)
 * @log_lvl: printk level prefix for the lines printed directly by this
 *           function (the __show_regs() output does not take it)
 *
 * Each visited stack is scanned word by word and every word that
 * __kernel_text_address() accepts is printed.  A word located at the
 * unwinder's current return-address slot is printed plainly; any other hit
 * is printed with a "? " prefix.
 */
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                        unsigned long *stack, char *log_lvl)
{
        struct unwind_state state;
        struct stack_info stack_info = {0};
        /*
         * Handed to get_stack_info() for every stack; presumably records the
         * stacks already visited -- confirm in get_stack_info().
         */
        unsigned long visit_mask = 0;
        /* Cursor passed to ftrace_graph_ret_addr() across the whole walk. */
        int graph_idx = 0;

        printk("%sCall Trace:\n", log_lvl);

        /*
         * The unwinder is started with the caller's original arguments,
         * before a NULL @stack is replaced by the fallback below.
         */
        unwind_start(&state, task, regs, stack);
        stack = stack ? : get_stack_pointer(task, regs);

        /*
         * Iterate through the stacks, starting with the current stack pointer.
         * Each stack has a pointer to the next one.
         *
         * x86-64 can have several stacks:
         * - task stack
         * - interrupt stack
         * - HW exception stacks (double fault, nmi, debug, mce)
         *
         * x86-32 can have up to three stacks:
         * - task stack
         * - softirq stack
         * - hardirq stack
         */
        /*
         * Note that regs is reset to NULL in the loop initializer; the walk
         * ends when a stack reports no next_sp or get_stack_info() fails.
         */
        for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
                const char *stack_name;

                /*
                 * If we overflowed the task stack into a guard page, jump back
                 * to the bottom of the usable stack.
                 */
                if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
                        stack = task_stack_page(task);

                /* A nonzero return means there is no usable stack here: stop. */
                if (get_stack_info(stack, task, &stack_info, &visit_mask))
                        break;

                /* Stacks that have a name are bracketed by <name> ... </name>. */
                stack_name = stack_type_name(stack_info.type);
                if (stack_name)
                        printk("%s <%s>\n", log_lvl, stack_name);

                /*
                 * regs can only be non-NULL here when a previous stack's scan
                 * left entry regs behind; show them if they live on this stack.
                 */
                if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
                        __show_regs(regs, 0);

                /*
                 * Scan the stack, printing any text addresses we find.  At the
                 * same time, follow proper stack frames with the unwinder.
                 *
                 * Addresses found during the scan which are not reported by
                 * the unwinder are considered to be additional clues which are
                 * sometimes useful for debugging and are prefixed with '?'.
                 * This also serves as a failsafe option in case the unwinder
                 * goes off in the weeds.
                 */
                for (; stack < stack_info.end; stack++) {
                        unsigned long real_addr;
                        int reliable = 0;
                        unsigned long addr = READ_ONCE_NOCHECK(*stack);
                        unsigned long *ret_addr_p =
                                unwind_get_return_address_ptr(&state);

                        /* Words that are not kernel text addresses are skipped. */
                        if (!__kernel_text_address(addr))
                                continue;

                        /*
                         * Don't print regs->ip again if it was already printed
                         * by __show_regs() below.
                         */
                        if (regs && stack == &regs->ip)
                                goto next;

                        /*
                         * Only the slot the unwinder currently points at is
                         * treated as a reliable return address.
                         */
                        if (stack == ret_addr_p)
                                reliable = 1;

                        /*
                         * When function graph tracing is enabled for a
                         * function, its return address on the stack is
                         * replaced with the address of an ftrace handler
                         * (return_to_handler).  In that case, before printing
                         * the "real" address, we want to print the handler
                         * address as an "unreliable" hint that function graph
                         * tracing was involved.
                         */
                        real_addr = ftrace_graph_ret_addr(task, &graph_idx,
                                                          addr, stack);
                        if (real_addr != addr)
                                printk_stack_address(addr, 0, log_lvl);
                        printk_stack_address(real_addr, reliable, log_lvl);

                        /*
                         * An unreliable hit does not consume an unwinder
                         * frame; keep scanning with the same unwinder state.
                         */
                        if (!reliable)
                                continue;

next:
                        /*
                         * Get the next frame from the unwinder.  No need to
                         * check for an error: if anything goes wrong, the rest
                         * of the addresses will just be printed as unreliable.
                         */
                        unwind_next_frame(&state);

                        /* if the frame has entry regs, print them */
                        regs = unwind_get_entry_regs(&state);
                        if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
                                __show_regs(regs, 0);
                }

                if (stack_name)
                        printk("%s </%s>\n", log_lvl, stack_name);
        }
}
 166
 167void show_stack(struct task_struct *task, unsigned long *sp)
 168{
 169        task = task ? : current;
 170
 171        /*
 172         * Stack frames below this one aren't interesting.  Don't show them
 173         * if we're printing for %current.
 174         */
 175        if (!sp && task == current)
 176                sp = get_stack_pointer(current, NULL);
 177
 178        show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT);
 179}
 180
/*
 * Print a call trace for the current task at KERN_DEFAULT level, starting
 * the unwind from @regs.  No explicit stack pointer is passed, so
 * show_trace_log_lvl() derives the start address from
 * get_stack_pointer(current, regs).
 */
void show_stack_regs(struct pt_regs *regs)
{
        show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
}
 185
/*
 * State shared by oops_begin() and oops_end():
 *
 * die_lock       - taken in oops_begin(); released in oops_end() once
 *                  die_nest_count has dropped back to zero
 * die_owner      - CPU id stored by oops_begin(); reset to -1 by oops_end()
 * die_nest_count - incremented by every oops_begin() and decremented by
 *                  every oops_end()
 */
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
 189
 190unsigned long oops_begin(void)
 191{
 192        int cpu;
 193        unsigned long flags;
 194
 195        oops_enter();
 196
 197        /* racy, but better than risking deadlock. */
 198        raw_local_irq_save(flags);
 199        cpu = smp_processor_id();
 200        if (!arch_spin_trylock(&die_lock)) {
 201                if (cpu == die_owner)
 202                        /* nested oops. should stop eventually */;
 203                else
 204                        arch_spin_lock(&die_lock);
 205        }
 206        die_nest_count++;
 207        die_owner = cpu;
 208        console_verbose();
 209        bust_spinlocks(1);
 210        return flags;
 211}
 212EXPORT_SYMBOL_GPL(oops_begin);
 213NOKPROBE_SYMBOL(oops_begin);
 214
 215void __noreturn rewind_stack_do_exit(int signr);
 216
 217void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 218{
 219        if (regs && kexec_should_crash(current))
 220                crash_kexec(regs);
 221
 222        bust_spinlocks(0);
 223        die_owner = -1;
 224        add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
 225        die_nest_count--;
 226        if (!die_nest_count)
 227                /* Nest count reaches zero, release the lock. */
 228                arch_spin_unlock(&die_lock);
 229        raw_local_irq_restore(flags);
 230        oops_exit();
 231
 232        if (!signr)
 233                return;
 234        if (in_interrupt())
 235                panic("Fatal exception in interrupt");
 236        if (panic_on_oops)
 237                panic("Fatal exception");
 238
 239        /*
 240         * We're not going to return, but we might be on an IST stack or
 241         * have very little stack space left.  Rewind the stack and kill
 242         * the task.
 243         */
 244        rewind_stack_do_exit(signr);
 245}
 246NOKPROBE_SYMBOL(oops_end);
 247
 248int __die(const char *str, struct pt_regs *regs, long err)
 249{
 250#ifdef CONFIG_X86_32
 251        unsigned short ss;
 252        unsigned long sp;
 253#endif
 254        printk(KERN_DEFAULT
 255               "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
 256               IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT"         : "",
 257               IS_ENABLED(CONFIG_SMP)     ? " SMP"             : "",
 258               debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
 259               IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "");
 260
 261        if (notify_die(DIE_OOPS, str, regs, err,
 262                        current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
 263                return 1;
 264
 265        print_modules();
 266        show_regs(regs);
 267#ifdef CONFIG_X86_32
 268        if (user_mode(regs)) {
 269                sp = regs->sp;
 270                ss = regs->ss;
 271        } else {
 272                sp = kernel_stack_pointer(regs);
 273                savesegment(ss, ss);
 274        }
 275        printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n",
 276               (void *)regs->ip, ss, sp);
 277#else
 278        /* Executive summary in case the oops scrolled away */
 279        printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp);
 280#endif
 281        return 0;
 282}
 283NOKPROBE_SYMBOL(__die);
 284
 285/*
 286 * This is gone through when something in the kernel has done something bad
 287 * and is about to be terminated:
 288 */
 289void die(const char *str, struct pt_regs *regs, long err)
 290{
 291        unsigned long flags = oops_begin();
 292        int sig = SIGSEGV;
 293
 294        if (__die(str, regs, err))
 295                sig = 0;
 296        oops_end(flags, regs, sig);
 297}
 298
 299static int __init code_bytes_setup(char *s)
 300{
 301        ssize_t ret;
 302        unsigned long val;
 303
 304        if (!s)
 305                return -EINVAL;
 306
 307        ret = kstrtoul(s, 0, &val);
 308        if (ret)
 309                return ret;
 310
 311        code_bytes = val;
 312        if (code_bytes > 8192)
 313                code_bytes = 8192;
 314
 315        return 1;
 316}
 317__setup("code_bytes=", code_bytes_setup);
 318