linux/kernel/panic.c
/*
 *  linux/kernel/panic.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * The panic() function is used throughout the kernel (including mm and fs)
 * to indicate a major problem.
 */
#include <linux/debug_locks.h>
#include <linux/sched/debug.h>
#include <linux/interrupt.h>
#include <linux/kmsg_dump.h>
#include <linux/kallsyms.h>
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/ftrace.h>
#include <linux/reboot.h>
#include <linux/delay.h>
#include <linux/kexec.h>
#include <linux/sched.h>
#include <linux/sysrq.h>
#include <linux/init.h>
#include <linux/nmi.h>
#include <linux/console.h>
#include <linux/bug.h>
#include <linux/ratelimit.h>
#include <linux/debugfs.h>
#include <asm/sections.h>

#define PANIC_TIMER_STEP 100
#define PANIC_BLINK_SPD 18

int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
static unsigned long tainted_mask;
static int pause_on_oops;
static int pause_on_oops_flag;
static DEFINE_SPINLOCK(pause_on_oops_lock);
bool crash_kexec_post_notifiers;
int panic_on_warn __read_mostly;

int panic_timeout = CONFIG_PANIC_TIMEOUT;
EXPORT_SYMBOL_GPL(panic_timeout);

ATOMIC_NOTIFIER_HEAD(panic_notifier_list);

EXPORT_SYMBOL(panic_notifier_list);

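/*
 * Example (illustrative sketch, not part of this file): a driver that wants
 * a last word at panic time registers on panic_notifier_list; the names
 * my_panic_cb/my_panic_nb below are hypothetical:
 *
 *      static int my_panic_cb(struct notifier_block *nb,
 *                             unsigned long event, void *buf)
 *      {
 *              pr_emerg("my_dev: panic: %s\n", (char *)buf);
 *              return NOTIFY_DONE;
 *      }
 *
 *      static struct notifier_block my_panic_nb = {
 *              .notifier_call = my_panic_cb,
 *      };
 *
 *      atomic_notifier_chain_register(&panic_notifier_list, &my_panic_nb);
 */
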
static long no_blink(int state)
{
        return 0;
}

/* Returns how long it waited in ms */
long (*panic_blink)(int state);
EXPORT_SYMBOL(panic_blink);

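/*
 * Example (illustrative sketch): a platform with a panic LED can install
 * its own blink hook; my_led_blink() and my_led_set() are hypothetical.
 * The hook drives the LED and returns how many ms it spent doing so:
 *
 *      static long my_led_blink(int state)
 *      {
 *              my_led_set(state);
 *              return 0;
 *      }
 *
 *      panic_blink = my_led_blink;
 */
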
/*
 * Stop ourselves in panic -- architecture code may override this
 */
void __weak panic_smp_self_stop(void)
{
        while (1)
                cpu_relax();
}

/*
 * Stop ourselves in NMI context if another CPU has already panicked. Arch code
 * may override this to prepare for crash dumping, e.g. save regs info.
 */
void __weak nmi_panic_self_stop(struct pt_regs *regs)
{
        panic_smp_self_stop();
}

/*
 * Stop other CPUs in panic.  Architecture dependent code may override this
 * with a more suitable version.  For example, if the architecture supports
 * crash dump, it should save registers of each stopped CPU and disable
 * per-CPU features such as virtualization extensions.
 */
void __weak crash_smp_send_stop(void)
{
        static int cpus_stopped;

        /*
         * This function can be called twice in the panic path, but obviously
         * we execute this only once.
         */
        if (cpus_stopped)
                return;

        /*
         * Note smp_send_stop is the usual smp shutdown function, which
         * unfortunately means it may not be hardened to work in a panic
         * situation.
         */
        smp_send_stop();
        cpus_stopped = 1;
}

atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);

/*
 * A variant of panic() called from NMI context. We return if we've already
 * panicked on this CPU. If another CPU already panicked, loop in
 * nmi_panic_self_stop() which can provide architecture dependent code such
 * as saving register state for crash dump.
 */
void nmi_panic(struct pt_regs *regs, const char *msg)
{
        int old_cpu, cpu;

        cpu = raw_smp_processor_id();
        old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);

        if (old_cpu == PANIC_CPU_INVALID)
                panic("%s", msg);
        else if (old_cpu != cpu)
                nmi_panic_self_stop(regs);
}
EXPORT_SYMBOL(nmi_panic);

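/*
 * Example (illustrative): the hard-lockup watchdog escalates from NMI
 * context via this helper, roughly:
 *
 *      if (hardlockup_panic)
 *              nmi_panic(regs, "Hard LOCKUP");
 */
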
/**
 *      panic - halt the system
 *      @fmt: The text string to print
 *
 *      Display a message, then perform cleanups.
 *
 *      This function never returns.
 */
void panic(const char *fmt, ...)
{
        static char buf[1024];
        va_list args;
        long i, i_next = 0;
        int state = 0;
        int old_cpu, this_cpu;
        bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;

        /*
         * Disable local interrupts. This will prevent panic_smp_self_stop
         * from deadlocking the first cpu that invokes the panic, since
         * there is nothing to prevent an interrupt handler (that runs
         * after setting panic_cpu) from invoking panic() again.
         */
        local_irq_disable();

        /*
         * It's possible to come here directly from a panic-assertion and
         * not have preempt disabled. Some functions called from here want
         * preempt to be disabled. No point enabling it later though...
         *
         * Only one CPU is allowed to execute the panic code from here. For
         * multiple parallel invocations of panic, all other CPUs either
         * stop themselves or wait until they are stopped by the 1st CPU
         * with smp_send_stop().
         *
         * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which
         * comes here, so go ahead.
         * `old_cpu == this_cpu' means we came from nmi_panic() which sets
         * panic_cpu to this CPU.  In this case, this is also the 1st CPU.
         */
        this_cpu = raw_smp_processor_id();
        old_cpu  = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);

        if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
                panic_smp_self_stop();

        console_verbose();
        bust_spinlocks(1);
        va_start(args, fmt);
        vsnprintf(buf, sizeof(buf), fmt, args);
        va_end(args);
        pr_emerg("Kernel panic - not syncing: %s\n", buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
        /*
         * Avoid nested stack-dumping if a panic occurs during oops processing
         */
        if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
                dump_stack();
#endif

        /*
         * If we have crashed and we have a crash kernel loaded let it handle
         * everything else.
         * If we want to run this after calling panic_notifiers, pass
         * the "crash_kexec_post_notifiers" option to the kernel.
         *
         * Bypass the panic_cpu check and call __crash_kexec directly.
         */
        if (!_crash_kexec_post_notifiers) {
                printk_safe_flush_on_panic();
                __crash_kexec(NULL);

                /*
                 * Note smp_send_stop is the usual smp shutdown function, which
                 * unfortunately means it may not be hardened to work in a
                 * panic situation.
                 */
                smp_send_stop();
        } else {
                /*
                 * If we want to do a crash dump after the notifier calls and
                 * kmsg_dump, we need architecture dependent extra work in
                 * addition to stopping other CPUs.
                 */
                crash_smp_send_stop();
        }

        /*
         * Run any panic handlers, including those that might need to
         * add information to the kmsg dump output.
         */
        atomic_notifier_call_chain(&panic_notifier_list, 0, buf);

        /* Call flush even twice. It tries harder with a single online CPU */
        printk_safe_flush_on_panic();
        kmsg_dump(KMSG_DUMP_PANIC);

        /*
         * If you doubt that kdump always works fine in any situation,
         * "crash_kexec_post_notifiers" offers you a chance to run the
         * panic notifiers and dump kmsg before kdump.
         * Note: since some panic notifiers can make the crashed kernel
         * more unstable, this can also increase the risk of kdump failure.
         *
         * Bypass the panic_cpu check and call __crash_kexec directly.
         */
        if (_crash_kexec_post_notifiers)
                __crash_kexec(NULL);

        bust_spinlocks(0);

        /*
         * We may have ended up stopping the CPU holding the lock (in
         * smp_send_stop()) while still having some valuable data in the console
         * buffer.  Try to acquire the lock then release it regardless of the
         * result.  The release will also print the buffers out.  Lock debugging
         * should be disabled to avoid reporting bad unlock balance when
         * panic() is not being called from an oops.
         */
        debug_locks_off();
        console_flush_on_panic();

        if (!panic_blink)
                panic_blink = no_blink;

        if (panic_timeout > 0) {
                /*
                 * Delay timeout seconds before rebooting the machine.
                 * We can't use the "normal" timers since we just panicked.
                 */
                pr_emerg("Rebooting in %d seconds..\n", panic_timeout);

                for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
                        touch_nmi_watchdog();
                        if (i >= i_next) {
                                i += panic_blink(state ^= 1);
                                i_next = i + 3600 / PANIC_BLINK_SPD;
                        }
                        mdelay(PANIC_TIMER_STEP);
                }
        }
        if (panic_timeout != 0) {
                /*
                 * This will not be a clean reboot, with everything
                 * shutting down.  But if there is a chance of
                 * rebooting the system it will be rebooted.
                 */
                emergency_restart();
        }
#ifdef __sparc__
        {
                extern int stop_a_enabled;
                /* Make sure the user can actually press Stop-A (L1-A) */
                stop_a_enabled = 1;
                pr_emerg("Press Stop-A (L1-A) from sun keyboard or send break\n"
                         "twice on console to return to the boot prom\n");
        }
#endif
#if defined(CONFIG_S390)
        {
                unsigned long caller;

                caller = (unsigned long)__builtin_return_address(0);
                disabled_wait(caller);
        }
#endif
        pr_emerg("---[ end Kernel panic - not syncing: %s\n", buf);
        local_irq_enable();
        for (i = 0; ; i += PANIC_TIMER_STEP) {
                touch_softlockup_watchdog();
                if (i >= i_next) {
                        i += panic_blink(state ^= 1);
                        i_next = i + 3600 / PANIC_BLINK_SPD;
                }
                mdelay(PANIC_TIMER_STEP);
        }
}

EXPORT_SYMBOL(panic);

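/*
 * Example (illustrative): callers hand panic() a printf-style message;
 * a typical early-boot allocation failure looks like
 *
 *      panic("%s: Failed to allocate %zu bytes\n", __func__, size);
 */
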
/*
 * TAINT_FORCED_RMMOD could be a per-module flag but the module
 * is being removed anyway.
 */
const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = {
        { 'P', 'G', true },     /* TAINT_PROPRIETARY_MODULE */
        { 'F', ' ', true },     /* TAINT_FORCED_MODULE */
        { 'S', ' ', false },    /* TAINT_CPU_OUT_OF_SPEC */
        { 'R', ' ', false },    /* TAINT_FORCED_RMMOD */
        { 'M', ' ', false },    /* TAINT_MACHINE_CHECK */
        { 'B', ' ', false },    /* TAINT_BAD_PAGE */
        { 'U', ' ', false },    /* TAINT_USER */
        { 'D', ' ', false },    /* TAINT_DIE */
        { 'A', ' ', false },    /* TAINT_OVERRIDDEN_ACPI_TABLE */
        { 'W', ' ', false },    /* TAINT_WARN */
        { 'C', ' ', true },     /* TAINT_CRAP */
        { 'I', ' ', false },    /* TAINT_FIRMWARE_WORKAROUND */
        { 'O', ' ', true },     /* TAINT_OOT_MODULE */
        { 'E', ' ', true },     /* TAINT_UNSIGNED_MODULE */
        { 'L', ' ', false },    /* TAINT_SOFTLOCKUP */
        { 'K', ' ', true },     /* TAINT_LIVEPATCH */
        { 'X', ' ', true },     /* TAINT_AUX */
};

/**
 *      print_tainted - return a string to represent the kernel taint state.
 *
 *  'P' - Proprietary module has been loaded.
 *  'F' - Module has been forcibly loaded.
 *  'S' - SMP with CPUs not designed for SMP.
 *  'R' - User forced a module unload.
 *  'M' - System experienced a machine check exception.
 *  'B' - System has hit bad_page.
 *  'U' - Userspace-defined naughtiness.
 *  'D' - Kernel has oopsed before.
 *  'A' - ACPI table overridden.
 *  'W' - Taint on warning.
 *  'C' - Modules from drivers/staging are loaded.
 *  'I' - Working around severe firmware bug.
 *  'O' - Out-of-tree module has been loaded.
 *  'E' - Unsigned module has been loaded.
 *  'L' - A soft lockup has previously occurred.
 *  'K' - Kernel has been live patched.
 *  'X' - Auxiliary taint, for distros' use.
 *
 *      The string is overwritten by the next call to print_tainted().
 */
const char *print_tainted(void)
{
        static char buf[TAINT_FLAGS_COUNT + sizeof("Tainted: ")];

        if (tainted_mask) {
                char *s;
                int i;

                s = buf + sprintf(buf, "Tainted: ");
                for (i = 0; i < TAINT_FLAGS_COUNT; i++) {
                        const struct taint_flag *t = &taint_flags[i];
                        *s++ = test_bit(i, &tainted_mask) ?
                                        t->c_true : t->c_false;
                }
                *s = 0;
        } else
                snprintf(buf, sizeof(buf), "Not tainted");

        return buf;
}

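/*
 * Example: with TAINT_PROPRIETARY_MODULE and TAINT_DIE set, the buffer
 * reads "Tainted: P      D" (plus trailing blanks for the unset flags):
 * one column per flag, printing c_true for set bits and c_false (' ',
 * or 'G' in the first column) otherwise.
 */
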
int test_taint(unsigned flag)
{
        return test_bit(flag, &tainted_mask);
}
EXPORT_SYMBOL(test_taint);

unsigned long get_taint(void)
{
        return tainted_mask;
}

/**
 * add_taint: add a taint flag if not already set.
 * @flag: one of the TAINT_* constants.
 * @lockdep_ok: whether lock debugging is still OK.
 *
 * If something bad has gone wrong, you'll want @lockdep_ok =
 * LOCKDEP_NOW_UNRELIABLE, but for some noteworthy-but-not-corrupting
 * cases, it can be LOCKDEP_STILL_OK.
 */
void add_taint(unsigned flag, enum lockdep_ok lockdep_ok)
{
        if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off())
                pr_warn("Disabling lock debugging due to kernel taint\n");

        set_bit(flag, &tainted_mask);
}
EXPORT_SYMBOL(add_taint);

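/*
 * Example (illustrative): a machine-check handler that can no longer
 * trust kernel state taints with lock debugging disabled,
 *
 *      add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 *
 * while merely loading an out-of-tree module is noteworthy but leaves
 * lock debugging usable:
 *
 *      add_taint(TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
 */
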
static void spin_msec(int msecs)
{
        int i;

        for (i = 0; i < msecs; i++) {
                touch_nmi_watchdog();
                mdelay(1);
        }
}

/*
 * It just happens that oops_enter() and oops_exit() are identically
 * implemented...
 */
static void do_oops_enter_exit(void)
{
        unsigned long flags;
        static int spin_counter;

        if (!pause_on_oops)
                return;

        spin_lock_irqsave(&pause_on_oops_lock, flags);
        if (pause_on_oops_flag == 0) {
                /* This CPU may now print the oops message */
                pause_on_oops_flag = 1;
        } else {
                /* We need to stall this CPU */
                if (!spin_counter) {
                        /* This CPU gets to do the counting */
                        spin_counter = pause_on_oops;
                        do {
                                spin_unlock(&pause_on_oops_lock);
                                spin_msec(MSEC_PER_SEC);
                                spin_lock(&pause_on_oops_lock);
                        } while (--spin_counter);
                        pause_on_oops_flag = 0;
                } else {
                        /* This CPU waits for a different one */
                        while (spin_counter) {
                                spin_unlock(&pause_on_oops_lock);
                                spin_msec(1);
                                spin_lock(&pause_on_oops_lock);
                        }
                }
        }
        spin_unlock_irqrestore(&pause_on_oops_lock, flags);
}

/*
 * Return true if the calling CPU is allowed to print oops-related info.
 * This is a bit racy...
 */
int oops_may_print(void)
{
        return pause_on_oops_flag == 0;
}

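/*
 * Example (illustrative): architecture fault handlers gate their output
 * on this, e.g. x86 does roughly
 *
 *      if (oops_may_print())
 *              show_fault_oops(regs, error_code, address);
 */
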
/*
 * Called when the architecture enters its oops handler, before it prints
 * anything.  If this is the first CPU to oops, and it's oopsing the first
 * time then let it proceed.
 *
 * This is all enabled by the pause_on_oops kernel boot option.  We do all
 * this to ensure that oopses don't scroll off the screen.  It has the
 * side-effect of preventing later-oopsing CPUs from mucking up the display,
 * too.
 *
 * It turns out that the CPU which is allowed to print ends up pausing for
 * the right duration, whereas all the other CPUs pause for twice as long:
 * once in oops_enter(), once in oops_exit().
 */
void oops_enter(void)
{
        tracing_off();
        /* can't trust the integrity of the kernel anymore: */
        debug_locks_off();
        do_oops_enter_exit();
}

/*
 * 64-bit random ID for oopses:
 */
static u64 oops_id;

static int init_oops_id(void)
{
        if (!oops_id)
                get_random_bytes(&oops_id, sizeof(oops_id));
        else
                oops_id++;

        return 0;
}
late_initcall(init_oops_id);

void print_oops_end_marker(void)
{
        init_oops_id();
        pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
}

/*
 * Called when the architecture exits its oops handler, after printing
 * everything.
 */
void oops_exit(void)
{
        do_oops_enter_exit();
        print_oops_end_marker();
        kmsg_dump(KMSG_DUMP_OOPS);
}

struct warn_args {
        const char *fmt;
        va_list args;
};

void __warn(const char *file, int line, void *caller, unsigned taint,
            struct pt_regs *regs, struct warn_args *args)
{
        disable_trace_on_warning();

        if (args)
                pr_warn(CUT_HERE);

        if (file)
                pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
                        raw_smp_processor_id(), current->pid, file, line,
                        caller);
        else
                pr_warn("WARNING: CPU: %d PID: %d at %pS\n",
                        raw_smp_processor_id(), current->pid, caller);

        if (args)
                vprintk(args->fmt, args->args);

        if (panic_on_warn) {
                /*
                 * This thread may hit another WARN() in the panic path.
                 * Resetting this prevents additional WARN() from panicking the
                 * system on this thread.  Other threads are serialized by
                 * the panic_cpu check in panic().
                 */
                panic_on_warn = 0;
                panic("panic_on_warn set ...\n");
        }

        print_modules();

        if (regs)
                show_regs(regs);
        else
                dump_stack();

        print_oops_end_marker();

        /* Just a warning, don't kill lockdep. */
        add_taint(taint, LOCKDEP_STILL_OK);
}

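/*
 * Example (illustrative): the WARN*() macros ultimately funnel into
 * __warn(). On architectures using the slow path below, a driver line
 * such as
 *
 *      WARN(count < 0, "bad count %d\n", count);
 *
 * lands in warn_slowpath_fmt(), which packs the message into warn_args
 * and calls __warn() with file/line from the macro expansion.
 */
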
#ifdef WANT_WARN_ON_SLOWPATH
void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...)
{
        struct warn_args args;

        args.fmt = fmt;
        va_start(args.args, fmt);
        __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL,
               &args);
        va_end(args.args);
}
EXPORT_SYMBOL(warn_slowpath_fmt);

void warn_slowpath_fmt_taint(const char *file, int line,
                             unsigned taint, const char *fmt, ...)
{
        struct warn_args args;

        args.fmt = fmt;
        va_start(args.args, fmt);
        __warn(file, line, __builtin_return_address(0), taint, NULL, &args);
        va_end(args.args);
}
EXPORT_SYMBOL(warn_slowpath_fmt_taint);

void warn_slowpath_null(const char *file, int line)
{
        pr_warn(CUT_HERE);
        __warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, NULL);
}
EXPORT_SYMBOL(warn_slowpath_null);
#else
void __warn_printk(const char *fmt, ...)
{
        va_list args;

        pr_warn(CUT_HERE);

        va_start(args, fmt);
        vprintk(fmt, args);
        va_end(args);
}
EXPORT_SYMBOL(__warn_printk);
#endif

#ifdef CONFIG_BUG

/* Support resetting WARN*_ONCE state */

static int clear_warn_once_set(void *data, u64 val)
{
        generic_bug_clear_once();
        memset(__start_once, 0, __end_once - __start_once);
        return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(clear_warn_once_fops,
                        NULL,
                        clear_warn_once_set,
                        "%lld\n");

static __init int register_warn_debugfs(void)
{
        /* Don't care about failure */
        debugfs_create_file("clear_warn_once", 0200, NULL,
                            NULL, &clear_warn_once_fops);
        return 0;
}

device_initcall(register_warn_debugfs);
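
/*
 * Usage (from userspace): writing any value re-arms all *_ONCE sites so
 * they can fire again, e.g.:
 *
 *      echo 1 > /sys/kernel/debug/clear_warn_once
 */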
#endif

#ifdef CONFIG_CC_STACKPROTECTOR

/*
 * Called when gcc's -fstack-protector feature is used, and
 * gcc detects corruption of the on-stack canary value
 */
__visible void __stack_chk_fail(void)
{
        panic("stack-protector: Kernel stack is corrupted in: %pB\n",
                __builtin_return_address(0));
}
EXPORT_SYMBOL(__stack_chk_fail);

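/*
 * Example (illustrative): an overflow of a stack buffer, e.g.
 *
 *      char buf[8];
 *      memcpy(buf, src, 64);
 *
 * clobbers the canary; the instrumented function's epilogue detects the
 * mismatch and calls __stack_chk_fail() instead of returning.
 */
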
#endif

#ifdef CONFIG_ARCH_HAS_REFCOUNT
void refcount_error_report(struct pt_regs *regs, const char *err)
{
        WARN_RATELIMIT(1, "refcount_t %s at %pB in %s[%d], uid/euid: %u/%u\n",
                err, (void *)instruction_pointer(regs),
                current->comm, task_pid_nr(current),
                from_kuid_munged(&init_user_ns, current_uid()),
                from_kuid_munged(&init_user_ns, current_euid()));
}
#endif

core_param(panic, panic_timeout, int, 0644);
core_param(pause_on_oops, pause_on_oops, int, 0644);
core_param(panic_on_warn, panic_on_warn, int, 0644);
core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644);

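/*
 * These are writable parameters of the core kernel: they can be set on
 * the boot command line (e.g. "panic=30 pause_on_oops=5") or changed at
 * runtime via /sys/module/kernel/parameters/.
 *
 * Example: booting with "oops=panic" makes every oops fatal, the same
 * effect as building with CONFIG_PANIC_ON_OOPS=y.
 */
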
static int __init oops_setup(char *s)
{
        if (!s)
                return -EINVAL;
        if (!strcmp(s, "panic"))
                panic_on_oops = 1;
        return 0;
}
early_param("oops", oops_setup);