linux/arch/x86/kernel/crash.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Architecture specific (i386/x86_64) functions for kexec based crash dumps.
 *
 * Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
 *
 * Copyright (C) IBM Corporation, 2004. All rights reserved.
 * Copyright (C) Red Hat Inc., 2014. All rights reserved.
 * Authors:
 *      Vivek Goyal <vgoyal@redhat.com>
 *
 */

#define pr_fmt(fmt)	"kexec: " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/reboot.h>
#include <linux/kexec.h>
#include <linux/delay.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/memblock.h>
#include <linux/kdebug.h>

#include <asm/processor.h>
#include <asm/hardirq.h>
#include <asm/nmi.h>
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/e820/types.h>
#include <asm/io_apic.h>
#include <asm/hpet.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
#include <asm/virtext.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>

/* Used while preparing memory map entries for second kernel */
struct crash_memmap_data {
	struct boot_params *params;
	/* Type of memory */
	unsigned int type;
};

/*
 * This is used to VMCLEAR all VMCSs loaded on the processor. The
 * callback function pointer is assigned when the kvm_intel module is
 * loaded.
 *
 * Protected by RCU.
 */
crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
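/*
 * Illustrative sketch (not part of this file): kvm_intel is expected to
 * publish and retract its callback roughly as follows, so that readers
 * in the crash path observe either NULL or a fully initialized pointer:
 *
 *	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
 *			   crash_vmclear_local_loaded_vmcss);
 *	...
 *	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
 *	synchronize_rcu();
 */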

static inline void cpu_crash_vmclear_loaded_vmcss(void)
{
	crash_vmclear_fn *do_vmclear_operation = NULL;

	rcu_read_lock();
	do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
	if (do_vmclear_operation)
		do_vmclear_operation();
	rcu_read_unlock();
}

#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)

static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
{
	crash_save_cpu(regs, cpu);

	/*
	 * VMCLEAR VMCSs loaded on all cpus if needed.
	 */
	cpu_crash_vmclear_loaded_vmcss();

	/*
	 * Disable VMX or SVM if needed.
	 *
	 * We need to disable virtualization on all CPUs.
	 * Having VMX or SVM enabled on any CPU may break rebooting
	 * after the kdump kernel has finished its task.
	 */
	cpu_emergency_vmxoff();
	cpu_emergency_svm_disable();

	/*
	 * Disable Intel PT to stop its logging
	 */
	cpu_emergency_stop_pt();

	disable_local_APIC();
}

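/*
 * Halt all other CPUs via NMI: nmi_shootdown_cpus() runs
 * kdump_nmi_callback() on each of them and waits (with a timeout) for
 * them to park, after which the crashing CPU disables its own APIC.
 */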
void kdump_nmi_shootdown_cpus(void)
{
	nmi_shootdown_cpus(kdump_nmi_callback);

	disable_local_APIC();
}

/* Override the weak function in kernel/panic.c */
void crash_smp_send_stop(void)
{
	static int cpus_stopped;

	if (cpus_stopped)
		return;

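	/*
	 * Prefer the arch-specific crash stop (NMI shootdown, which also
	 * saves register state and disables virtualization on each CPU);
	 * fall back to the generic IPI-based stop when it is not set.
	 */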
	if (smp_ops.crash_stop_other_cpus)
		smp_ops.crash_stop_other_cpus();
	else
		smp_send_stop();

	cpus_stopped = 1;
}

#else
void crash_smp_send_stop(void)
{
	/* There are no CPUs to shoot down */
}
#endif

void native_machine_crash_shutdown(struct pt_regs *regs)
{
	/*
	 * This function is only called after the system
	 * has panicked or is otherwise in a critical state.
	 * The minimum amount of code to allow a kexec'd kernel
	 * to run successfully needs to happen here.
	 *
	 * In practice this means shooting down the other cpus in
	 * an SMP system.
	 */
	/* The kernel is broken so disable interrupts */
	local_irq_disable();

	crash_smp_send_stop();

	/*
	 * VMCLEAR VMCSs loaded on this cpu if needed.
	 */
	cpu_crash_vmclear_loaded_vmcss();

	/*
	 * Booting the kdump kernel with VMX or SVM enabled won't work,
	 * because (among other limitations) we can't disable paging
	 * with the virt flags.
	 */
	cpu_emergency_vmxoff();
	cpu_emergency_svm_disable();

	/*
	 * Disable Intel PT to stop its logging
	 */
	cpu_emergency_stop_pt();

#ifdef CONFIG_X86_IO_APIC
	/* Prevent crash_kexec() from deadlocking on ioapic_lock. */
	ioapic_zap_locks();
	clear_IO_APIC();
#endif
	lapic_shutdown();
	restore_boot_irq_mode();
#ifdef CONFIG_HPET_TIMER
	hpet_disable();
#endif
	crash_save_cpu(regs, safe_smp_processor_id());
}

#ifdef CONFIG_KEXEC_FILE

static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
	unsigned int *nr_ranges = arg;

	(*nr_ranges)++;
	return 0;
}

/* Gather all the required information to prepare ELF headers for RAM regions */
static struct crash_mem *fill_up_crash_elf_data(void)
{
	unsigned int nr_ranges = 0;
	struct crash_mem *cmem;

	walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
	if (!nr_ranges)
		return NULL;

	/*
	 * Excluding the crashkernel region and/or crashk_low_res may each
	 * cause a further range split, so add two extra slots here.
	 */
	nr_ranges += 2;
	cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
	if (!cmem)
		return NULL;

	cmem->max_nr_ranges = nr_ranges;
	cmem->nr_ranges = 0;

	return cmem;
}

/*
 * Look for any unwanted ranges between mstart and mend and remove them.
 * This may lead to splits; the resulting split ranges are stored in the
 * cmem->ranges[] array.
 */
static int elf_header_exclude_ranges(struct crash_mem *cmem)
{
	int ret = 0;

	/* Exclude the low 1M because it is always reserved */
	ret = crash_exclude_mem_range(cmem, 0, (1<<20)-1);
	if (ret)
		return ret;

	/* Exclude crashkernel region */
	ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
	if (ret)
		return ret;

	if (crashk_low_res.end)
		ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
					      crashk_low_res.end);

	return ret;
}
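/*
 * Example of a split (hypothetical addresses): excluding a crashkernel
 * region at [0x60000000, 0x6fffffff] from a single RAM range
 * [0x00100000, 0x7fffffff] leaves two entries in cmem->ranges[]:
 * [0x00100000, 0x5fffffff] and [0x70000000, 0x7fffffff].
 */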

static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
{
	struct crash_mem *cmem = arg;

	cmem->ranges[cmem->nr_ranges].start = res->start;
	cmem->ranges[cmem->nr_ranges].end = res->end;
	cmem->nr_ranges++;

	return 0;
}

/* Prepare ELF headers. Return addr and size */
static int prepare_elf_headers(struct kimage *image, void **addr,
					unsigned long *sz)
{
	struct crash_mem *cmem;
	int ret;

	cmem = fill_up_crash_elf_data();
	if (!cmem)
		return -ENOMEM;

	ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
	if (ret)
		goto out;

	/* Exclude unwanted mem ranges */
	ret = elf_header_exclude_ranges(cmem);
	if (ret)
		goto out;

	/* By default, prepare 64-bit headers */
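	/*
	 * On x86_64 the second argument also asks
	 * crash_prepare_elf64_headers() to emit an extra PT_LOAD covering
	 * the kernel text mapping, so that kernel virtual addresses in the
	 * vmcore can be resolved (descriptive note; see
	 * crash_prepare_elf64_headers()).
	 */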
	ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz);

out:
	vfree(cmem);
	return ret;
}

static int add_e820_entry(struct boot_params *params, struct e820_entry *entry)
{
	unsigned int nr_e820_entries;

	nr_e820_entries = params->e820_entries;
	if (nr_e820_entries >= E820_MAX_ENTRIES_ZEROPAGE)
		return 1;

	memcpy(&params->e820_table[nr_e820_entries], entry, sizeof(struct e820_entry));
	params->e820_entries++;
	return 0;
}

static int memmap_entry_callback(struct resource *res, void *arg)
{
	struct crash_memmap_data *cmd = arg;
	struct boot_params *params = cmd->params;
	struct e820_entry ei;

	ei.addr = res->start;
	ei.size = resource_size(res);
	ei.type = cmd->type;
	add_e820_entry(params, &ei);

	return 0;
}

static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
				 unsigned long long mstart,
				 unsigned long long mend)
{
	unsigned long start, end;

	cmem->ranges[0].start = mstart;
	cmem->ranges[0].end = mend;
	cmem->nr_ranges = 1;

	/* Exclude ELF header region */
	start = image->elf_load_addr;
	end = start + image->elf_headers_sz - 1;
	return crash_exclude_mem_range(cmem, start, end);
}

/* Prepare memory map for crash dump kernel */
int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
{
	int i, ret = 0;
	unsigned long flags;
	struct e820_entry ei;
	struct crash_memmap_data cmd;
	struct crash_mem *cmem;

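	/*
	 * A single range slot suffices here: the only exclusion performed
	 * on this cmem is the ELF header region, which is normally placed
	 * at one end of crashk_res and so does not split the range; if it
	 * ever did, crash_exclude_mem_range() would fail with -ENOMEM.
	 */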
	cmem = vzalloc(struct_size(cmem, ranges, 1));
	if (!cmem)
		return -ENOMEM;

	memset(&cmd, 0, sizeof(struct crash_memmap_data));
	cmd.params = params;

	/* Add the low 1M */
	cmd.type = E820_TYPE_RAM;
	flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
	walk_iomem_res_desc(IORES_DESC_NONE, flags, 0, (1<<20)-1, &cmd,
			    memmap_entry_callback);

	/* Add ACPI tables */
	cmd.type = E820_TYPE_ACPI;
	flags = IORESOURCE_MEM | IORESOURCE_BUSY;
	walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd,
			    memmap_entry_callback);

	/* Add ACPI Non-volatile Storage */
	cmd.type = E820_TYPE_NVS;
	walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
			    memmap_entry_callback);

	/* Add e820 reserved ranges */
	cmd.type = E820_TYPE_RESERVED;
	flags = IORESOURCE_MEM;
	walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd,
			    memmap_entry_callback);

	/* Add crashk_low_res region */
	if (crashk_low_res.end) {
		ei.addr = crashk_low_res.start;
		ei.size = resource_size(&crashk_low_res);
		ei.type = E820_TYPE_RAM;
		add_e820_entry(params, &ei);
	}

	/* Exclude some ranges from crashk_res and add the rest to memmap */
	ret = memmap_exclude_ranges(image, cmem, crashk_res.start, crashk_res.end);
	if (ret)
		goto out;

	for (i = 0; i < cmem->nr_ranges; i++) {
		ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1;

		/* If entry is less than a page, skip it */
		if (ei.size < PAGE_SIZE)
			continue;
		ei.addr = cmem->ranges[i].start;
		ei.type = E820_TYPE_RAM;
		add_e820_entry(params, &ei);
	}

out:
	vfree(cmem);
	return ret;
}

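/*
 * Load the additional segments needed by a crash capture kernel; today
 * that is just the ELF core headers. Called from the kexec_file_load()
 * image loaders (e.g. bzImage64_load()) for KEXEC_TYPE_CRASH images.
 */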
int crash_load_segments(struct kimage *image)
{
	int ret;
	struct kexec_buf kbuf = { .image = image, .buf_min = 0,
				  .buf_max = ULONG_MAX, .top_down = false };

	/* Prepare ELF headers and add a segment */
	ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz);
	if (ret)
		return ret;

	image->elf_headers = kbuf.buffer;
	image->elf_headers_sz = kbuf.bufsz;

	kbuf.memsz = kbuf.bufsz;
	kbuf.buf_align = ELF_CORE_HEADER_ALIGN;
	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
	ret = kexec_add_buffer(&kbuf);
	if (ret) {
		vfree((void *)image->elf_headers);
		return ret;
	}
	image->elf_load_addr = kbuf.mem;
	pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
		 image->elf_load_addr, kbuf.bufsz, kbuf.memsz);

	return ret;
}
#endif /* CONFIG_KEXEC_FILE */