linux/arch/arm64/kernel/machine_kexec.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * kexec for arm64
 *
 * Copyright (C) Linaro.
 * Copyright (C) Huawei Futurewei Technologies.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/page-flags.h>
#include <linux/set_memory.h>
#include <linux/smp.h>

#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>

#include "cpu-reset.h"

/* Global variables for the arm64_relocate_new_kernel routine. */
extern const unsigned char arm64_relocate_new_kernel[];
extern const unsigned long arm64_relocate_new_kernel_size;

/**
 * kexec_image_info - For debugging output.
 */
#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
static void _kexec_image_info(const char *func, int line,
        const struct kimage *kimage)
{
        unsigned long i;

        pr_debug("%s:%d:\n", func, line);
        pr_debug("  kexec kimage info:\n");
        pr_debug("    type:        %d\n", kimage->type);
        pr_debug("    start:       %lx\n", kimage->start);
        pr_debug("    head:        %lx\n", kimage->head);
        pr_debug("    nr_segments: %lu\n", kimage->nr_segments);
        pr_debug("    kern_reloc: %pa\n", &kimage->arch.kern_reloc);

        for (i = 0; i < kimage->nr_segments; i++) {
                pr_debug("      segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
                        i,
                        kimage->segment[i].mem,
                        kimage->segment[i].mem + kimage->segment[i].memsz,
                        kimage->segment[i].memsz,
                        kimage->segment[i].memsz / PAGE_SIZE);
        }
}

void machine_kexec_cleanup(struct kimage *kimage)
{
        /* Empty routine needed to avoid build errors. */
}

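/**
 * machine_kexec_post_load - Prepare the relocation code after an image load.
 *
 * Copies the arm64_relocate_new_kernel routine into the control code page,
 * records its physical address in kimage->arch.kern_reloc, then cleans the
 * copy to the PoC and invalidates it from the I-cache so it can safely run
 * with the MMU and caches off.
 */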
int machine_kexec_post_load(struct kimage *kimage)
{
        void *reloc_code = page_to_virt(kimage->control_code_page);

        memcpy(reloc_code, arm64_relocate_new_kernel,
               arm64_relocate_new_kernel_size);
        kimage->arch.kern_reloc = __pa(reloc_code);
        kexec_image_info(kimage);

        /*
         * For execution with the MMU off, reloc_code needs to be cleaned to the
         * PoC and invalidated from the I-cache.
         */
        dcache_clean_inval_poc((unsigned long)reloc_code,
                            (unsigned long)reloc_code +
                                    arm64_relocate_new_kernel_size);
        icache_inval_pou((uintptr_t)reloc_code,
                                (uintptr_t)reloc_code +
                                        arm64_relocate_new_kernel_size);

        return 0;
}

/**
 * machine_kexec_prepare - Prepare for a kexec reboot.
 *
 * Called from the core kexec code when a kernel image is loaded.
 * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
 * are stuck in the kernel. This avoids a panic once we hit machine_kexec().
 */
int machine_kexec_prepare(struct kimage *kimage)
{
        if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
                pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
                return -EBUSY;
        }

        return 0;
}

/**
 * kexec_list_flush - Helper to flush the kimage list and source pages to PoC.
 */
static void kexec_list_flush(struct kimage *kimage)
{
        kimage_entry_t *entry;

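        /*
         * Walk the kimage entry list: IND_DESTINATION and IND_SOURCE entries
         * describe where pages are to be copied, IND_INDIRECTION points to
         * the next page of entries, and IND_DONE terminates the list. Both
         * the list itself and the source pages are cleaned to the PoC so the
         * relocation code can read them with the MMU off.
         */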
        for (entry = &kimage->head; ; entry++) {
                unsigned int flag;
                unsigned long addr;

                /* flush the list entries. */
                dcache_clean_inval_poc((unsigned long)entry,
                                    (unsigned long)entry +
                                            sizeof(kimage_entry_t));

                flag = *entry & IND_FLAGS;
                if (flag == IND_DONE)
                        break;

                addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK);

                switch (flag) {
                case IND_INDIRECTION:
                        /* Set entry point just before the new list page. */
                        entry = (kimage_entry_t *)addr - 1;
                        break;
                case IND_SOURCE:
                        /* flush the source pages. */
                        dcache_clean_inval_poc(addr, addr + PAGE_SIZE);
                        break;
                case IND_DESTINATION:
                        break;
                default:
                        BUG();
                }
        }
}

/**
 * kexec_segment_flush - Helper to flush the kimage segments to PoC.
 */
static void kexec_segment_flush(const struct kimage *kimage)
{
        unsigned long i;

        pr_debug("%s:\n", __func__);

        for (i = 0; i < kimage->nr_segments; i++) {
                pr_debug("  segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
                        i,
                        kimage->segment[i].mem,
                        kimage->segment[i].mem + kimage->segment[i].memsz,
                        kimage->segment[i].memsz,
                        kimage->segment[i].memsz / PAGE_SIZE);

                dcache_clean_inval_poc(
                        (unsigned long)phys_to_virt(kimage->segment[i].mem),
                        (unsigned long)phys_to_virt(kimage->segment[i].mem) +
                                kimage->segment[i].memsz);
        }
}

/**
 * machine_kexec - Do the kexec reboot.
 *
 * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
 */
void machine_kexec(struct kimage *kimage)
{
        bool in_kexec_crash = (kimage == kexec_crash_image);
        bool stuck_cpus = cpus_are_stuck_in_kernel();

        /*
         * New cpus may have become stuck_in_kernel after we loaded the image.
         */
        BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
        WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
                "Some CPUs may be stale, kdump will be unreliable.\n");

        /* Flush the kimage list and its buffers. */
        kexec_list_flush(kimage);

        /* Flush the new image if already in place. */
        if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
                kexec_segment_flush(kimage);

        pr_info("Bye!\n");

        local_daif_mask();

        /*
         * cpu_soft_restart will shut down the MMU, disable data caches, then
         * transfer control to the kern_reloc which contains a copy of
         * the arm64_relocate_new_kernel routine.  arm64_relocate_new_kernel
         * uses physical addressing to relocate the new image to its final
         * position and transfers control to the image entry point when the
         * relocation is complete.
         * In the kexec case, kimage->start points to purgatory, assuming that
         * the kernel entry and dtb address are embedded in purgatory by
         * userspace (kexec-tools).
         * In the kexec_file case, the kernel starts directly without purgatory.
         */
        cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start,
                         kimage->arch.dtb_mem);

        BUG(); /* Should never get here. */
}

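/*
 * Quiesce interrupt controller state on the crash path: clear any active
 * state (or EOI interrupts still in progress), then mask and disable every
 * interrupt so nothing stale fires into the crash kernel.
 */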
static void machine_kexec_mask_interrupts(void)
{
        unsigned int i;
        struct irq_desc *desc;

        for_each_irq_desc(i, desc) {
                struct irq_chip *chip;
                int ret;

                chip = irq_desc_get_chip(desc);
                if (!chip)
                        continue;

                /*
                 * First try to remove the active state. If this
                 * fails, try to EOI the interrupt.
                 */
                ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);

                if (ret && irqd_irq_inprogress(&desc->irq_data) &&
                    chip->irq_eoi)
                        chip->irq_eoi(&desc->irq_data);

                if (chip->irq_mask)
                        chip->irq_mask(&desc->irq_data);

                if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
                        chip->irq_disable(&desc->irq_data);
        }
}

/**
 * machine_crash_shutdown - shutdown non-crashing cpus and save registers
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
        local_irq_disable();

        /* shutdown non-crashing cpus */
        crash_smp_send_stop();

        /* for crashing cpu */
        crash_save_cpu(regs, smp_processor_id());
        machine_kexec_mask_interrupts();

        pr_info("Starting crashdump kernel...\n");
}

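/*
 * Flush the loaded crash kernel segments to the PoC, then invalidate their
 * linear-map entries so the running kernel cannot corrupt the crash kernel
 * image.
 */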
void arch_kexec_protect_crashkres(void)
{
        int i;

        kexec_segment_flush(kexec_crash_image);

        for (i = 0; i < kexec_crash_image->nr_segments; i++)
                set_memory_valid(
                        __phys_to_virt(kexec_crash_image->segment[i].mem),
                        kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
}

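/* Make the crash kernel segments accessible again through the linear map. */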
void arch_kexec_unprotect_crashkres(void)
{
        int i;

        for (i = 0; i < kexec_crash_image->nr_segments; i++)
                set_memory_valid(
                        __phys_to_virt(kexec_crash_image->segment[i].mem),
                        kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
}

#ifdef CONFIG_HIBERNATION
/*
 * To preserve the crash dump kernel image, the relevant memory segments
 * should be mapped again around the hibernation.
 */
void crash_prepare_suspend(void)
{
        if (kexec_crash_image)
                arch_kexec_unprotect_crashkres();
}

void crash_post_resume(void)
{
        if (kexec_crash_image)
                arch_kexec_protect_crashkres();
}

/*
 * crash_is_nosave
 *
 * Return true only if a page is part of reserved memory for crash dump kernel,
 * but does not hold any data of loaded kernel image.
 *
 * Note that all the pages in crash dump kernel memory have been initially
 * marked as Reserved as memory was allocated via memblock_reserve().
 *
 * In hibernation, the pages which are Reserved and yet "nosave" are excluded
 * from the hibernation image. crash_is_nosave() does this check for the crash
 * dump kernel and thereby reduces the total size of the hibernation image.
 */

bool crash_is_nosave(unsigned long pfn)
{
        int i;
        phys_addr_t addr;

        if (!crashk_res.end)
                return false;

        /* in reserved memory? */
        addr = __pfn_to_phys(pfn);
        if ((addr < crashk_res.start) || (crashk_res.end < addr))
                return false;

        if (!kexec_crash_image)
                return true;

        /* not part of loaded kernel image? */
        for (i = 0; i < kexec_crash_image->nr_segments; i++)
                if (addr >= kexec_crash_image->segment[i].mem &&
                                addr < (kexec_crash_image->segment[i].mem +
                                        kexec_crash_image->segment[i].memsz))
                        return false;

        return true;
}

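/*
 * Return a range of the crash kernel reservation to the page allocator,
 * one page at a time.
 */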
void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
{
        unsigned long addr;
        struct page *page;

        for (addr = begin; addr < end; addr += PAGE_SIZE) {
                page = phys_to_page(addr);
                free_reserved_page(page);
        }
}
#endif /* CONFIG_HIBERNATION */