// SPDX-License-Identifier: GPL-2.0-only
/*
 * kexec.c - kexec_load system call
 * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/security.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>

#include "kexec_internal.h"

static int copy_user_segment_list(struct kimage *image,
                                  unsigned long nr_segments,
                                  struct kexec_segment __user *segments)
{
        int ret;
        size_t segment_bytes;

        /* Read in the segments */
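        /*
         * Both syscall entry points run kexec_load_check() first, so
         * nr_segments is already capped at KEXEC_SEGMENT_MAX and the
         * multiplication below cannot overflow.
         */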
        image->nr_segments = nr_segments;
        segment_bytes = nr_segments * sizeof(*segments);
        ret = copy_from_user(image->segment, segments, segment_bytes);
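        /* copy_from_user() returns the number of bytes left uncopied, not an errno */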
        if (ret)
                ret = -EFAULT;

        return ret;
}

static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
                             unsigned long nr_segments,
                             struct kexec_segment __user *segments,
                             unsigned long flags)
{
        int ret;
        struct kimage *image;
        bool kexec_on_panic = flags & KEXEC_ON_CRASH;

        if (kexec_on_panic) {
                /* Verify we have a valid entry point */
                if ((entry < phys_to_boot_phys(crashk_res.start)) ||
                    (entry > phys_to_boot_phys(crashk_res.end)))
                        return -EADDRNOTAVAIL;
        }

        /* Allocate and initialize a controlling structure */
        image = do_kimage_alloc_init();
        if (!image)
                return -ENOMEM;

        image->start = entry;

        ret = copy_user_segment_list(image, nr_segments, segments);
        if (ret)
                goto out_free_image;

        if (kexec_on_panic) {
                /* Enable special crash kernel control page alloc policy. */
                image->control_page = crashk_res.start;
                image->type = KEXEC_TYPE_CRASH;
        }

        ret = sanity_check_segment_list(image);
        if (ret)
                goto out_free_image;

        /*
         * Find a location for the control code buffer, and add it to
         * the vector of segments so that its pages will also be
         * counted as destination pages.
         */
        ret = -ENOMEM;
        image->control_code_page = kimage_alloc_control_pages(image,
                                           get_order(KEXEC_CONTROL_PAGE_SIZE));
        if (!image->control_code_page) {
                pr_err("Could not allocate control_code_buffer\n");
                goto out_free_image;
        }

        if (!kexec_on_panic) {
                image->swap_page = kimage_alloc_control_pages(image, 0);
                if (!image->swap_page) {
                        pr_err("Could not allocate swap buffer\n");
                        goto out_free_control_pages;
                }
        }

        *rimage = image;
        return 0;
out_free_control_pages:
        kimage_free_page_list(&image->control_pages);
out_free_image:
        kfree(image);
        return ret;
}

static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
                struct kexec_segment __user *segments, unsigned long flags)
{
        struct kimage **dest_image, *image;
        unsigned long i;
        int ret;

        if (flags & KEXEC_ON_CRASH) {
                dest_image = &kexec_crash_image;
                if (kexec_crash_image)
                        arch_kexec_unprotect_crashkres();
        } else {
                dest_image = &kexec_image;
        }

        if (nr_segments == 0) {
                /* Uninstall image */
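                /*
                 * xchg() swaps the installed pointer to NULL before
                 * the free, so nothing can observe a pointer to an
                 * image whose memory is being released.
                 */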
                kimage_free(xchg(dest_image, NULL));
                return 0;
        }
        if (flags & KEXEC_ON_CRASH) {
                /*
                 * Loading another kernel to switch to if this one
                 * crashes.  Free any current crash dump kernel before
                 * we corrupt it.
                 */
                kimage_free(xchg(&kexec_crash_image, NULL));
        }

        ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
        if (ret)
                return ret;

        if (flags & KEXEC_PRESERVE_CONTEXT)
                image->preserve_context = 1;

        ret = machine_kexec_prepare(image);
        if (ret)
                goto out;

        /*
         * Some architectures (like s390) may touch the crash memory
         * before machine_kexec_prepare(), so the vmcoreinfo data must
         * be copied after it.
         */
        ret = kimage_crash_copy_vmcoreinfo(image);
        if (ret)
                goto out;

        for (i = 0; i < nr_segments; i++) {
                ret = kimage_load_segment(image, &image->segment[i]);
                if (ret)
                        goto out;
        }

        kimage_terminate(image);

        /* Install the new kernel and uninstall the old */
        image = xchg(dest_image, image);

out:
        if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
                arch_kexec_protect_crashkres();

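        /*
         * On success 'image' now holds the previously installed image
         * (swapped out by the xchg() above); on failure it is the
         * partially constructed new image.  Either way it gets freed.
         */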
        kimage_free(image);
        return ret;
}

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down, preventing ongoing DMAs and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that copies the image to its final
 *   destination and then jumps into the image at entry.
 *
 * kexec does not sync or unmount filesystems, so if you need that
 * to happen you need to do it yourself.
 */
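
/*
 * Illustrative only (not part of the kernel): a minimal userspace
 * sketch of driving this syscall.  The entry address, load address,
 * buffer and length names below are hypothetical placeholders; real
 * callers normally go through kexec-tools rather than raw syscall(2).
 *
 *      #include <sys/syscall.h>
 *      #include <unistd.h>
 *      #include <linux/kexec.h>
 *
 *      struct kexec_segment seg = {
 *              .buf   = image_buf,          // kernel image in user memory
 *              .bufsz = image_len,
 *              .mem   = (void *)0x1000000,  // hypothetical load address
 *              .memsz = image_len_rounded,  // page-aligned destination size
 *      };
 *      if (syscall(SYS_kexec_load, entry, 1UL, &seg,
 *                  KEXEC_ARCH_DEFAULT) != 0)
 *              perror("kexec_load");
 */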

static inline int kexec_load_check(unsigned long nr_segments,
                                   unsigned long flags)
{
        int result;

        /* We only trust the superuser with rebooting the system. */
        if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
                return -EPERM;

        /* Permit LSMs and IMA to fail the kexec */
        result = security_kernel_load_data(LOADING_KEXEC_IMAGE);
        if (result < 0)
                return result;

        /*
         * Verify we have a legal set of flags
         * This leaves us room for future extensions.
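         *
         * With the architecture field masked out, every remaining
         * bit of 'flags' must be one of the bits in KEXEC_FLAGS, or
         * the two sides of the comparison below will differ.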
         */
        if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
                return -EINVAL;

        /* Put an artificial cap on the number
         * of segments passed to kexec_load.
         */
        if (nr_segments > KEXEC_SEGMENT_MAX)
                return -EINVAL;

        return 0;
}

SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
                struct kexec_segment __user *, segments, unsigned long, flags)
{
        int result;

        result = kexec_load_check(nr_segments, flags);
        if (result)
                return result;

        /* Verify we are on the appropriate architecture */
        if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
                ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
                return -EINVAL;

        /* Because we write directly to the reserved memory
         * region when loading crash kernels we need a mutex here to
         * prevent multiple crash kernels from attempting to load
         * simultaneously, and to prevent a crash kernel from loading
         * over the top of an in-use crash kernel.
         *
         * KISS: always take the mutex.
         */
        if (!mutex_trylock(&kexec_mutex))
                return -EBUSY;

        result = do_kexec_load(entry, nr_segments, segments, flags);

        mutex_unlock(&kexec_mutex);

        return result;
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
                       compat_ulong_t, nr_segments,
                       struct compat_kexec_segment __user *, segments,
                       compat_ulong_t, flags)
{
        struct compat_kexec_segment in;
        struct kexec_segment out, __user *ksegments;
        unsigned long i, result;

        result = kexec_load_check(nr_segments, flags);
        if (result)
                return result;

        /* Don't allow clients that don't understand the native
         * architecture to do anything.
         */
        if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
                return -EINVAL;

        ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
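        /*
         * Widen each 32-bit segment descriptor into a native
         * struct kexec_segment in the scratch area on the user
         * stack, so do_kexec_load() can treat both entry points
         * identically.
         */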
        for (i = 0; i < nr_segments; i++) {
                result = copy_from_user(&in, &segments[i], sizeof(in));
                if (result)
                        return -EFAULT;

                out.buf   = compat_ptr(in.buf);
                out.bufsz = in.bufsz;
                out.mem   = in.mem;
                out.memsz = in.memsz;

                result = copy_to_user(&ksegments[i], &out, sizeof(out));
                if (result)
                        return -EFAULT;
        }

        /* Because we write directly to the reserved memory
         * region when loading crash kernels we need a mutex here to
         * prevent multiple crash kernels from attempting to load
         * simultaneously, and to prevent a crash kernel from loading
         * over the top of an in-use crash kernel.
         *
         * KISS: always take the mutex.
         */
        if (!mutex_trylock(&kexec_mutex))
                return -EBUSY;

        result = do_kexec_load(entry, nr_segments, ksegments, flags);

        mutex_unlock(&kexec_mutex);

        return result;
}
#endif
