/*
 * kexec.c - kexec_load system call
 * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/security.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>

#include "kexec_internal.h"

static int copy_user_segment_list(struct kimage *image,
                                  unsigned long nr_segments,
                                  struct kexec_segment __user *segments)
{
        int ret;
        size_t segment_bytes;

        /* Read in the segments */
        image->nr_segments = nr_segments;
        segment_bytes = nr_segments * sizeof(*segments);
        ret = copy_from_user(image->segment, segments, segment_bytes);
        if (ret)
                ret = -EFAULT;

        return ret;
}

static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
                             unsigned long nr_segments,
                             struct kexec_segment __user *segments,
                             unsigned long flags)
{
        int ret;
        struct kimage *image;
        bool kexec_on_panic = flags & KEXEC_ON_CRASH;

        if (kexec_on_panic) {
                /* Verify we have a valid entry point */
                if ((entry < phys_to_boot_phys(crashk_res.start)) ||
                    (entry > phys_to_boot_phys(crashk_res.end)))
                        return -EADDRNOTAVAIL;
        }

        /* Allocate and initialize a controlling structure */
        image = do_kimage_alloc_init();
        if (!image)
                return -ENOMEM;

        image->start = entry;

        ret = copy_user_segment_list(image, nr_segments, segments);
        if (ret)
                goto out_free_image;

        if (kexec_on_panic) {
                /* Enable special crash kernel control page alloc policy. */
                image->control_page = crashk_res.start;
                image->type = KEXEC_TYPE_CRASH;
        }

        ret = sanity_check_segment_list(image);
        if (ret)
                goto out_free_image;

        /*
         * Find a location for the control code buffer, and add it to
         * the vector of segments so that its pages will also be
         * counted as destination pages.
         */
        ret = -ENOMEM;
        image->control_code_page = kimage_alloc_control_pages(image,
                                           get_order(KEXEC_CONTROL_PAGE_SIZE));
        if (!image->control_code_page) {
                pr_err("Could not allocate control_code_buffer\n");
                goto out_free_image;
        }

        if (!kexec_on_panic) {
                image->swap_page = kimage_alloc_control_pages(image, 0);
                if (!image->swap_page) {
                        pr_err("Could not allocate swap buffer\n");
                        goto out_free_control_pages;
                }
        }

        *rimage = image;
        return 0;
out_free_control_pages:
        kimage_free_page_list(&image->control_pages);
out_free_image:
        kfree(image);
        return ret;
}

static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
                struct kexec_segment __user *segments, unsigned long flags)
{
        struct kimage **dest_image, *image;
        unsigned long i;
        int ret;

        if (flags & KEXEC_ON_CRASH) {
                dest_image = &kexec_crash_image;
                if (kexec_crash_image)
                        arch_kexec_unprotect_crashkres();
        } else {
                dest_image = &kexec_image;
        }

        if (nr_segments == 0) {
                /* Uninstall image */
                kimage_free(xchg(dest_image, NULL));
                return 0;
        }
        if (flags & KEXEC_ON_CRASH) {
                /*
                 * Loading another kernel to switch to if this one
                 * crashes.  Free any current crash dump kernel before
                 * we corrupt it.
                 */
                kimage_free(xchg(&kexec_crash_image, NULL));
        }

        ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
        if (ret)
                return ret;

        if (flags & KEXEC_PRESERVE_CONTEXT)
                image->preserve_context = 1;

        ret = machine_kexec_prepare(image);
        if (ret)
                goto out;

        /*
         * Some architectures (like S390) may touch the crash memory before
         * machine_kexec_prepare(); we must copy the vmcoreinfo data after it.
         */
        ret = kimage_crash_copy_vmcoreinfo(image);
        if (ret)
                goto out;

        for (i = 0; i < nr_segments; i++) {
                ret = kimage_load_segment(image, &image->segment[i]);
                if (ret)
                        goto out;
        }

        kimage_terminate(image);

        /* Install the new kernel and uninstall the old */
        image = xchg(dest_image, image);

out:
        if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
                arch_kexec_protect_crashkres();

        kimage_free(image);
        return ret;
}

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down, preventing ongoing DMAs and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine-specific part that includes the syscall number,
 *   copies the image to its final destination, and jumps into
 *   the image at entry.
 *
 * kexec does not sync or unmount filesystems, so if you need
 * that to happen you need to do it yourself.
 */
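
/*
 * Illustrative only: a minimal sketch of how userspace drives this
 * syscall (it is not part of this file).  The buffer names, the chosen
 * destination address and the single-segment layout are hypothetical;
 * a real loader such as kexec-tools derives them from the kernel image
 * and the firmware memory map, and keeps mem/memsz page-aligned as the
 * segment sanity checks require.  Needs <unistd.h>, <sys/syscall.h>
 * and <linux/kexec.h>.
 *
 *      struct kexec_segment seg = {
 *              .buf   = image_buf,          // user buffer holding the image
 *              .bufsz = image_len,
 *              .mem   = (void *)0x1000000,  // destination physical address
 *              .memsz = image_memsz,        // page-aligned, >= bufsz
 *      };
 *      if (syscall(__NR_kexec_load, entry, 1UL, &seg, KEXEC_ARCH_DEFAULT))
 *              perror("kexec_load");
 */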

static inline int kexec_load_check(unsigned long nr_segments,
                                   unsigned long flags)
{
        int result;

        /* We only trust the superuser with rebooting the system. */
        if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
                return -EPERM;

        /* Permit LSMs and IMA to fail the kexec */
        result = security_kernel_load_data(LOADING_KEXEC_IMAGE);
        if (result < 0)
                return result;

        /*
         * kexec can be used to circumvent module loading restrictions, so
         * prevent loading in that case
         */
        if (kernel_is_locked_down("kexec of unsigned images"))
                return -EPERM;

        /*
         * Verify we have a legal set of flags.
         * This leaves us room for future extensions.
         * (A worked example follows this function.)
         */
        if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
                return -EINVAL;

        /* Put an artificial cap on the number
         * of segments passed to kexec_load.
         */
        if (nr_segments > KEXEC_SEGMENT_MAX)
                return -EINVAL;

        return 0;
}
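
/*
 * A worked example of the flag check in kexec_load_check(), using the
 * constants defined in include/uapi/linux/kexec.h for this kernel
 * (shown only for illustration; the header is authoritative):
 *
 *      KEXEC_ARCH_MASK == 0xffff0000 and
 *      KEXEC_FLAGS == (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT) == 0x3
 *
 *      flags == KEXEC_ARCH_X86_64 | KEXEC_ON_CRASH:
 *              (flags & KEXEC_FLAGS) == 0x1 == (flags & ~KEXEC_ARCH_MASK)
 *              => accepted
 *      flags == 0x8 (an undefined low bit):
 *              (flags & KEXEC_FLAGS) == 0x0 != (flags & ~KEXEC_ARCH_MASK)
 *              => -EINVAL
 */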

SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
                struct kexec_segment __user *, segments, unsigned long, flags)
{
        int result;

        result = kexec_load_check(nr_segments, flags);
        if (result)
                return result;

        /* Verify we are on the appropriate architecture */
        if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
                ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
                return -EINVAL;

        /* Because we write directly to the reserved memory
         * region when loading crash kernels, we need a mutex here to
         * prevent multiple crash kernels from attempting to load
         * simultaneously, and to prevent a crash kernel from loading
         * over the top of an in-use crash kernel.
         *
         * KISS: always take the mutex.
         */
        if (!mutex_trylock(&kexec_mutex))
                return -EBUSY;

        result = do_kexec_load(entry, nr_segments, segments, flags);

        mutex_unlock(&kexec_mutex);

        return result;
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
                       compat_ulong_t, nr_segments,
                       struct compat_kexec_segment __user *, segments,
                       compat_ulong_t, flags)
{
        struct compat_kexec_segment in;
        struct kexec_segment out, __user *ksegments;
        unsigned long i, result;

        result = kexec_load_check(nr_segments, flags);
        if (result)
                return result;

        /* Don't allow clients that don't understand the native
         * architecture to do anything.
         */
        if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
                return -EINVAL;

        ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
        for (i = 0; i < nr_segments; i++) {
                result = copy_from_user(&in, &segments[i], sizeof(in));
                if (result)
                        return -EFAULT;

                out.buf   = compat_ptr(in.buf);
                out.bufsz = in.bufsz;
                out.mem   = in.mem;
                out.memsz = in.memsz;

                result = copy_to_user(&ksegments[i], &out, sizeof(out));
                if (result)
                        return -EFAULT;
        }

        /* Because we write directly to the reserved memory
         * region when loading crash kernels, we need a mutex here to
         * prevent multiple crash kernels from attempting to load
         * simultaneously, and to prevent a crash kernel from loading
         * over the top of an in-use crash kernel.
         *
         * KISS: always take the mutex.
         */
        if (!mutex_trylock(&kexec_mutex))
                return -EBUSY;

        result = do_kexec_load(entry, nr_segments, ksegments, flags);

        mutex_unlock(&kexec_mutex);

        return result;
}
#endif