linux/kernel/kexec.c
/*
 * kexec.c - kexec_load system call
 * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>

#include "kexec_internal.h"

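/*
 * copy_user_segment_list - fetch the segment descriptors from user space
 *
 * Fills image->segment and image->nr_segments from the user-supplied
 * array.  Returns 0 on success or -EFAULT if the copy faults.
 */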
static int copy_user_segment_list(struct kimage *image,
                                  unsigned long nr_segments,
                                  struct kexec_segment __user *segments)
{
        int ret;
        size_t segment_bytes;

        /* Read in the segments */
        image->nr_segments = nr_segments;
        segment_bytes = nr_segments * sizeof(*segments);
        ret = copy_from_user(image->segment, segments, segment_bytes);
        if (ret)
                ret = -EFAULT;

        return ret;
}

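/*
 * kimage_alloc_init - allocate and populate a struct kimage
 *
 * Allocates the control structure, copies in the user segment list,
 * applies the crash-kernel entry check and allocation policy when
 * KEXEC_ON_CRASH is set, sanity checks the segments, and allocates the
 * control code page (plus a swap page for a normal kexec image).
 */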
static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
                             unsigned long nr_segments,
                             struct kexec_segment __user *segments,
                             unsigned long flags)
{
        int ret;
        struct kimage *image;
        bool kexec_on_panic = flags & KEXEC_ON_CRASH;

        if (kexec_on_panic) {
                /* Verify we have a valid entry point */
                if ((entry < phys_to_boot_phys(crashk_res.start)) ||
                    (entry > phys_to_boot_phys(crashk_res.end)))
                        return -EADDRNOTAVAIL;
        }

        /* Allocate and initialize a controlling structure */
        image = do_kimage_alloc_init();
        if (!image)
                return -ENOMEM;

        image->start = entry;

        ret = copy_user_segment_list(image, nr_segments, segments);
        if (ret)
                goto out_free_image;

        if (kexec_on_panic) {
                /* Enable special crash kernel control page alloc policy. */
                image->control_page = crashk_res.start;
                image->type = KEXEC_TYPE_CRASH;
        }

        ret = sanity_check_segment_list(image);
        if (ret)
                goto out_free_image;

        /*
         * Find a location for the control code buffer, and add it to
         * the vector of segments so that its pages will also be
         * counted as destination pages.
         */
        ret = -ENOMEM;
        image->control_code_page = kimage_alloc_control_pages(image,
                                           get_order(KEXEC_CONTROL_PAGE_SIZE));
        if (!image->control_code_page) {
                pr_err("Could not allocate control_code_buffer\n");
                goto out_free_image;
        }

        if (!kexec_on_panic) {
                image->swap_page = kimage_alloc_control_pages(image, 0);
                if (!image->swap_page) {
                        pr_err("Could not allocate swap buffer\n");
                        goto out_free_control_pages;
                }
        }

        *rimage = image;
        return 0;
out_free_control_pages:
        kimage_free_page_list(&image->control_pages);
out_free_image:
        kfree(image);
        return ret;
}

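/*
 * do_kexec_load - load or unload a kexec image
 *
 * Called with kexec_mutex held by the kexec_load syscall.  A zero
 * nr_segments unloads whatever image is currently installed.  Otherwise
 * a new image is allocated, every segment is copied into place, and the
 * result is swapped into either kexec_image or kexec_crash_image; the
 * image it replaces is freed.
 */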
static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
                struct kexec_segment __user *segments, unsigned long flags)
{
        struct kimage **dest_image, *image;
        unsigned long i;
        int ret;

        if (flags & KEXEC_ON_CRASH) {
                dest_image = &kexec_crash_image;
                if (kexec_crash_image)
                        arch_kexec_unprotect_crashkres();
        } else {
                dest_image = &kexec_image;
        }

        if (nr_segments == 0) {
                /* Uninstall image */
                kimage_free(xchg(dest_image, NULL));
                return 0;
        }
        if (flags & KEXEC_ON_CRASH) {
                /*
                 * Loading another kernel to switch to if this one
                 * crashes.  Free any current crash dump kernel before
                 * we corrupt it.
                 */
                kimage_free(xchg(&kexec_crash_image, NULL));
        }

        ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
        if (ret)
                return ret;

        if (flags & KEXEC_PRESERVE_CONTEXT)
                image->preserve_context = 1;

        ret = machine_kexec_prepare(image);
        if (ret)
                goto out;

        for (i = 0; i < nr_segments; i++) {
                ret = kimage_load_segment(image, &image->segment[i]);
                if (ret)
                        goto out;
        }

        kimage_terminate(image);

        /* Install the new kernel and uninstall the old */
        image = xchg(dest_image, image);

out:
        if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
                arch_kexec_protect_crashkres();

        kimage_free(image);
        return ret;
}

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces:
 * - A generic part which loads the new kernel from the current
 *   address space and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down, preventing ongoing DMA and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number,
 *   copies the image to its final destination, and jumps into the
 *   image at the entry point.
 *
 * kexec does not sync or unmount filesystems, so if you need that
 * to happen you must do it yourself.
 */
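/*
 * Illustrative userspace sketch (not part of the original file): with
 * CAP_SYS_BOOT, the raw syscall(2) interface and the uapi
 * <linux/kexec.h> header, passing nr_segments == 0 unloads any
 * currently installed image:
 *
 *        #include <sys/syscall.h>
 *        #include <unistd.h>
 *        #include <linux/kexec.h>
 *
 *        long ret = syscall(SYS_kexec_load, 0UL, 0UL, NULL,
 *                           KEXEC_ARCH_DEFAULT);
 *
 * Loading a real image additionally requires an array of struct
 * kexec_segment entries describing the buffers to be copied.
 */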

SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
                struct kexec_segment __user *, segments, unsigned long, flags)
{
        int result;

        /* We only trust the superuser with rebooting the system. */
        if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
                return -EPERM;

        /*
         * Verify we have a legal set of flags.
         * This leaves us room for future extensions.
         */
        if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
                return -EINVAL;

        /* Verify we are on the appropriate architecture */
        if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
                ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
                return -EINVAL;

        /*
         * Put an artificial cap on the number of segments passed to
         * kexec_load.
         */
        if (nr_segments > KEXEC_SEGMENT_MAX)
                return -EINVAL;

        /*
         * Because we write directly to the reserved memory region when
         * loading crash kernels we need a mutex here to prevent multiple
         * crash kernels from attempting to load simultaneously, and to
         * prevent a crash kernel from loading over the top of an in-use
         * crash kernel.
         *
         * KISS: always take the mutex.
         */
        if (!mutex_trylock(&kexec_mutex))
                return -EBUSY;

        result = do_kexec_load(entry, nr_segments, segments, flags);

        mutex_unlock(&kexec_mutex);

        return result;
}

#ifdef CONFIG_COMPAT
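/*
 * 32-bit compat entry point: convert each compat_kexec_segment into a
 * native struct kexec_segment in compat-allocated user space, then
 * forward the request to sys_kexec_load().
 */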
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
                       compat_ulong_t, nr_segments,
                       struct compat_kexec_segment __user *, segments,
                       compat_ulong_t, flags)
{
        struct compat_kexec_segment in;
        struct kexec_segment out, __user *ksegments;
        unsigned long i, result;

        /*
         * Don't allow clients that don't understand the native
         * architecture to do anything.
         */
        if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
                return -EINVAL;

        if (nr_segments > KEXEC_SEGMENT_MAX)
                return -EINVAL;

        ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
        for (i = 0; i < nr_segments; i++) {
                result = copy_from_user(&in, &segments[i], sizeof(in));
                if (result)
                        return -EFAULT;

                out.buf   = compat_ptr(in.buf);
                out.bufsz = in.bufsz;
                out.mem   = in.mem;
                out.memsz = in.memsz;

                result = copy_to_user(&ksegments[i], &out, sizeof(out));
                if (result)
                        return -EFAULT;
        }

        return sys_kexec_load(entry, nr_segments, ksegments, flags);
}
#endif