linux/fs/binfmt_elf.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

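/*
 * A worked example of the macros above (a sketch, assuming
 * ELF_MIN_ALIGN == 0x1000):
 *   ELF_PAGESTART(0x12345)  == 0x12000  (round down to the page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345    (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000  (round up to the next page boundary)
 */
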
static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end, int prot)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                /*
                 * Map the last of the bss segment.
                 * If the header is requesting these pages to be
                 * executable, honour that (ppc32 needs this).
                 */
                int error = vm_brk_flags(start, end - start,
                                prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        return error;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}
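
/*
 * Illustrative sketch of set_brk() (assumed values, with
 * ELF_MIN_ALIGN == 0x1000): set_brk(0x601234, 0x605678,
 * PROT_READ | PROT_WRITE) page-aligns both ends, asks vm_brk_flags()
 * for the range [0x602000, 0x606000), and records 0x606000 as the new
 * program break.
 */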

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These pages
   would otherwise contain junk from the file that
   should not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
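
/*
 * For example (a sketch, assuming ELF_MIN_ALIGN == 0x1000): for
 * elf_bss == 0x804a123, padzero() clears the 0xedd trailing bytes of
 * that page, i.e. the range [0x804a123, 0x804b000).
 */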

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
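
/*
 * A sketch of how the grows-down variants behave, with 8-byte
 * elf_addr_t and p == 0x7fff0010 (all values here are illustrative
 * assumptions, not fixed addresses):
 *   STACK_ALLOC(p, 16) - p drops to 0x7fff0000, which is also returned
 *   STACK_ADD(p, 2)    - two elf_addr_t slots below: 0x7ffefff0
 *   STACK_ROUND(sp, 5) - five slots below sp == 0x7ffefff0, rounded
 *                        down to a 16-byte boundary: 0x7ffeffc0
 */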

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;

        /* Populate list of argv pointers back to argv strings. */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, sp++))
                return -EFAULT;
        current->mm->arg_end = p;

        /* Populate list of envp pointers back to envp strings. */
        current->mm->env_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, sp++))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
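
/*
 * A sketch of the initial stack this function builds, from the final
 * sp upwards on grows-down architectures (the System V ABI layout):
 *
 *   sp -> argc
 *         argv[0] ... argv[argc - 1], NULL
 *         envp[0] ... envp[envc - 1], NULL
 *         auxv: (id, val) pairs, ending with (AT_NULL, 0)
 *   higher addresses: the argument/environment strings themselves,
 *   plus the random bytes and platform strings copied out above.
 */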

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                const struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        if ((type & MAP_FIXED_NOREPLACE) &&
            PTR_ERR((void *)map_addr) == -EEXIST)
                pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
                        task_pid_nr(current), current->comm, (void *)addr);

        return(map_addr);
}

#endif /* !elf_map */
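
/*
 * A sketch of the total_size trick above, with assumed numbers: for a
 * first segment of (page-aligned) size 0x1000 inside a 0x5000-byte
 * image, elf_map() maps the whole 0x5000 bytes at map_addr, then
 * unmaps [map_addr + 0x1000, map_addr + 0x5000). Later segments are
 * placed into that now-free range with MAP_FIXED_NOREPLACE, so the
 * image cannot end up straddling an unrelated mapping.
 */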

static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
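
/*
 * For example (assumed values): with a first PT_LOAD at p_vaddr
 * 0x400000 and a last PT_LOAD at p_vaddr 0x600000 with p_memsz 0x800,
 * total_mapping_size() returns 0x600800 - 0x400000 == 0x200800.
 */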

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
                                       struct file *elf_file)
{
        struct elf_phdr *elf_phdata = NULL;
        int retval, err = -1;
        loff_t pos = elf_ex->e_phoff;
        unsigned int size;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;

        /* Sanity check the number of program headers... */
        /* ...and their total size. */
        size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
        if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
                goto out;

        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        /* Read in the program headers */
        retval = kernel_read(elf_file, elf_phdata, size, &pos);
        if (retval != size) {
                err = (retval < 0) ? retval : -EIO;
                goto out;
        }

        /* Success! */
        err = 0;
out:
        if (err) {
                kfree(elf_phdata);
                elf_phdata = NULL;
        }
        return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:       The main ELF header
 * @phdr:       The program header to check
 * @elf:        The open ELF file
 * @is_interp:  True if the phdr is from the interpreter of the ELF being
 *              loaded, else false.
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
                                   struct elf_phdr *phdr,
                                   struct file *elf, bool is_interp,
                                   struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:       The main ELF header
 * @has_interp: True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
                                 struct elfhdr *interp_ehdr,
                                 struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

static inline int make_prot(u32 p_flags)
{
        int prot = 0;

        if (p_flags & PF_R)
                prot |= PROT_READ;
        if (p_flags & PF_W)
                prot |= PROT_WRITE;
        if (p_flags & PF_X)
                prot |= PROT_EXEC;
        return prot;
}
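
/*
 * For instance, a typical text segment carries p_flags PF_R | PF_X,
 * which make_prot() turns into PROT_READ | PROT_EXEC; a data segment's
 * PF_R | PF_W becomes PROT_READ | PROT_WRITE.
 */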

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        int bss_prot = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int i;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex) ||
            elf_check_fdpic(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        total_size = total_mapping_size(interp_elf_phdata,
                                        interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out;
        }

        eppnt = interp_elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = make_prot(eppnt->p_flags);
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED_NOREPLACE;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
                        if (k > last_bss) {
                                last_bss = k;
                                bss_prot = elf_prot;
                        }
                }
        }

        /*
         * Now fill out the bss section: first pad the last page from
         * the file up to the page boundary, and zero it from elf_bss
         * up to the end of the page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out;
        }
        /*
         * Next, align both the file and mem bss up to the page size,
         * since this is where elf_bss was just zeroed up to, and where
         * last_bss will end after the vm_brk_flags() below.
         */
        elf_bss = ELF_PAGEALIGN(elf_bss);
        last_bss = ELF_PAGEALIGN(last_bss);
        /* Finally, if there is still more bss to allocate, do it. */
        if (last_bss > elf_bss) {
                error = vm_brk_flags(elf_bss, last_bss - elf_bss,
                                bss_prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        goto out;
        }

        error = load_addr;
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned long random_variable = 0;

        if (current->flags & PF_RANDOMIZE) {
                random_variable = get_random_long();
                random_variable &= STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
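
/*
 * A sketch with the default mask (assuming PAGE_SHIFT == 12 and
 * STACK_RND_MASK == 0x7ff): the random offset is up to 0x7ff pages,
 * i.e. just under 8MB, subtracted from the page-aligned stack top on
 * grows-down architectures.
 */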

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
        unsigned long elf_bss, elf_brk;
        int bss_prot = 0;
        int retval, i;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;
        struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
        struct pt_regs *regs;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (elf_check_fdpic(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
        if (!elf_phdata)
                goto out;

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                char *elf_interpreter;
                loff_t pos;

                if (elf_ppnt->p_type != PT_INTERP)
                        continue;

                /*
                 * This is the program interpreter used for shared libraries -
                 * for now assume that this is an a.out format binary.
                 */
                retval = -ENOEXEC;
                if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
                        goto out_free_ph;

                retval = -ENOMEM;
                elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
                if (!elf_interpreter)
                        goto out_free_ph;

                pos = elf_ppnt->p_offset;
                retval = kernel_read(bprm->file, elf_interpreter,
                                     elf_ppnt->p_filesz, &pos);
                if (retval != elf_ppnt->p_filesz) {
                        if (retval >= 0)
                                retval = -EIO;
                        goto out_free_interp;
                }
                /* make sure path is NULL terminated */
                retval = -ENOEXEC;
                if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                        goto out_free_interp;

                interpreter = open_exec(elf_interpreter);
                kfree(elf_interpreter);
                retval = PTR_ERR(interpreter);
                if (IS_ERR(interpreter))
                        goto out_free_ph;

                /*
                 * If the binary is not readable then enforce mm->dumpable = 0
                 * regardless of the interpreter's permissions.
                 */
                would_dump(bprm, interpreter);

                /* Get the exec headers */
                pos = 0;
                retval = kernel_read(interpreter, &loc->interp_elf_ex,
                                     sizeof(loc->interp_elf_ex), &pos);
                if (retval != sizeof(loc->interp_elf_ex)) {
                        if (retval >= 0)
                                retval = -EIO;
                        goto out_free_dentry;
                }

                break;

out_free_interp:
                kfree(elf_interpreter);
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                switch (elf_ppnt->p_type) {
                case PT_GNU_STACK:
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;

                case PT_LOPROC ... PT_HIPROC:
                        retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
                                                  bprm->file, false,
                                                  &arch_state);
                        if (retval)
                                goto out_free_dentry;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex) ||
                    elf_check_fdpic(&loc->interp_elf_ex))
                        goto out_free_dentry;

                /* Load the interpreter program headers */
                interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
                                                   interpreter);
                if (!interp_elf_phdata)
                        goto out_free_dentry;

                /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
                elf_ppnt = interp_elf_phdata;
                for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
                        switch (elf_ppnt->p_type) {
                        case PT_LOPROC ... PT_HIPROC:
                                retval = arch_elf_pt_proc(&loc->interp_elf_ex,
                                                          elf_ppnt, interpreter,
                                                          true, &arch_state);
                                if (retval)
                                        goto out_free_dentry;
                                break;
                        }
        }

        /*
         * Allow arch code to reject the ELF at this point, whilst it's
         * still possible to return an error to the code that invoked
         * the exec syscall.
         */
        retval = arch_check_elf(&loc->elf_ex,
                                !!interpreter, &loc->interp_elf_ex,
                                &arch_state);
        if (retval)
                goto out_free_dentry;

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY2(loc->elf_ex, &arch_state);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);
        install_exec_creds(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0)
                goto out_free_dentry;

        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
                unsigned long k, vaddr;
                unsigned long total_size = 0;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias,
                                         bss_prot);
                        if (retval)
                                goto out_free_dentry;
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }

                        /*
                         * Some binaries have overlapping elf segments and then
                         * we have to forcefully map over an existing mapping
                         * e.g. over this newly established brk mapping.
                         */
                        elf_fixed = MAP_FIXED;
                }

                elf_prot = make_prot(elf_ppnt->p_flags);

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                /*
                 * If we are loading ET_EXEC or we have already performed
                 * the ET_DYN load_addr calculations, proceed normally.
                 */
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= elf_fixed;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /*
                         * This logic is run once for the first LOAD Program
                         * Header for ET_DYN binaries to calculate the
                         * randomization (load_bias) for all the LOAD
                         * Program Headers, and to calculate the entire
                         * size of the ELF mapping (total_size). (Note that
                         * load_addr_set is set to true later once the
                         * initial mapping is performed.)
                         *
                         * There are effectively two types of ET_DYN
                         * binaries: programs (i.e. PIE: ET_DYN with INTERP)
                         * and loaders (ET_DYN without INTERP, since they
                         * _are_ the ELF interpreter). The loaders must
                         * be loaded away from programs since the program
                         * may otherwise collide with the loader (especially
                         * for ET_EXEC which does not have a randomized
                         * position). For example to handle invocations of
                         * "./ld.so someprog" to test out a new version of
                         * the loader, the subsequent program that the
                         * loader loads must avoid the loader itself, so
                         * they cannot share the same load range. Sufficient
                         * room for the brk must be allocated with the
                         * loader as well, since brk must be available with
                         * the loader.
                         *
                         * Therefore, programs are loaded offset from
                         * ELF_ET_DYN_BASE and loaders are loaded into the
                         * independently randomized mmap region (0 load_bias
                         * without MAP_FIXED).
                         */
                        if (interpreter) {
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
                                elf_flags |= elf_fixed;
                        } else
                                load_bias = 0;

                        /*
                         * Since load_bias is used for all subsequent loading
                         * calculations, we must lower it by the first vaddr
                         * so that the remaining calculations based on the
                         * ELF vaddrs will be correctly offset. The result
                         * is then page aligned.
                         */
                        load_bias = ELF_PAGESTART(load_bias - vaddr);

                        total_size = total_mapping_size(elf_phdata,
                                                        loc->elf_ex.e_phnum);
                        if (!total_size) {
                                retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk) {
                        bss_prot = elf_prot;
                        elf_brk = k;
                }
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk, bss_prot);
        if (retval)
                goto out_free_dentry;
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias, interp_elf_phdata);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(interp_elf_phdata);
        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!interpreter);
        if (retval < 0)
                goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0)
                goto out;
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                /*
                 * For architectures with ELF randomization, when executing
                 * a loader directly (i.e. no interpreter listed in ELF
                 * headers), move the brk area out of the mmap region
                 * (since it grows up, and may collide early with the stack
                 * growing down), and into the unused ELF_ET_DYN_BASE region.
                 */
                if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && !interpreter)
                        current->mm->brk = current->mm->start_brk =
                                ELF_ET_DYN_BASE;

                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
                current->brk_randomized = 1;
#endif
        }

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

        regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        finalize_exec(bprm);
        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        kfree(interp_elf_phdata);
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;
        loff_t pos = 0;

        error = -ENOEXEC;
        retval = kernel_read(file, &elf_ex, sizeof(elf_ex), &pos);
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                goto out;
        if (elf_check_fdpic(&elf_ex))
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        pos = elf_ex.e_phoff;
        retval = kernel_read(file, eppnt, j, &pos);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
        bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
        if (bss > len) {
                error = vm_brk(len, bss - len);
                if (error)
                        goto out_free_ph;
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}
#endif /* #ifdef CONFIG_USELIB */
1284
1285#ifdef CONFIG_ELF_CORE
1286/*
1287 * ELF core dumper
1288 *
1289 * Modelled on fs/exec.c:aout_core_dump()
1290 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1291 */
1292
1293/*
1294 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1295 * that are useful for post-mortem analysis are included in every core dump.
1296 * In that way we ensure that the core dump is fully interpretable later
1297 * without matching up the same kernel and hardware config to see what PC values
1298 * meant. These special mappings include - vDSO, vsyscall, and other
1299 * architecture specific mappings
1300 */
1301static bool always_dump_vma(struct vm_area_struct *vma)
1302{
1303        /* Any vsyscall mappings? */
1304        if (vma == get_gate_vma(vma->vm_mm))
1305                return true;
1306
1307        /*
1308         * Assume that all vmas with a .name op should always be dumped.
1309         * If this changes, a new vm_ops field can easily be added.
1310         */
1311        if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1312                return true;
1313
1314        /*
1315         * arch_vma_name() returns non-NULL for special architecture mappings,
1316         * such as vDSO sections.
1317         */
1318        if (arch_vma_name(vma))
1319                return true;
1320
1321        return false;
1322}
1323
1324/*
1325 * Decide what to dump of a segment, part, all or none.
1326 */
1327static unsigned long vma_dump_size(struct vm_area_struct *vma,
1328                                   unsigned long mm_flags)
1329{
1330#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1331
1332        /* always dump the vdso and vsyscall sections */
1333        if (always_dump_vma(vma))
1334                goto whole;
1335
1336        if (vma->vm_flags & VM_DONTDUMP)
1337                return 0;
1338
1339        /* support for DAX */
1340        if (vma_is_dax(vma)) {
1341                if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1342                        goto whole;
1343                if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1344                        goto whole;
1345                return 0;
1346        }
1347
1348        /* Hugetlb memory check */
1349        if (vma->vm_flags & VM_HUGETLB) {
1350                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1351                        goto whole;
1352                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1353                        goto whole;
1354                return 0;
1355        }
1356
1357        /* Do not dump I/O mapped devices or special mappings */
1358        if (vma->vm_flags & VM_IO)
1359                return 0;
1360
1361        /* By default, dump shared memory if mapped from an anonymous file. */
1362        if (vma->vm_flags & VM_SHARED) {
1363                if (file_inode(vma->vm_file)->i_nlink == 0 ?
1364                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1365                        goto whole;
1366                return 0;
1367        }
1368
1369        /* Dump segments that have been written to.  */
1370        if (vma->anon_vma && FILTER(ANON_PRIVATE))
1371                goto whole;
1372        if (vma->vm_file == NULL)
1373                return 0;
1374
1375        if (FILTER(MAPPED_PRIVATE))
1376                goto whole;
1377
1378        /*
1379         * If this looks like the beginning of a DSO or executable mapping,
1380         * check for an ELF header.  If we find one, dump the first page to
1381         * aid in determining what was mapped here.
1382         */
1383        if (FILTER(ELF_HEADERS) &&
1384            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1385                u32 __user *header = (u32 __user *) vma->vm_start;
1386                u32 word;
1387                mm_segment_t fs = get_fs();
1388                /*
1389                 * Doing it this way gets the constant folded by GCC.
1390                 */
1391                union {
1392                        u32 cmp;
1393                        char elfmag[SELFMAG];
1394                } magic;
1395                BUILD_BUG_ON(SELFMAG != sizeof word);
1396                magic.elfmag[EI_MAG0] = ELFMAG0;
1397                magic.elfmag[EI_MAG1] = ELFMAG1;
1398                magic.elfmag[EI_MAG2] = ELFMAG2;
1399                magic.elfmag[EI_MAG3] = ELFMAG3;
1400                /*
1401                 * Switch to the user "segment" for get_user(),
1402                 * then put back what elf_core_dump() had in place.
1403                 */
1404                set_fs(USER_DS);
1405                if (unlikely(get_user(word, header)))
1406                        word = 0;
1407                set_fs(fs);
1408                if (word == magic.cmp)
1409                        return PAGE_SIZE;
1410        }
1411
1412#undef  FILTER
1413
1414        return 0;
1415
1416whole:
1417        return vma->vm_end - vma->vm_start;
1418}
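
/*
 * The FILTER() bits above form the per-process core dump filter, which
 * userspace can tune through /proc/<pid>/coredump_filter.  A sketch of
 * typical usage (bit assignments as documented for coredump_filter:
 * bit 0 anonymous private, bit 1 anonymous shared, bit 4 ELF headers):
 *
 *   $ echo 0x13 > /proc/self/coredump_filter
 *
 * would dump anonymous private and shared memory plus ELF headers, and
 * skip file-backed mappings.
 */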
1419
1420/* An ELF note in memory */
1421struct memelfnote
1422{
1423        const char *name;
1424        int type;
1425        unsigned int datasz;
1426        void *data;
1427};
1428
1429static int notesize(struct memelfnote *en)
1430{
1431        int sz;
1432
1433        sz = sizeof(struct elf_note);
1434        sz += roundup(strlen(en->name) + 1, 4);
1435        sz += roundup(en->datasz, 4);
1436
1437        return sz;
1438}
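
/*
 * Worked example, assuming a hypothetical note named "CORE" with a
 * 148-byte descriptor:
 *
 *   sizeof(struct elf_note)           12
 *   roundup(strlen("CORE") + 1, 4)     8
 *   roundup(148, 4)                  148
 *                                   ----
 *   notesize()                       168 bytes
 */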
1439
1440static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1441{
1442        struct elf_note en;
1443        en.n_namesz = strlen(men->name) + 1;
1444        en.n_descsz = men->datasz;
1445        en.n_type = men->type;
1446
1447        return dump_emit(cprm, &en, sizeof(en)) &&
1448            dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1449            dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1450}
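
/*
 * For the same hypothetical note, writenote() emits (sketch):
 *
 *   offset  0: { n_namesz = 5, n_descsz = 148, n_type = ... }
 *   offset 12: "CORE\0" plus 3 bytes of padding to a 4-byte boundary
 *   offset 20: 148 bytes of descriptor data (already 4-byte aligned)
 *
 * matching the 168 bytes computed by notesize() above.
 */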
1451
1452static void fill_elf_header(struct elfhdr *elf, int segs,
1453                            u16 machine, u32 flags)
1454{
1455        memset(elf, 0, sizeof(*elf));
1456
1457        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1458        elf->e_ident[EI_CLASS] = ELF_CLASS;
1459        elf->e_ident[EI_DATA] = ELF_DATA;
1460        elf->e_ident[EI_VERSION] = EV_CURRENT;
1461        elf->e_ident[EI_OSABI] = ELF_OSABI;
1462
1463        elf->e_type = ET_CORE;
1464        elf->e_machine = machine;
1465        elf->e_version = EV_CURRENT;
1466        elf->e_phoff = sizeof(struct elfhdr);
1467        elf->e_flags = flags;
1468        elf->e_ehsize = sizeof(struct elfhdr);
1469        elf->e_phentsize = sizeof(struct elf_phdr);
1470        elf->e_phnum = segs;
1471}
1472
1473static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1474{
1475        phdr->p_type = PT_NOTE;
1476        phdr->p_offset = offset;
1477        phdr->p_vaddr = 0;
1478        phdr->p_paddr = 0;
1479        phdr->p_filesz = sz;
1480        phdr->p_memsz = 0;
1481        phdr->p_flags = 0;
1482        phdr->p_align = 0;
1483}
1484
1485static void fill_note(struct memelfnote *note, const char *name, int type, 
1486                unsigned int sz, void *data)
1487{
1488        note->name = name;
1489        note->type = type;
1490        note->datasz = sz;
1491        note->data = data;
1492}
1493
1494/*
1495 * Fill all the fields in prstatus from the given task struct, except the
1496 * registers, which need to be filled in separately.
1497 */
1498static void fill_prstatus(struct elf_prstatus *prstatus,
1499                struct task_struct *p, long signr)
1500{
1501        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1502        prstatus->pr_sigpend = p->pending.signal.sig[0];
1503        prstatus->pr_sighold = p->blocked.sig[0];
1504        rcu_read_lock();
1505        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1506        rcu_read_unlock();
1507        prstatus->pr_pid = task_pid_vnr(p);
1508        prstatus->pr_pgrp = task_pgrp_vnr(p);
1509        prstatus->pr_sid = task_session_vnr(p);
1510        if (thread_group_leader(p)) {
1511                struct task_cputime cputime;
1512
1513                /*
1514                 * This is the record for the group leader.  It shows the
1515                 * group-wide total, not its individual thread total.
1516                 */
1517                thread_group_cputime(p, &cputime);
1518                prstatus->pr_utime = ns_to_timeval(cputime.utime);
1519                prstatus->pr_stime = ns_to_timeval(cputime.stime);
1520        } else {
1521                u64 utime, stime;
1522
1523                task_cputime(p, &utime, &stime);
1524                prstatus->pr_utime = ns_to_timeval(utime);
1525                prstatus->pr_stime = ns_to_timeval(stime);
1526        }
1527
1528        prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1529        prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
1530}
1531
1532static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1533                       struct mm_struct *mm)
1534{
1535        const struct cred *cred;
1536        unsigned int i, len;
1537        
1538        /* first copy the parameters from user space */
1539        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1540
1541        len = mm->arg_end - mm->arg_start;
1542        if (len >= ELF_PRARGSZ)
1543                len = ELF_PRARGSZ-1;
1544        if (copy_from_user(&psinfo->pr_psargs,
1545                           (const char __user *)mm->arg_start, len))
1546                return -EFAULT;
1547        for(i = 0; i < len; i++)
1548                if (psinfo->pr_psargs[i] == 0)
1549                        psinfo->pr_psargs[i] = ' ';
1550        psinfo->pr_psargs[len] = 0;
1551
1552        rcu_read_lock();
1553        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1554        rcu_read_unlock();
1555        psinfo->pr_pid = task_pid_vnr(p);
1556        psinfo->pr_pgrp = task_pgrp_vnr(p);
1557        psinfo->pr_sid = task_session_vnr(p);
1558
1559        i = p->state ? ffz(~p->state) + 1 : 0;
1560        psinfo->pr_state = i;
1561        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1562        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1563        psinfo->pr_nice = task_nice(p);
1564        psinfo->pr_flag = p->flags;
1565        rcu_read_lock();
1566        cred = __task_cred(p);
1567        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1568        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1569        rcu_read_unlock();
1570        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1571        
1572        return 0;
1573}
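
/*
 * Illustrative mapping for pr_state/pr_sname above: a runnable task
 * (state 0) yields i == 0 and 'R', TASK_INTERRUPTIBLE (bit 0) yields
 * i == 1 and 'S', TASK_UNINTERRUPTIBLE (bit 1) yields i == 2 and 'D',
 * and so on through "RSDTZW"; any state past index 5 is shown as '.'.
 */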
1574
1575static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1576{
1577        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1578        int i = 0;
1579        do
1580                i += 2;
1581        while (auxv[i - 2] != AT_NULL);
1582        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1583}
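
/*
 * saved_auxv is a flat array of (type, value) pairs terminated by an
 * AT_NULL pair, e.g. (values hypothetical):
 *
 *   { AT_PHDR, 0x400040, AT_PHNUM, 9, ..., AT_NULL, 0 }
 *
 * The loop above counts entries up to and including the AT_NULL pair,
 * and the array is dumped verbatim as the NT_AUXV note.
 */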
1584
1585static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1586                const kernel_siginfo_t *siginfo)
1587{
1588        mm_segment_t old_fs = get_fs();
1589        set_fs(KERNEL_DS);
1590        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1591        set_fs(old_fs);
1592        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1593}
1594
1595#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1596/*
1597 * Format of NT_FILE note:
1598 *
1599 * long count     -- how many files are mapped
1600 * long page_size -- units for file_ofs
1601 * array of [COUNT] elements of
1602 *   long start
1603 *   long end
1604 *   long file_ofs
1605 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1606 */
1607static int fill_files_note(struct memelfnote *note)
1608{
1609        struct vm_area_struct *vma;
1610        unsigned count, size, names_ofs, remaining, n;
1611        user_long_t *data;
1612        user_long_t *start_end_ofs;
1613        char *name_base, *name_curpos;
1614
1615        /* *Estimated* file count and total data size needed */
1616        count = current->mm->map_count;
1617        if (count > UINT_MAX / 64)
1618                return -EINVAL;
1619        size = count * 64;
1620
1621        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1622 alloc:
1623        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1624                return -EINVAL;
1625        size = round_up(size, PAGE_SIZE);
1626        data = kvmalloc(size, GFP_KERNEL);
1627        if (ZERO_OR_NULL_PTR(data))
1628                return -ENOMEM;
1629
1630        start_end_ofs = data + 2;
1631        name_base = name_curpos = ((char *)data) + names_ofs;
1632        remaining = size - names_ofs;
1633        count = 0;
1634        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1635                struct file *file;
1636                const char *filename;
1637
1638                file = vma->vm_file;
1639                if (!file)
1640                        continue;
1641                filename = file_path(file, name_curpos, remaining);
1642                if (IS_ERR(filename)) {
1643                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1644                                kvfree(data);
1645                                size = size * 5 / 4;
1646                                goto alloc;
1647                        }
1648                        continue;
1649                }
1650
1651                /* file_path() fills at the end, move name down */
1652                /* n = strlen(filename) + 1: */
1653                n = (name_curpos + remaining) - filename;
1654                remaining = filename - name_curpos;
1655                memmove(name_curpos, filename, n);
1656                name_curpos += n;
1657
1658                *start_end_ofs++ = vma->vm_start;
1659                *start_end_ofs++ = vma->vm_end;
1660                *start_end_ofs++ = vma->vm_pgoff;
1661                count++;
1662        }
1663
1664        /* Now we know the exact count of files, so we can store it */
1665        data[0] = count;
1666        data[1] = PAGE_SIZE;
1667        /*
1668         * The count is usually less than current->mm->map_count,
1669         * so we need to move the filenames down.
1670         */
1671        n = current->mm->map_count - count;
1672        if (n != 0) {
1673                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1674                memmove(name_base - shift_bytes, name_base,
1675                        name_curpos - name_base);
1676                name_curpos -= shift_bytes;
1677        }
1678
1679        size = name_curpos - (char *)data;
1680        fill_note(note, "CORE", NT_FILE, size, data);
1681        return 0;
1682}
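
/*
 * Sketch of the finished NT_FILE payload for two mapped files
 * (addresses and names hypothetical):
 *
 *   data[0] = 2              count
 *   data[1] = PAGE_SIZE      units for file_ofs
 *   data[2..4]               start, end, pgoff of the first vma
 *   data[5..7]               start, end, pgoff of the second vma
 *   "/lib/ld.so\0/bin/cat\0" the filenames, NUL-separated
 *
 * Note that vm_pgoff is stored in PAGE_SIZE units, which is why data[1]
 * records the page size.
 */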
1683
1684#ifdef CORE_DUMP_USE_REGSET
1685#include <linux/regset.h>
1686
1687struct elf_thread_core_info {
1688        struct elf_thread_core_info *next;
1689        struct task_struct *task;
1690        struct elf_prstatus prstatus;
1691        struct memelfnote notes[];
1692};
1693
1694struct elf_note_info {
1695        struct elf_thread_core_info *thread;
1696        struct memelfnote psinfo;
1697        struct memelfnote signote;
1698        struct memelfnote auxv;
1699        struct memelfnote files;
1700        user_siginfo_t csigdata;
1701        size_t size;
1702        int thread_notes;
1703};
1704
1705/*
1706 * When a regset has a writeback hook, we call it on each thread before
1707 * dumping user memory.  On register window machines, this makes sure the
1708 * user memory backing the register data is up to date before we read it.
1709 */
1710static void do_thread_regset_writeback(struct task_struct *task,
1711                                       const struct user_regset *regset)
1712{
1713        if (regset->writeback)
1714                regset->writeback(task, regset, 1);
1715}
1716
1717#ifndef PRSTATUS_SIZE
1718#define PRSTATUS_SIZE(S, R) sizeof(S)
1719#endif
1720
1721#ifndef SET_PR_FPVALID
1722#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1723#endif
1724
1725static int fill_thread_core_info(struct elf_thread_core_info *t,
1726                                 const struct user_regset_view *view,
1727                                 long signr, size_t *total)
1728{
1729        unsigned int i;
1730        unsigned int regset0_size = regset_size(t->task, &view->regsets[0]);
1731
1732        /*
1733         * NT_PRSTATUS is the one special case, because the regset data
1734         * goes into the pr_reg field inside the note contents, rather
1735         * than being the whole note contents.  We fill the rest in here.
1736         * We assume that regset 0 is NT_PRSTATUS.
1737         */
1738        fill_prstatus(&t->prstatus, t->task, signr);
1739        (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size,
1740                                    &t->prstatus.pr_reg, NULL);
1741
1742        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1743                  PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
1744        *total += notesize(&t->notes[0]);
1745
1746        do_thread_regset_writeback(t->task, &view->regsets[0]);
1747
1748        /*
1749         * Every other regset might generate a note too.  For each regset
1750         * that has no core_note_type or is inactive, we leave t->notes[i]
1751         * all zero and we'll know to skip writing it later.
1752         */
1753        for (i = 1; i < view->n; ++i) {
1754                const struct user_regset *regset = &view->regsets[i];
1755                do_thread_regset_writeback(t->task, regset);
1756                if (regset->core_note_type && regset->get &&
1757                    (!regset->active || regset->active(t->task, regset) > 0)) {
1758                        int ret;
1759                        size_t size = regset_size(t->task, regset);
1760                        void *data = kmalloc(size, GFP_KERNEL);
1761                        if (unlikely(!data))
1762                                return 0;
1763                        ret = regset->get(t->task, regset,
1764                                          0, size, data, NULL);
1765                        if (unlikely(ret))
1766                                kfree(data);
1767                        else {
1768                                if (regset->core_note_type != NT_PRFPREG)
1769                                        fill_note(&t->notes[i], "LINUX",
1770                                                  regset->core_note_type,
1771                                                  size, data);
1772                                else {
1773                                        SET_PR_FPVALID(&t->prstatus,
1774                                                        1, regset0_size);
1775                                        fill_note(&t->notes[i], "CORE",
1776                                                  NT_PRFPREG, size, data);
1777                                }
1778                                *total += notesize(&t->notes[i]);
1779                        }
1780                }
1781        }
1782
1783        return 1;
1784}
1785
1786static int fill_note_info(struct elfhdr *elf, int phdrs,
1787                          struct elf_note_info *info,
1788                          const kernel_siginfo_t *siginfo, struct pt_regs *regs)
1789{
1790        struct task_struct *dump_task = current;
1791        const struct user_regset_view *view = task_user_regset_view(dump_task);
1792        struct elf_thread_core_info *t;
1793        struct elf_prpsinfo *psinfo;
1794        struct core_thread *ct;
1795        unsigned int i;
1796
1797        info->size = 0;
1798        info->thread = NULL;
1799
1800        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1801        if (psinfo == NULL) {
1802                info->psinfo.data = NULL; /* So we don't free this wrongly */
1803                return 0;
1804        }
1805
1806        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1807
1808        /*
1809         * Figure out how many notes we're going to need for each thread.
1810         */
1811        info->thread_notes = 0;
1812        for (i = 0; i < view->n; ++i)
1813                if (view->regsets[i].core_note_type != 0)
1814                        ++info->thread_notes;
1815
1816        /*
1817         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1818         * since it is our one special case.
1819         */
1820        if (unlikely(info->thread_notes == 0) ||
1821            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1822                WARN_ON(1);
1823                return 0;
1824        }
1825
1826        /*
1827         * Initialize the ELF file header.
1828         */
1829        fill_elf_header(elf, phdrs,
1830                        view->e_machine, view->e_flags);
1831
1832        /*
1833         * Allocate a structure for each thread.
1834         */
1835        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1836                t = kzalloc(offsetof(struct elf_thread_core_info,
1837                                     notes[info->thread_notes]),
1838                            GFP_KERNEL);
1839                if (unlikely(!t))
1840                        return 0;
1841
1842                t->task = ct->task;
1843                if (ct->task == dump_task || !info->thread) {
1844                        t->next = info->thread;
1845                        info->thread = t;
1846                } else {
1847                        /*
1848                         * Make sure to keep the original task at
1849                         * the head of the list.
1850                         */
1851                        t->next = info->thread->next;
1852                        info->thread->next = t;
1853                }
1854        }
1855
1856        /*
1857         * Now fill in each thread's information.
1858         */
1859        for (t = info->thread; t != NULL; t = t->next)
1860                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1861                        return 0;
1862
1863        /*
1864         * Fill in the two process-wide notes.
1865         */
1866        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1867        info->size += notesize(&info->psinfo);
1868
1869        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1870        info->size += notesize(&info->signote);
1871
1872        fill_auxv_note(&info->auxv, current->mm);
1873        info->size += notesize(&info->auxv);
1874
1875        if (fill_files_note(&info->files) == 0)
1876                info->size += notesize(&info->files);
1877
1878        return 1;
1879}
1880
1881static size_t get_note_info_size(struct elf_note_info *info)
1882{
1883        return info->size;
1884}
1885
1886/*
1887 * Write all the notes for each thread.  When writing the first thread, the
1888 * process-wide notes are interleaved after the first thread-specific note.
1889 */
1890static int write_note_info(struct elf_note_info *info,
1891                           struct coredump_params *cprm)
1892{
1893        bool first = true;
1894        struct elf_thread_core_info *t = info->thread;
1895
1896        do {
1897                int i;
1898
1899                if (!writenote(&t->notes[0], cprm))
1900                        return 0;
1901
1902                if (first && !writenote(&info->psinfo, cprm))
1903                        return 0;
1904                if (first && !writenote(&info->signote, cprm))
1905                        return 0;
1906                if (first && !writenote(&info->auxv, cprm))
1907                        return 0;
1908                if (first && info->files.data &&
1909                                !writenote(&info->files, cprm))
1910                        return 0;
1911
1912                for (i = 1; i < info->thread_notes; ++i)
1913                        if (t->notes[i].data &&
1914                            !writenote(&t->notes[i], cprm))
1915                                return 0;
1916
1917                first = false;
1918                t = t->next;
1919        } while (t);
1920
1921        return 1;
1922}
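
/*
 * For a two-thread process the resulting note segment is therefore laid
 * out roughly as:
 *
 *   NT_PRSTATUS                          thread 1
 *   NT_PRPSINFO, NT_SIGINFO, NT_AUXV,
 *   NT_FILE (if present)                 process-wide, emitted once
 *   remaining regset notes               thread 1
 *   NT_PRSTATUS                          thread 2
 *   remaining regset notes               thread 2
 */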
1923
1924static void free_note_info(struct elf_note_info *info)
1925{
1926        struct elf_thread_core_info *threads = info->thread;
1927        while (threads) {
1928                unsigned int i;
1929                struct elf_thread_core_info *t = threads;
1930                threads = t->next;
1931                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1932                for (i = 1; i < info->thread_notes; ++i)
1933                        kfree(t->notes[i].data);
1934                kfree(t);
1935        }
1936        kfree(info->psinfo.data);
1937        kvfree(info->files.data);
1938}
1939
1940#else
1941
1942/* Here is the structure in which status of each thread is captured. */
1943struct elf_thread_status
1944{
1945        struct list_head list;
1946        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1947        elf_fpregset_t fpu;             /* NT_PRFPREG */
1948        struct task_struct *thread;
1949#ifdef ELF_CORE_COPY_XFPREGS
1950        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1951#endif
1952        struct memelfnote notes[3];
1953        int num_notes;
1954};
1955
1956/*
1957 * In order to add the specific thread information for the ELF file format,
1958 * we need to keep a linked list of every thread's pr_status and then create
1959 * a single section for them in the final core file.
1960 */
1961static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1962{
1963        int sz = 0;
1964        struct task_struct *p = t->thread;
1965        t->num_notes = 0;
1966
1967        fill_prstatus(&t->prstatus, p, signr);
1968        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1969        
1970        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1971                  &(t->prstatus));
1972        t->num_notes++;
1973        sz += notesize(&t->notes[0]);
1974
1975        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1976                                                                &t->fpu))) {
1977                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1978                          &(t->fpu));
1979                t->num_notes++;
1980                sz += notesize(&t->notes[1]);
1981        }
1982
1983#ifdef ELF_CORE_COPY_XFPREGS
1984        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1985                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1986                          sizeof(t->xfpu), &t->xfpu);
1987                t->num_notes++;
1988                sz += notesize(&t->notes[2]);
1989        }
1990#endif  
1991        return sz;
1992}
1993
1994struct elf_note_info {
1995        struct memelfnote *notes;
1996        struct memelfnote *notes_files;
1997        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1998        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1999        struct list_head thread_list;
2000        elf_fpregset_t *fpu;
2001#ifdef ELF_CORE_COPY_XFPREGS
2002        elf_fpxregset_t *xfpu;
2003#endif
2004        user_siginfo_t csigdata;
2005        int thread_status_size;
2006        int numnote;
2007};
2008
2009static int elf_note_info_init(struct elf_note_info *info)
2010{
2011        memset(info, 0, sizeof(*info));
2012        INIT_LIST_HEAD(&info->thread_list);
2013
2014        /* Allocate space for ELF notes */
2015        info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
2016        if (!info->notes)
2017                return 0;
2018        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
2019        if (!info->psinfo)
2020                return 0;
2021        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
2022        if (!info->prstatus)
2023                return 0;
2024        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2025        if (!info->fpu)
2026                return 0;
2027#ifdef ELF_CORE_COPY_XFPREGS
2028        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
2029        if (!info->xfpu)
2030                return 0;
2031#endif
2032        return 1;
2033}
2034
2035static int fill_note_info(struct elfhdr *elf, int phdrs,
2036                          struct elf_note_info *info,
2037                          const kernel_siginfo_t *siginfo, struct pt_regs *regs)
2038{
2039        struct core_thread *ct;
2040        struct elf_thread_status *ets;
2041
2042        if (!elf_note_info_init(info))
2043                return 0;
2044
2045        for (ct = current->mm->core_state->dumper.next;
2046                                        ct; ct = ct->next) {
2047                ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2048                if (!ets)
2049                        return 0;
2050
2051                ets->thread = ct->task;
2052                list_add(&ets->list, &info->thread_list);
2053        }
2054
2055        list_for_each_entry(ets, &info->thread_list, list) {
2056                int sz;
2057
2058                sz = elf_dump_thread_status(siginfo->si_signo, ets);
2059                info->thread_status_size += sz;
2060        }
2061        /* now collect the dump for the current task */
2062        memset(info->prstatus, 0, sizeof(*info->prstatus));
2063        fill_prstatus(info->prstatus, current, siginfo->si_signo);
2064        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2065
2066        /* Set up header */
2067        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2068
2069        /*
2070         * Set up the notes in similar form to SVR4 core dumps made
2071         * with info from their /proc.
2072         */
2073
2074        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2075                  sizeof(*info->prstatus), info->prstatus);
2076        fill_psinfo(info->psinfo, current->group_leader, current->mm);
2077        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2078                  sizeof(*info->psinfo), info->psinfo);
2079
2080        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2081        fill_auxv_note(info->notes + 3, current->mm);
2082        info->numnote = 4;
2083
2084        if (fill_files_note(info->notes + info->numnote) == 0) {
2085                info->notes_files = info->notes + info->numnote;
2086                info->numnote++;
2087        }
2088
2089        /* Try to dump the FPU. */
2090        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2091                                                               info->fpu);
2092        if (info->prstatus->pr_fpvalid)
2093                fill_note(info->notes + info->numnote++,
2094                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2095#ifdef ELF_CORE_COPY_XFPREGS
2096        if (elf_core_copy_task_xfpregs(current, info->xfpu))
2097                fill_note(info->notes + info->numnote++,
2098                          "LINUX", ELF_CORE_XFPREG_TYPE,
2099                          sizeof(*info->xfpu), info->xfpu);
2100#endif
2101
2102        return 1;
2103}
2104
2105static size_t get_note_info_size(struct elf_note_info *info)
2106{
2107        int sz = 0;
2108        int i;
2109
2110        for (i = 0; i < info->numnote; i++)
2111                sz += notesize(info->notes + i);
2112
2113        sz += info->thread_status_size;
2114
2115        return sz;
2116}
2117
2118static int write_note_info(struct elf_note_info *info,
2119                           struct coredump_params *cprm)
2120{
2121        struct elf_thread_status *ets;
2122        int i;
2123
2124        for (i = 0; i < info->numnote; i++)
2125                if (!writenote(info->notes + i, cprm))
2126                        return 0;
2127
2128        /* write out the thread status notes section */
2129        list_for_each_entry(ets, &info->thread_list, list) {
2130                for (i = 0; i < ets->num_notes; i++)
2131                        if (!writenote(&ets->notes[i], cprm))
2132                                return 0;
2133        }
2134
2135        return 1;
2136}
2137
2138static void free_note_info(struct elf_note_info *info)
2139{
2140        while (!list_empty(&info->thread_list)) {
2141                struct list_head *tmp = info->thread_list.next;
2142                list_del(tmp);
2143                kfree(list_entry(tmp, struct elf_thread_status, list));
2144        }
2145
2146        /* Free data possibly allocated by fill_files_note(): */
2147        if (info->notes_files)
2148                kvfree(info->notes_files->data);
2149
2150        kfree(info->prstatus);
2151        kfree(info->psinfo);
2152        kfree(info->notes);
2153        kfree(info->fpu);
2154#ifdef ELF_CORE_COPY_XFPREGS
2155        kfree(info->xfpu);
2156#endif
2157}
2158
2159#endif
2160
2161static struct vm_area_struct *first_vma(struct task_struct *tsk,
2162                                        struct vm_area_struct *gate_vma)
2163{
2164        struct vm_area_struct *ret = tsk->mm->mmap;
2165
2166        if (ret)
2167                return ret;
2168        return gate_vma;
2169}
2170/*
2171 * Helper function for iterating across a vma list.  It ensures that the caller
2172 * will visit `gate_vma' prior to terminating the search.
2173 */
2174static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2175                                        struct vm_area_struct *gate_vma)
2176{
2177        struct vm_area_struct *ret;
2178
2179        ret = this_vma->vm_next;
2180        if (ret)
2181                return ret;
2182        if (this_vma == gate_vma)
2183                return NULL;
2184        return gate_vma;
2185}
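
/*
 * Typical usage, as in elf_core_dump() below, which visits every user
 * vma and then the gate vma exactly once:
 *
 *   for (vma = first_vma(current, gate_vma); vma != NULL;
 *                   vma = next_vma(vma, gate_vma))
 *           ...
 */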
2186
2187static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2188                             elf_addr_t e_shoff, int segs)
2189{
2190        elf->e_shoff = e_shoff;
2191        elf->e_shentsize = sizeof(*shdr4extnum);
2192        elf->e_shnum = 1;
2193        elf->e_shstrndx = SHN_UNDEF;
2194
2195        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2196
2197        shdr4extnum->sh_type = SHT_NULL;
2198        shdr4extnum->sh_size = elf->e_shnum;
2199        shdr4extnum->sh_link = elf->e_shstrndx;
2200        shdr4extnum->sh_info = segs;
2201}
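
/*
 * Example: if a process had 70000 mappings, e_phnum would be clamped to
 * PN_XNUM (0xffff) and the true segment count stored in sh_info of this
 * extra section header, following the extended numbering scheme
 * described in include/linux/elf.h.
 */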
2202
2203/*
2204 * Actual dumper
2205 *
2206 * This is a two-pass process; first we find the offsets of the bits,
2207 * and then they are actually written out.  If we run out of core limit
2208 * we just truncate.
2209 */
2210static int elf_core_dump(struct coredump_params *cprm)
2211{
2212        int has_dumped = 0;
2213        mm_segment_t fs;
2214        int segs, i;
2215        size_t vma_data_size = 0;
2216        struct vm_area_struct *vma, *gate_vma;
2217        struct elfhdr *elf = NULL;
2218        loff_t offset = 0, dataoff;
2219        struct elf_note_info info = { };
2220        struct elf_phdr *phdr4note = NULL;
2221        struct elf_shdr *shdr4extnum = NULL;
2222        Elf_Half e_phnum;
2223        elf_addr_t e_shoff;
2224        elf_addr_t *vma_filesz = NULL;
2225
2226        /*
2227         * We no longer stop all VM operations.
2228         * 
2229         * This is because any processes that could possibly change the
2230         * map_count or the mmap/vma pages are now blocked in do_exit
2231         * until current has finished this core dump.
2232         *
2233         * Only ptrace can touch these memory addresses, but it doesn't change
2234         * the map_count or the pages allocated. So no possibility of crashing
2235         * exists while dumping the mm->vm_next areas to the core file.
2236         */
2237  
2238        /* alloc memory for large data structures: too large to be on stack */
2239        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2240        if (!elf)
2241                goto out;
2242        /*
2243         * The number of segs is recorded in the ELF header as a 16-bit value.
2244         * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2245         */
2246        segs = current->mm->map_count;
2247        segs += elf_core_extra_phdrs();
2248
2249        gate_vma = get_gate_vma(current->mm);
2250        if (gate_vma != NULL)
2251                segs++;
2252
2253        /* for notes section */
2254        segs++;
2255
2256        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2257         * this, the kernel supports extended numbering. Have a look at
2258         * include/linux/elf.h for further information. */
2259        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2260
2261        /*
2262         * Collect all the non-memory information about the process for the
2263         * notes.  This also sets up the file header.
2264         */
2265        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2266                goto cleanup;
2267
2268        has_dumped = 1;
2269
2270        fs = get_fs();
2271        set_fs(KERNEL_DS);
2272
2273        offset += sizeof(*elf);                         /* Elf header */
2274        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2275
2276        /* Write notes phdr entry */
2277        {
2278                size_t sz = get_note_info_size(&info);
2279
2280                sz += elf_coredump_extra_notes_size();
2281
2282                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2283                if (!phdr4note)
2284                        goto end_coredump;
2285
2286                fill_elf_note_phdr(phdr4note, sz, offset);
2287                offset += sz;
2288        }
2289
2290        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
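
        /*
         * Sketch of the resulting core file layout from here on:
         *
         *   ELF header | program headers | note data | pad to page
         *   | PT_LOAD segment data ... | extra data | extnum shdr
         *
         * dataoff marks where the page-aligned segment data will start.
         */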
2291
2292        if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2293                goto end_coredump;
2294        vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
2295                              GFP_KERNEL);
2296        if (ZERO_OR_NULL_PTR(vma_filesz))
2297                goto end_coredump;
2298
2299        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2300                        vma = next_vma(vma, gate_vma)) {
2301                unsigned long dump_size;
2302
2303                dump_size = vma_dump_size(vma, cprm->mm_flags);
2304                vma_filesz[i++] = dump_size;
2305                vma_data_size += dump_size;
2306        }
2307
2308        offset += vma_data_size;
2309        offset += elf_core_extra_data_size();
2310        e_shoff = offset;
2311
2312        if (e_phnum == PN_XNUM) {
2313                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2314                if (!shdr4extnum)
2315                        goto end_coredump;
2316                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2317        }
2318
2319        offset = dataoff;
2320
2321        if (!dump_emit(cprm, elf, sizeof(*elf)))
2322                goto end_coredump;
2323
2324        if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2325                goto end_coredump;
2326
2327        /* Write program headers for segments dump */
2328        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2329                        vma = next_vma(vma, gate_vma)) {
2330                struct elf_phdr phdr;
2331
2332                phdr.p_type = PT_LOAD;
2333                phdr.p_offset = offset;
2334                phdr.p_vaddr = vma->vm_start;
2335                phdr.p_paddr = 0;
2336                phdr.p_filesz = vma_filesz[i++];
2337                phdr.p_memsz = vma->vm_end - vma->vm_start;
2338                offset += phdr.p_filesz;
2339                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2340                if (vma->vm_flags & VM_WRITE)
2341                        phdr.p_flags |= PF_W;
2342                if (vma->vm_flags & VM_EXEC)
2343                        phdr.p_flags |= PF_X;
2344                phdr.p_align = ELF_EXEC_PAGESIZE;
2345
2346                if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2347                        goto end_coredump;
2348        }
2349
2350        if (!elf_core_write_extra_phdrs(cprm, offset))
2351                goto end_coredump;
2352
2353        /* write out the notes section */
2354        if (!write_note_info(&info, cprm))
2355                goto end_coredump;
2356
2357        if (elf_coredump_extra_notes_write(cprm))
2358                goto end_coredump;
2359
2360        /* Align to page */
2361        if (!dump_skip(cprm, dataoff - cprm->pos))
2362                goto end_coredump;
2363
2364        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2365                        vma = next_vma(vma, gate_vma)) {
2366                unsigned long addr;
2367                unsigned long end;
2368
2369                end = vma->vm_start + vma_filesz[i++];
2370
2371                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2372                        struct page *page;
2373                        int stop;
2374
2375                        page = get_dump_page(addr);
2376                        if (page) {
2377                                void *kaddr = kmap(page);
2378                                stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2379                                kunmap(page);
2380                                put_page(page);
2381                        } else
2382                                stop = !dump_skip(cprm, PAGE_SIZE);
2383                        if (stop)
2384                                goto end_coredump;
2385                }
2386        }
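
        /*
         * Pages that get_dump_page() could not pin were skipped above
         * rather than copied, so never-instantiated ranges come out as
         * holes (dump_skip() lseeks on seekable files and writes zeroes
         * otherwise), typically leaving the core file sparse.
         */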
2387        dump_truncate(cprm);
2388
2389        if (!elf_core_write_extra_data(cprm))
2390                goto end_coredump;
2391
2392        if (e_phnum == PN_XNUM) {
2393                if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2394                        goto end_coredump;
2395        }
2396
2397end_coredump:
2398        set_fs(fs);
2399
2400cleanup:
2401        free_note_info(&info);
2402        kfree(shdr4extnum);
2403        kvfree(vma_filesz);
2404        kfree(phdr4note);
2405        kfree(elf);
2406out:
2407        return has_dumped;
2408}
2409
2410#endif          /* CONFIG_ELF_CORE */
2411
2412static int __init init_elf_binfmt(void)
2413{
2414        register_binfmt(&elf_format);
2415        return 0;
2416}
2417
2418static void __exit exit_elf_binfmt(void)
2419{
2420        /* Remove the ELF loader. */
2421        unregister_binfmt(&elf_format);
2422}
2423
2424core_initcall(init_elf_binfmt);
2425module_exit(exit_elf_binfmt);
2426MODULE_LICENSE("GPL");
2427