// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
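/*
 * For example, with ELF_MIN_ALIGN == 4096:
 *   ELF_PAGESTART(0x1234)  == 0x1000   (round down to page start)
 *   ELF_PAGEOFFSET(0x1234) == 0x234    (offset within the page)
 *   ELF_PAGEALIGN(0x1234)  == 0x2000   (round up to next page)
 */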

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

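/*
 * An address is "bad" if it lies outside the task's address range.  Since
 * error codes are small negative numbers, a failed vm_mmap() return value
 * cast to unsigned long also tests as bad, so BAD_ADDR() doubles as an
 * error check on mapping results.
 */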
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

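/*
 * Map anonymous pages to cover the page-aligned range [start, end) and
 * record the new program break; this materialises the zero-filled tail
 * of the bss.
 */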
static int set_brk(unsigned long start, unsigned long end, int prot)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                /*
                 * Map the last of the bss segment.
                 * If the header is requesting these pages to be
                 * executable, honour that (ppc32 needs this).
                 */
                int error = vm_brk_flags(start, end - start,
                                prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        return error;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would otherwise
   contain junk from the file that should not be in memory.
   For example, with 4K pages and elf_bss == 0x1234, the
   0xdcc bytes from 0x1234 up to 0x2000 are cleared.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
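/*
 * In the common grow-down case, STACK_ALLOC(sp, len) moves sp down by
 * len bytes and returns the new (lower) sp, so the allocation occupies
 * [sp, sp + len).  In the grow-up case it returns the old sp and bumps
 * sp past the allocation.  STACK_ROUND() keeps the final stack pointer
 * 16-byte aligned, which most ABIs expect.
 */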

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

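        /* argc itself, the argv pointers plus their NULL terminator, and
           the envp pointers plus their NULL terminator. */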
        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

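        /*
         * The resulting initial stack, from the lowest address up, follows
         * the System V ABI layout:
         *
         *      argc
         *      argv[0] ... argv[argc - 1], NULL
         *      envp[0] ... envp[envc - 1], NULL
         *      auxv entries, terminated by AT_NULL
         *
         * The strings themselves (and the platform/random bytes copied
         * above) live higher up on the stack.
         */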
        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;

        /* Populate list of argv pointers back to argv strings. */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, sp++))
                return -EFAULT;
        current->mm->arg_end = p;

        /* Populate list of envp pointers back to envp strings. */
        current->mm->env_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, sp++))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                const struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        if ((type & MAP_FIXED_NOREPLACE) &&
            PTR_ERR((void *)map_addr) == -EEXIST)
                pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
                        task_pid_nr(current), current->comm, (void *)addr);

        return(map_addr);
}

#endif /* !elf_map */

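/*
 * total_mapping_size() - span needed to map every PT_LOAD segment
 *
 * Returns the distance from the page-aligned start of the first PT_LOAD
 * segment to the end of the last one, i.e. the amount of contiguous
 * address space the whole image requires, or 0 if there are no PT_LOAD
 * headers.
 */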
static unsigned long total_mapping_size(const struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
                                       struct file *elf_file)
{
        struct elf_phdr *elf_phdata = NULL;
        int retval, err = -1;
        loff_t pos = elf_ex->e_phoff;
        unsigned int size;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;

        /* Sanity check the number of program headers... */
        /* ...and their total size. */
        size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
        if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
                goto out;

        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        /* Read in the program headers */
        retval = kernel_read(elf_file, elf_phdata, size, &pos);
        if (retval != size) {
                err = (retval < 0) ? retval : -EIO;
                goto out;
        }

        /* Success! */
        err = 0;
out:
        if (err) {
                kfree(elf_phdata);
                elf_phdata = NULL;
        }
        return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:       The main ELF header
 * @phdr:       The program header to check
 * @elf:        The open ELF file
 * @is_interp:  True if the phdr is from the interpreter of the ELF being
 *              loaded, else false.
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
                                   struct elf_phdr *phdr,
                                   struct file *elf, bool is_interp,
                                   struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:       The main ELF header
 * @has_interp: True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
                                 struct elfhdr *interp_ehdr,
                                 struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

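/*
 * Translate ELF segment flags (PF_*) into mmap protection bits (PROT_*);
 * e.g. a typical text segment with p_flags == (PF_R | PF_X) yields
 * PROT_READ | PROT_EXEC.
 */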
static inline int make_prot(u32 p_flags)
{
        int prot = 0;

        if (p_flags & PF_R)
                prot |= PROT_READ;
        if (p_flags & PF_W)
                prot |= PROT_WRITE;
        if (p_flags & PF_X)
                prot |= PROT_EXEC;
        return prot;
}

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        int bss_prot = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int i;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex) ||
            elf_check_fdpic(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        total_size = total_mapping_size(interp_elf_phdata,
                                        interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out;
        }

        eppnt = interp_elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = make_prot(eppnt->p_flags);
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED_NOREPLACE;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
                        if (k > last_bss) {
                                last_bss = k;
                                bss_prot = elf_prot;
                        }
                }
        }

        /*
         * Now fill out the bss section: first pad the last page from
         * the file up to the page boundary, and zero it from elf_bss
         * up to the end of the page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out;
        }
        /*
         * Next, align both the file and mem bss up to the page size,
         * since this is where elf_bss was just zeroed up to, and where
         * last_bss will end after the vm_brk_flags() below.
         */
        elf_bss = ELF_PAGEALIGN(elf_bss);
        last_bss = ELF_PAGEALIGN(last_bss);
        /* Finally, if there is still more bss to allocate, do it. */
        if (last_bss > elf_bss) {
                error = vm_brk_flags(elf_bss, last_bss - elf_bss,
                                bss_prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        goto out;
        }

        error = load_addr;
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

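/*
 * Slide the stack top by up to STACK_RND_MASK pages: down for normal
 * stacks, up for CONFIG_STACK_GROWSUP.  With 4K pages and the default
 * mask this gives up to 8MB of randomization.
 */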
static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned long random_variable = 0;

        if (current->flags & PF_RANDOMIZE) {
                random_variable = get_random_long();
                random_variable &= STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
        unsigned long elf_bss, elf_brk;
        int bss_prot = 0;
        int retval, i;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;
        struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
        struct pt_regs *regs;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (elf_check_fdpic(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
        if (!elf_phdata)
                goto out;

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                char *elf_interpreter;
                loff_t pos;

                if (elf_ppnt->p_type != PT_INTERP)
                        continue;

                /*
                 * This is the program interpreter used for shared libraries -
                 * for now assume that this is an a.out format binary.
                 */
                retval = -ENOEXEC;
                if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
                        goto out_free_ph;

                retval = -ENOMEM;
                elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
                if (!elf_interpreter)
                        goto out_free_ph;

                pos = elf_ppnt->p_offset;
                retval = kernel_read(bprm->file, elf_interpreter,
                                     elf_ppnt->p_filesz, &pos);
                if (retval != elf_ppnt->p_filesz) {
                        if (retval >= 0)
                                retval = -EIO;
                        goto out_free_interp;
                }
                /* make sure path is NULL terminated */
                retval = -ENOEXEC;
                if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                        goto out_free_interp;

                interpreter = open_exec(elf_interpreter);
                kfree(elf_interpreter);
                retval = PTR_ERR(interpreter);
                if (IS_ERR(interpreter))
                        goto out_free_ph;

                /*
                 * If the binary is not readable then enforce mm->dumpable = 0
                 * regardless of the interpreter's permissions.
                 */
                would_dump(bprm, interpreter);

                /* Get the exec headers */
                pos = 0;
                retval = kernel_read(interpreter, &loc->interp_elf_ex,
                                     sizeof(loc->interp_elf_ex), &pos);
                if (retval != sizeof(loc->interp_elf_ex)) {
                        if (retval >= 0)
                                retval = -EIO;
                        goto out_free_dentry;
                }

                break;

out_free_interp:
                kfree(elf_interpreter);
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                switch (elf_ppnt->p_type) {
                case PT_GNU_STACK:
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;

                case PT_LOPROC ... PT_HIPROC:
                        retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
                                                  bprm->file, false,
                                                  &arch_state);
                        if (retval)
                                goto out_free_dentry;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex) ||
                    elf_check_fdpic(&loc->interp_elf_ex))
                        goto out_free_dentry;

                /* Load the interpreter program headers */
                interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
                                                   interpreter);
                if (!interp_elf_phdata)
                        goto out_free_dentry;

                /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
                elf_ppnt = interp_elf_phdata;
                for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
                        switch (elf_ppnt->p_type) {
                        case PT_LOPROC ... PT_HIPROC:
                                retval = arch_elf_pt_proc(&loc->interp_elf_ex,
                                                          elf_ppnt, interpreter,
                                                          true, &arch_state);
                                if (retval)
                                        goto out_free_dentry;
                                break;
                        }
        }

        /*
         * Allow arch code to reject the ELF at this point, whilst it's
         * still possible to return an error to the code that invoked
         * the exec syscall.
         */
        retval = arch_check_elf(&loc->elf_ex,
                                !!interpreter, &loc->interp_elf_ex,
                                &arch_state);
        if (retval)
                goto out_free_dentry;

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY2(loc->elf_ex, &arch_state);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);
        install_exec_creds(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0)
                goto out_free_dentry;

        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
                unsigned long k, vaddr;
                unsigned long total_size = 0;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias,
                                         bss_prot);
                        if (retval)
                                goto out_free_dentry;
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }

                        /*
                         * Some binaries have overlapping elf segments and then
                         * we have to forcefully map over an existing mapping
                         * e.g. over this newly established brk mapping.
                         */
                        elf_fixed = MAP_FIXED;
                }

                elf_prot = make_prot(elf_ppnt->p_flags);

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                /*
                 * If we are loading ET_EXEC or we have already performed
                 * the ET_DYN load_addr calculations, proceed normally.
                 */
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= elf_fixed;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /*
                         * This logic is run once for the first LOAD Program
                         * Header for ET_DYN binaries to calculate the
                         * randomization (load_bias) for all the LOAD
                         * Program Headers, and to calculate the entire
                         * size of the ELF mapping (total_size). (Note that
                         * load_addr_set is set to true later once the
                         * initial mapping is performed.)
                         *
                         * There are effectively two types of ET_DYN
                         * binaries: programs (i.e. PIE: ET_DYN with INTERP)
                         * and loaders (ET_DYN without INTERP, since they
                         * _are_ the ELF interpreter). The loaders must
                         * be loaded away from programs since the program
                         * may otherwise collide with the loader (especially
                         * for ET_EXEC which does not have a randomized
                         * position). For example to handle invocations of
                         * "./ld.so someprog" to test out a new version of
                         * the loader, the subsequent program that the
                         * loader loads must avoid the loader itself, so
                         * they cannot share the same load range. Sufficient
                         * room for the brk must be allocated with the
                         * loader as well, since brk must be available with
                         * the loader.
                         *
                         * Therefore, programs are loaded offset from
                         * ELF_ET_DYN_BASE and loaders are loaded into the
                         * independently randomized mmap region (0 load_bias
                         * without MAP_FIXED).
                         */
                        if (interpreter) {
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
                                elf_flags |= elf_fixed;
                        } else
                                load_bias = 0;

                        /*
                         * Since load_bias is used for all subsequent loading
                         * calculations, we must lower it by the first vaddr
                         * so that the remaining calculations based on the
                         * ELF vaddrs will be correctly offset. The result
                         * is then page aligned.
                         */
                        load_bias = ELF_PAGESTART(load_bias - vaddr);

                        total_size = total_mapping_size(elf_phdata,
                                                        loc->elf_ex.e_phnum);
                        if (!total_size) {
                                retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk) {
                        bss_prot = elf_prot;
                        elf_brk = k;
                }
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk, bss_prot);
        if (retval)
                goto out_free_dentry;
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias, interp_elf_phdata);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(interp_elf_phdata);
        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!interpreter);
        if (retval < 0)
                goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0)
                goto out;
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                /*
                 * For architectures with ELF randomization, when executing
                 * a loader directly (i.e. no interpreter listed in ELF
                 * headers), move the brk area out of the mmap region
                 * (since it grows up, and may collide early with the stack
                 * growing down), and into the unused ELF_ET_DYN_BASE region.
                 */
                if (IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) && !interpreter)
                        current->mm->brk = current->mm->start_brk =
                                ELF_ET_DYN_BASE;

                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
                current->brk_randomized = 1;
#endif
        }

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

        regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        finalize_exec(bprm);
        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        kfree(interp_elf_phdata);
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;
        loff_t pos = 0;

        error = -ENOEXEC;
        retval = kernel_read(file, &elf_ex, sizeof(elf_ex), &pos);
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                goto out;
        if (elf_check_fdpic(&elf_ex))
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        pos = elf_ex.e_phoff;
        retval = kernel_read(file, eppnt, j, &pos);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
        bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
        if (bss > len) {
                error = vm_brk(len, bss - len);
                if (error)
                        goto out_free_ph;
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel
 * mappings that are useful for post-mortem analysis are included in every
 * core dump.  That way the dump stays fully interpretable later without
 * having to match it against the exact kernel and hardware config to work
 * out what PC values meant.  These special mappings include the vDSO,
 * vsyscall, and other architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;

        /*
         * Assume that all vmas with a .name op should always be dumped.
         * If this changes, a new vm_ops field can easily be added.
         */
        if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
                return true;

        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
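/* FILTER(type) tests the corresponding MMF_DUMP_* bit in the coredump
 * filter for this mm, as configured via /proc/<pid>/coredump_filter. */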
1332
1333        /* always dump the vdso and vsyscall sections */
1334        if (always_dump_vma(vma))
1335                goto whole;
1336
1337        if (vma->vm_flags & VM_DONTDUMP)
1338                return 0;
1339
1340        /* support for DAX */
1341        if (vma_is_dax(vma)) {
1342                if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1343                        goto whole;
1344                if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1345                        goto whole;
1346                return 0;
1347        }
1348
1349        /* Hugetlb memory check */
1350        if (vma->vm_flags & VM_HUGETLB) {
1351                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1352                        goto whole;
1353                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1354                        goto whole;
1355                return 0;
1356        }
1357
1358        /* Do not dump I/O mapped devices or special mappings */
1359        if (vma->vm_flags & VM_IO)
1360                return 0;
1361
1362        /* By default, dump shared memory if mapped from an anonymous file. */
1363        if (vma->vm_flags & VM_SHARED) {
1364                if (file_inode(vma->vm_file)->i_nlink == 0 ?
1365                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1366                        goto whole;
1367                return 0;
1368        }
1369
1370        /* Dump segments that have been written to.  */
1371        if (vma->anon_vma && FILTER(ANON_PRIVATE))
1372                goto whole;
1373        if (vma->vm_file == NULL)
1374                return 0;
1375
1376        if (FILTER(MAPPED_PRIVATE))
1377                goto whole;
1378
1379        /*
1380         * If this looks like the beginning of a DSO or executable mapping,
1381         * check for an ELF header.  If we find one, dump the first page to
1382         * aid in determining what was mapped here.
1383         */
1384        if (FILTER(ELF_HEADERS) &&
1385            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1386                u32 __user *header = (u32 __user *) vma->vm_start;
1387                u32 word;
1388                mm_segment_t fs = get_fs();
1389                /*
1390                 * Doing it this way gets the constant folded by GCC.
1391                 */
1392                union {
1393                        u32 cmp;
1394                        char elfmag[SELFMAG];
1395                } magic;
1396                BUILD_BUG_ON(SELFMAG != sizeof word);
1397                magic.elfmag[EI_MAG0] = ELFMAG0;
1398                magic.elfmag[EI_MAG1] = ELFMAG1;
1399                magic.elfmag[EI_MAG2] = ELFMAG2;
1400                magic.elfmag[EI_MAG3] = ELFMAG3;
1401                /*
1402                 * Switch to the user "segment" for get_user(),
1403                 * then put back what elf_core_dump() had in place.
1404                 */
1405                set_fs(USER_DS);
1406                if (unlikely(get_user(word, header)))
1407                        word = 0;
1408                set_fs(fs);
1409                if (word == magic.cmp)
1410                        return PAGE_SIZE;
1411        }
1412
1413#undef  FILTER
1414
1415        return 0;
1416
1417whole:
1418        return vma->vm_end - vma->vm_start;
1419}
1420
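/*
 * Illustrative sketch, not part of the kernel source: the FILTER() bits
 * tested above are set from userspace through /proc/<pid>/coredump_filter
 * (bit 0 = anonymous private, bit 1 = anonymous shared, bit 2 = file-backed
 * private, and so on; see Documentation/filesystems/proc.rst).  A minimal,
 * hypothetical helper could look like this:
 */
#if 0
#include <stdio.h>

static int set_coredump_filter(unsigned long mask)
{
        FILE *f = fopen("/proc/self/coredump_filter", "w");

        if (!f)
                return -1;
        fprintf(f, "%#lx\n", mask);     /* e.g. 0x3 = anonymous memory only */
        return fclose(f);
}
#endif
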
1421/* An ELF note in memory */
1422struct memelfnote
1423{
1424        const char *name;
1425        int type;
1426        unsigned int datasz;
1427        void *data;
1428};
1429
1430static int notesize(struct memelfnote *en)
1431{
1432        int sz;
1433
1434        sz = sizeof(struct elf_note);
1435        sz += roundup(strlen(en->name) + 1, 4);
1436        sz += roundup(en->datasz, 4);
1437
1438        return sz;
1439}
1440
1441static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1442{
1443        struct elf_note en;
1444        en.n_namesz = strlen(men->name) + 1;
1445        en.n_descsz = men->datasz;
1446        en.n_type = men->type;
1447
1448        return dump_emit(cprm, &en, sizeof(en)) &&
1449            dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1450            dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1451}
1452
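/*
 * Illustrative sketch, not part of the kernel source: writenote() emits the
 * standard ELF note record -- header, NUL-terminated name, then descriptor
 * data, with the name and the data each padded to a 4-byte boundary, which
 * is exactly what notesize() accounts for:
 */
#if 0
struct note_on_disk {                   /* hypothetical, for illustration */
        struct elf_note hdr;            /* n_namesz, n_descsz, n_type */
        char name[8];                   /* e.g. "CORE\0" plus 3 padding bytes */
        /* then n_descsz bytes of data, padded to a multiple of 4 */
};
#endif
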
1453static void fill_elf_header(struct elfhdr *elf, int segs,
1454                            u16 machine, u32 flags)
1455{
1456        memset(elf, 0, sizeof(*elf));
1457
1458        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1459        elf->e_ident[EI_CLASS] = ELF_CLASS;
1460        elf->e_ident[EI_DATA] = ELF_DATA;
1461        elf->e_ident[EI_VERSION] = EV_CURRENT;
1462        elf->e_ident[EI_OSABI] = ELF_OSABI;
1463
1464        elf->e_type = ET_CORE;
1465        elf->e_machine = machine;
1466        elf->e_version = EV_CURRENT;
1467        elf->e_phoff = sizeof(struct elfhdr);
1468        elf->e_flags = flags;
1469        elf->e_ehsize = sizeof(struct elfhdr);
1470        elf->e_phentsize = sizeof(struct elf_phdr);
1471        elf->e_phnum = segs;
1472}
1473
1474static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1475{
1476        phdr->p_type = PT_NOTE;
1477        phdr->p_offset = offset;
1478        phdr->p_vaddr = 0;
1479        phdr->p_paddr = 0;
1480        phdr->p_filesz = sz;
1481        phdr->p_memsz = 0;
1482        phdr->p_flags = 0;
1483        phdr->p_align = 0;
1484}
1485
1486static void fill_note(struct memelfnote *note, const char *name, int type, 
1487                unsigned int sz, void *data)
1488{
1489        note->name = name;
1490        note->type = type;
1491        note->datasz = sz;
1492        note->data = data;
1493}
1494
1495/*
1496 * fill up all the fields in prstatus from the given task struct, except
1497 * registers which need to be filled up separately.
1498 */
1499static void fill_prstatus(struct elf_prstatus *prstatus,
1500                struct task_struct *p, long signr)
1501{
1502        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1503        prstatus->pr_sigpend = p->pending.signal.sig[0];
1504        prstatus->pr_sighold = p->blocked.sig[0];
1505        rcu_read_lock();
1506        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1507        rcu_read_unlock();
1508        prstatus->pr_pid = task_pid_vnr(p);
1509        prstatus->pr_pgrp = task_pgrp_vnr(p);
1510        prstatus->pr_sid = task_session_vnr(p);
1511        if (thread_group_leader(p)) {
1512                struct task_cputime cputime;
1513
1514                /*
1515                 * This is the record for the group leader.  It shows the
1516                 * group-wide total, not its individual thread total.
1517                 */
1518                thread_group_cputime(p, &cputime);
1519                prstatus->pr_utime = ns_to_timeval(cputime.utime);
1520                prstatus->pr_stime = ns_to_timeval(cputime.stime);
1521        } else {
1522                u64 utime, stime;
1523
1524                task_cputime(p, &utime, &stime);
1525                prstatus->pr_utime = ns_to_timeval(utime);
1526                prstatus->pr_stime = ns_to_timeval(stime);
1527        }
1528
1529        prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1530        prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
1531}
1532
1533static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1534                       struct mm_struct *mm)
1535{
1536        const struct cred *cred;
1537        unsigned int i, len;
1538        
1539        /* first copy the parameters from user space */
1540        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1541
1542        len = mm->arg_end - mm->arg_start;
1543        if (len >= ELF_PRARGSZ)
1544                len = ELF_PRARGSZ-1;
1545        if (copy_from_user(&psinfo->pr_psargs,
1546                           (const char __user *)mm->arg_start, len))
1547                return -EFAULT;
1548        for(i = 0; i < len; i++)
1549                if (psinfo->pr_psargs[i] == 0)
1550                        psinfo->pr_psargs[i] = ' ';
1551        psinfo->pr_psargs[len] = 0;
1552
1553        rcu_read_lock();
1554        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1555        rcu_read_unlock();
1556        psinfo->pr_pid = task_pid_vnr(p);
1557        psinfo->pr_pgrp = task_pgrp_vnr(p);
1558        psinfo->pr_sid = task_session_vnr(p);
1559
1560        i = p->state ? ffz(~p->state) + 1 : 0;
1561        psinfo->pr_state = i;
1562        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1563        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1564        psinfo->pr_nice = task_nice(p);
1565        psinfo->pr_flag = p->flags;
1566        rcu_read_lock();
1567        cred = __task_cred(p);
1568        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1569        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1570        rcu_read_unlock();
1571        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1572        
1573        return 0;
1574}
1575
1576static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1577{
1578        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1579        int i = 0;
1580        do
1581                i += 2;
1582        while (auxv[i - 2] != AT_NULL);
1583        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1584}
1585
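/*
 * Illustrative sketch, not part of the kernel source: saved_auxv holds
 * (type, value) pairs ending with an AT_NULL entry, which the loop above
 * counts inclusively.  A userspace consumer of the resulting NT_AUXV note
 * (using the <elf.h> types) could walk it the same way:
 */
#if 0
#include <elf.h>

static unsigned long auxv_lookup(const Elf64_auxv_t *auxv, unsigned long type)
{
        for (; auxv->a_type != AT_NULL; auxv++)
                if (auxv->a_type == type)
                        return auxv->a_un.a_val;
        return 0;
}
#endif
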
1586static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1587                const kernel_siginfo_t *siginfo)
1588{
1589        mm_segment_t old_fs = get_fs();
1590        set_fs(KERNEL_DS);
1591        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1592        set_fs(old_fs);
1593        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1594}
1595
1596#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1597/*
1598 * Format of NT_FILE note:
1599 *
1600 * long count     -- how many files are mapped
1601 * long page_size -- units for file_ofs
1602 * array of [COUNT] elements of
1603 *   long start
1604 *   long end
1605 *   long file_ofs
1606 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1607 */
1608static int fill_files_note(struct memelfnote *note)
1609{
1610        struct vm_area_struct *vma;
1611        unsigned count, size, names_ofs, remaining, n;
1612        user_long_t *data;
1613        user_long_t *start_end_ofs;
1614        char *name_base, *name_curpos;
1615
1616        /* *Estimated* file count and total data size needed */
1617        count = current->mm->map_count;
1618        if (count > UINT_MAX / 64)
1619                return -EINVAL;
1620        size = count * 64;
1621
1622        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1623 alloc:
1624        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1625                return -EINVAL;
1626        size = round_up(size, PAGE_SIZE);
1627        data = kvmalloc(size, GFP_KERNEL);
1628        if (ZERO_OR_NULL_PTR(data))
1629                return -ENOMEM;
1630
1631        start_end_ofs = data + 2;
1632        name_base = name_curpos = ((char *)data) + names_ofs;
1633        remaining = size - names_ofs;
1634        count = 0;
1635        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1636                struct file *file;
1637                const char *filename;
1638
1639                file = vma->vm_file;
1640                if (!file)
1641                        continue;
1642                filename = file_path(file, name_curpos, remaining);
1643                if (IS_ERR(filename)) {
1644                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1645                                kvfree(data);
1646                                size = size * 5 / 4;
1647                                goto alloc;
1648                        }
1649                        continue;
1650                }
1651
1652                /* file_path() fills in from the end of the buffer; move the name down */
1653                /* n = strlen(filename) + 1: */
1654                n = (name_curpos + remaining) - filename;
1655                remaining = filename - name_curpos;
1656                memmove(name_curpos, filename, n);
1657                name_curpos += n;
1658
1659                *start_end_ofs++ = vma->vm_start;
1660                *start_end_ofs++ = vma->vm_end;
1661                *start_end_ofs++ = vma->vm_pgoff;
1662                count++;
1663        }
1664
1665        /* Now we know the exact count of files, so we can store it */
1666        data[0] = count;
1667        data[1] = PAGE_SIZE;
1668        /*
1669         * The count is usually less than current->mm->map_count,
1670         * so we need to move the filenames down.
1671         */
1672        n = current->mm->map_count - count;
1673        if (n != 0) {
1674                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1675                memmove(name_base - shift_bytes, name_base,
1676                        name_curpos - name_base);
1677                name_curpos -= shift_bytes;
1678        }
1679
1680        size = name_curpos - (char *)data;
1681        fill_note(note, "CORE", NT_FILE, size, data);
1682        return 0;
1683}
1684
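/*
 * Illustrative sketch, not part of the kernel source: a hypothetical
 * userspace reader for the NT_FILE descriptor built above, following the
 * layout documented before fill_files_note().  It assumes the reader has
 * the same word size as the dumped process; note that file_ofs is stored
 * in page_size units:
 */
#if 0
#include <stdio.h>
#include <string.h>

static void print_nt_file(const long *data)
{
        long i, count = data[0], page_size = data[1];
        const char *name = (const char *)(data + 2 + 3 * count);

        for (i = 0; i < count; i++) {
                printf("%lx-%lx @ %lx: %s\n",
                       data[2 + 3 * i], data[3 + 3 * i],
                       data[4 + 3 * i] * page_size, name);
                name += strlen(name) + 1;
        }
}
#endif
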
1685#ifdef CORE_DUMP_USE_REGSET
1686#include <linux/regset.h>
1687
1688struct elf_thread_core_info {
1689        struct elf_thread_core_info *next;
1690        struct task_struct *task;
1691        struct elf_prstatus prstatus;
1692        struct memelfnote notes[0];
1693};
1694
1695struct elf_note_info {
1696        struct elf_thread_core_info *thread;
1697        struct memelfnote psinfo;
1698        struct memelfnote signote;
1699        struct memelfnote auxv;
1700        struct memelfnote files;
1701        user_siginfo_t csigdata;
1702        size_t size;
1703        int thread_notes;
1704};
1705
1706/*
1707 * When a regset has a writeback hook, we call it on each thread before
1708 * dumping user memory.  On register window machines, this makes sure the
1709 * user memory backing the register data is up to date before we read it.
1710 */
1711static void do_thread_regset_writeback(struct task_struct *task,
1712                                       const struct user_regset *regset)
1713{
1714        if (regset->writeback)
1715                regset->writeback(task, regset, 1);
1716}
1717
1718#ifndef PRSTATUS_SIZE
1719#define PRSTATUS_SIZE(S, R) sizeof(S)
1720#endif
1721
1722#ifndef SET_PR_FPVALID
1723#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1724#endif
1725
1726static int fill_thread_core_info(struct elf_thread_core_info *t,
1727                                 const struct user_regset_view *view,
1728                                 long signr, size_t *total)
1729{
1730        unsigned int i;
1731        unsigned int regset0_size = regset_size(t->task, &view->regsets[0]);
1732
1733        /*
1734         * NT_PRSTATUS is the one special case, because the regset data
1735         * goes into the pr_reg field inside the note contents, rather
1736         * than being the whole note contents.  We fill the rest in here.
1737         * We assume that regset 0 is NT_PRSTATUS.
1738         */
1739        fill_prstatus(&t->prstatus, t->task, signr);
1740        (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size,
1741                                    &t->prstatus.pr_reg, NULL);
1742
1743        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1744                  PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
1745        *total += notesize(&t->notes[0]);
1746
1747        do_thread_regset_writeback(t->task, &view->regsets[0]);
1748
1749        /*
1750         * Each other regset might generate a note too.  For each regset
1751         * that has no core_note_type or is inactive, we leave t->notes[i]
1752         * all zero and we'll know to skip writing it later.
1753         */
1754        for (i = 1; i < view->n; ++i) {
1755                const struct user_regset *regset = &view->regsets[i];
1756                do_thread_regset_writeback(t->task, regset);
1757                if (regset->core_note_type && regset->get &&
1758                    (!regset->active || regset->active(t->task, regset) > 0)) {
1759                        int ret;
1760                        size_t size = regset_size(t->task, regset);
1761                        void *data = kmalloc(size, GFP_KERNEL);
1762                        if (unlikely(!data))
1763                                return 0;
1764                        ret = regset->get(t->task, regset,
1765                                          0, size, data, NULL);
1766                        if (unlikely(ret))
1767                                kfree(data);
1768                        else {
1769                                if (regset->core_note_type != NT_PRFPREG)
1770                                        fill_note(&t->notes[i], "LINUX",
1771                                                  regset->core_note_type,
1772                                                  size, data);
1773                                else {
1774                                        SET_PR_FPVALID(&t->prstatus,
1775                                                        1, regset0_size);
1776                                        fill_note(&t->notes[i], "CORE",
1777                                                  NT_PRFPREG, size, data);
1778                                }
1779                                *total += notesize(&t->notes[i]);
1780                        }
1781                }
1782        }
1783
1784        return 1;
1785}
1786
1787static int fill_note_info(struct elfhdr *elf, int phdrs,
1788                          struct elf_note_info *info,
1789                          const kernel_siginfo_t *siginfo, struct pt_regs *regs)
1790{
1791        struct task_struct *dump_task = current;
1792        const struct user_regset_view *view = task_user_regset_view(dump_task);
1793        struct elf_thread_core_info *t;
1794        struct elf_prpsinfo *psinfo;
1795        struct core_thread *ct;
1796        unsigned int i;
1797
1798        info->size = 0;
1799        info->thread = NULL;
1800
1801        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1802        if (psinfo == NULL) {
1803                info->psinfo.data = NULL; /* So we don't free this wrongly */
1804                return 0;
1805        }
1806
1807        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1808
1809        /*
1810         * Figure out how many notes we're going to need for each thread.
1811         */
1812        info->thread_notes = 0;
1813        for (i = 0; i < view->n; ++i)
1814                if (view->regsets[i].core_note_type != 0)
1815                        ++info->thread_notes;
1816
1817        /*
1818         * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1819         * since it is our one special case.
1820         */
1821        if (unlikely(info->thread_notes == 0) ||
1822            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1823                WARN_ON(1);
1824                return 0;
1825        }
1826
1827        /*
1828         * Initialize the ELF file header.
1829         */
1830        fill_elf_header(elf, phdrs,
1831                        view->e_machine, view->e_flags);
1832
1833        /*
1834         * Allocate a structure for each thread.
1835         */
1836        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1837                t = kzalloc(offsetof(struct elf_thread_core_info,
1838                                     notes[info->thread_notes]),
1839                            GFP_KERNEL);
1840                if (unlikely(!t))
1841                        return 0;
1842
1843                t->task = ct->task;
1844                if (ct->task == dump_task || !info->thread) {
1845                        t->next = info->thread;
1846                        info->thread = t;
1847                } else {
1848                        /*
1849                         * Make sure to keep the original task at
1850                         * the head of the list.
1851                         */
1852                        t->next = info->thread->next;
1853                        info->thread->next = t;
1854                }
1855        }
1856
1857        /*
1858         * Now fill in each thread's information.
1859         */
1860        for (t = info->thread; t != NULL; t = t->next)
1861                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1862                        return 0;
1863
1864        /*
1865         * Fill in the two process-wide notes.
1866         */
1867        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1868        info->size += notesize(&info->psinfo);
1869
1870        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1871        info->size += notesize(&info->signote);
1872
1873        fill_auxv_note(&info->auxv, current->mm);
1874        info->size += notesize(&info->auxv);
1875
1876        if (fill_files_note(&info->files) == 0)
1877                info->size += notesize(&info->files);
1878
1879        return 1;
1880}
1881
1882static size_t get_note_info_size(struct elf_note_info *info)
1883{
1884        return info->size;
1885}
1886
1887/*
1888 * Write all the notes for each thread.  When writing the first thread, the
1889 * process-wide notes are interleaved after the first thread-specific note.
1890 */
1891static int write_note_info(struct elf_note_info *info,
1892                           struct coredump_params *cprm)
1893{
1894        bool first = true;
1895        struct elf_thread_core_info *t = info->thread;
1896
1897        do {
1898                int i;
1899
1900                if (!writenote(&t->notes[0], cprm))
1901                        return 0;
1902
1903                if (first && !writenote(&info->psinfo, cprm))
1904                        return 0;
1905                if (first && !writenote(&info->signote, cprm))
1906                        return 0;
1907                if (first && !writenote(&info->auxv, cprm))
1908                        return 0;
1909                if (first && info->files.data &&
1910                                !writenote(&info->files, cprm))
1911                        return 0;
1912
1913                for (i = 1; i < info->thread_notes; ++i)
1914                        if (t->notes[i].data &&
1915                            !writenote(&t->notes[i], cprm))
1916                                return 0;
1917
1918                first = false;
1919                t = t->next;
1920        } while (t);
1921
1922        return 1;
1923}
1924
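/*
 * Illustrative sketch, not part of the kernel source: for a dump with
 * threads T0 (at the head of the list) and T1, the loop above emits the
 * contents of the single PT_NOTE segment in this order:
 *
 *   T0 NT_PRSTATUS, NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE (if present),
 *   T0 remaining regset notes, T1 NT_PRSTATUS, T1 remaining regset notes.
 */
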
1925static void free_note_info(struct elf_note_info *info)
1926{
1927        struct elf_thread_core_info *threads = info->thread;
1928        while (threads) {
1929                unsigned int i;
1930                struct elf_thread_core_info *t = threads;
1931                threads = t->next;
1932                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1933                for (i = 1; i < info->thread_notes; ++i)
1934                        kfree(t->notes[i].data);
1935                kfree(t);
1936        }
1937        kfree(info->psinfo.data);
1938        kvfree(info->files.data);
1939}
1940
1941#else
1942
1943/* Here is the structure in which status of each thread is captured. */
1944struct elf_thread_status
1945{
1946        struct list_head list;
1947        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1948        elf_fpregset_t fpu;             /* NT_PRFPREG */
1949        struct task_struct *thread;
1950#ifdef ELF_CORE_COPY_XFPREGS
1951        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1952#endif
1953        struct memelfnote notes[3];
1954        int num_notes;
1955};
1956
1957/*
1958 * In order to add the specific thread information for the ELF file format,
1959 * we need to keep a linked list of every thread's pr_status and then create
1960 * a single section for them in the final core file.
1961 */
1962static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1963{
1964        int sz = 0;
1965        struct task_struct *p = t->thread;
1966        t->num_notes = 0;
1967
1968        fill_prstatus(&t->prstatus, p, signr);
1969        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1970        
1971        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1972                  &(t->prstatus));
1973        t->num_notes++;
1974        sz += notesize(&t->notes[0]);
1975
1976        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1977                                                                &t->fpu))) {
1978                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1979                          &(t->fpu));
1980                t->num_notes++;
1981                sz += notesize(&t->notes[1]);
1982        }
1983
1984#ifdef ELF_CORE_COPY_XFPREGS
1985        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1986                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1987                          sizeof(t->xfpu), &t->xfpu);
1988                t->num_notes++;
1989                sz += notesize(&t->notes[2]);
1990        }
1991#endif  
1992        return sz;
1993}
1994
1995struct elf_note_info {
1996        struct memelfnote *notes;
1997        struct memelfnote *notes_files;
1998        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1999        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
2000        struct list_head thread_list;
2001        elf_fpregset_t *fpu;
2002#ifdef ELF_CORE_COPY_XFPREGS
2003        elf_fpxregset_t *xfpu;
2004#endif
2005        user_siginfo_t csigdata;
2006        int thread_status_size;
2007        int numnote;
2008};
2009
2010static int elf_note_info_init(struct elf_note_info *info)
2011{
2012        memset(info, 0, sizeof(*info));
2013        INIT_LIST_HEAD(&info->thread_list);
2014
2015        /* Allocate space for ELF notes */
2016        info->notes = kmalloc_array(8, sizeof(struct memelfnote), GFP_KERNEL);
2017        if (!info->notes)
2018                return 0;
2019        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
2020        if (!info->psinfo)
2021                return 0;
2022        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
2023        if (!info->prstatus)
2024                return 0;
2025        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2026        if (!info->fpu)
2027                return 0;
2028#ifdef ELF_CORE_COPY_XFPREGS
2029        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
2030        if (!info->xfpu)
2031                return 0;
2032#endif
2033        return 1;
2034}
2035
2036static int fill_note_info(struct elfhdr *elf, int phdrs,
2037                          struct elf_note_info *info,
2038                          const kernel_siginfo_t *siginfo, struct pt_regs *regs)
2039{
2040        struct core_thread *ct;
2041        struct elf_thread_status *ets;
2042
2043        if (!elf_note_info_init(info))
2044                return 0;
2045
2046        for (ct = current->mm->core_state->dumper.next;
2047                                        ct; ct = ct->next) {
2048                ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2049                if (!ets)
2050                        return 0;
2051
2052                ets->thread = ct->task;
2053                list_add(&ets->list, &info->thread_list);
2054        }
2055
2056        list_for_each_entry(ets, &info->thread_list, list) {
2057                int sz;
2058
2059                sz = elf_dump_thread_status(siginfo->si_signo, ets);
2060                info->thread_status_size += sz;
2061        }
2062        /* now collect the dump for the current task */
2063        memset(info->prstatus, 0, sizeof(*info->prstatus));
2064        fill_prstatus(info->prstatus, current, siginfo->si_signo);
2065        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2066
2067        /* Set up header */
2068        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2069
2070        /*
2071         * Set up the notes in similar form to SVR4 core dumps made
2072         * with info from their /proc.
2073         */
2074
2075        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2076                  sizeof(*info->prstatus), info->prstatus);
2077        fill_psinfo(info->psinfo, current->group_leader, current->mm);
2078        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2079                  sizeof(*info->psinfo), info->psinfo);
2080
2081        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2082        fill_auxv_note(info->notes + 3, current->mm);
2083        info->numnote = 4;
2084
2085        if (fill_files_note(info->notes + info->numnote) == 0) {
2086                info->notes_files = info->notes + info->numnote;
2087                info->numnote++;
2088        }
2089
2090        /* Try to dump the FPU. */
2091        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2092                                                               info->fpu);
2093        if (info->prstatus->pr_fpvalid)
2094                fill_note(info->notes + info->numnote++,
2095                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2096#ifdef ELF_CORE_COPY_XFPREGS
2097        if (elf_core_copy_task_xfpregs(current, info->xfpu))
2098                fill_note(info->notes + info->numnote++,
2099                          "LINUX", ELF_CORE_XFPREG_TYPE,
2100                          sizeof(*info->xfpu), info->xfpu);
2101#endif
2102
2103        return 1;
2104}
2105
2106static size_t get_note_info_size(struct elf_note_info *info)
2107{
2108        int sz = 0;
2109        int i;
2110
2111        for (i = 0; i < info->numnote; i++)
2112                sz += notesize(info->notes + i);
2113
2114        sz += info->thread_status_size;
2115
2116        return sz;
2117}
2118
2119static int write_note_info(struct elf_note_info *info,
2120                           struct coredump_params *cprm)
2121{
2122        struct elf_thread_status *ets;
2123        int i;
2124
2125        for (i = 0; i < info->numnote; i++)
2126                if (!writenote(info->notes + i, cprm))
2127                        return 0;
2128
2129        /* write out the thread status notes section */
2130        list_for_each_entry(ets, &info->thread_list, list) {
2131                for (i = 0; i < ets->num_notes; i++)
2132                        if (!writenote(&ets->notes[i], cprm))
2133                                return 0;
2134        }
2135
2136        return 1;
2137}
2138
2139static void free_note_info(struct elf_note_info *info)
2140{
2141        while (!list_empty(&info->thread_list)) {
2142                struct list_head *tmp = info->thread_list.next;
2143                list_del(tmp);
2144                kfree(list_entry(tmp, struct elf_thread_status, list));
2145        }
2146
2147        /* Free data possibly allocated by fill_files_note(): */
2148        if (info->notes_files)
2149                kvfree(info->notes_files->data);
2150
2151        kfree(info->prstatus);
2152        kfree(info->psinfo);
2153        kfree(info->notes);
2154        kfree(info->fpu);
2155#ifdef ELF_CORE_COPY_XFPREGS
2156        kfree(info->xfpu);
2157#endif
2158}
2159
2160#endif
2161
2162static struct vm_area_struct *first_vma(struct task_struct *tsk,
2163                                        struct vm_area_struct *gate_vma)
2164{
2165        struct vm_area_struct *ret = tsk->mm->mmap;
2166
2167        if (ret)
2168                return ret;
2169        return gate_vma;
2170}
2171/*
2172 * Helper function for iterating across a vma list.  It ensures that the caller
2173 * will visit `gate_vma' prior to terminating the search.
2174 */
2175static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2176                                        struct vm_area_struct *gate_vma)
2177{
2178        struct vm_area_struct *ret;
2179
2180        ret = this_vma->vm_next;
2181        if (ret)
2182                return ret;
2183        if (this_vma == gate_vma)
2184                return NULL;
2185        return gate_vma;
2186}
2187
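/*
 * Illustrative sketch, not part of the kernel source: the iteration
 * pattern these two helpers support, which elf_core_dump() below uses
 * three times:
 */
#if 0
static void for_each_dumpable_vma(struct task_struct *tsk,
                                  struct vm_area_struct *gate_vma)
{
        struct vm_area_struct *vma;

        for (vma = first_vma(tsk, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma))
                ;       /* visit vma (hypothetical per-vma work) */
}
#endif
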
2188static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2189                             elf_addr_t e_shoff, int segs)
2190{
2191        elf->e_shoff = e_shoff;
2192        elf->e_shentsize = sizeof(*shdr4extnum);
2193        elf->e_shnum = 1;
2194        elf->e_shstrndx = SHN_UNDEF;
2195
2196        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2197
2198        shdr4extnum->sh_type = SHT_NULL;
2199        shdr4extnum->sh_size = elf->e_shnum;
2200        shdr4extnum->sh_link = elf->e_shstrndx;
2201        shdr4extnum->sh_info = segs;
2202}
2203
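/*
 * Illustrative sketch, not part of the kernel source: a consumer that finds
 * e_phnum == PN_XNUM recovers the real segment count from section header 0,
 * which fill_extnum_info() populates above (userspace <elf.h> types shown):
 */
#if 0
#include <elf.h>

static unsigned int real_phnum(const Elf64_Ehdr *eh, const Elf64_Shdr *sh0)
{
        return eh->e_phnum == PN_XNUM ? sh0->sh_info : eh->e_phnum;
}
#endif
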
2204/*
2205 * Actual dumper
2206 *
2207 * This is a two-pass process; first we find the offsets of the bits,
2208 * and then they are actually written out.  If we run out of core limit
2209 * we just truncate.
2210 */
2211static int elf_core_dump(struct coredump_params *cprm)
2212{
2213        int has_dumped = 0;
2214        mm_segment_t fs;
2215        int segs, i;
2216        size_t vma_data_size = 0;
2217        struct vm_area_struct *vma, *gate_vma;
2218        struct elfhdr *elf = NULL;
2219        loff_t offset = 0, dataoff;
2220        struct elf_note_info info = { };
2221        struct elf_phdr *phdr4note = NULL;
2222        struct elf_shdr *shdr4extnum = NULL;
2223        Elf_Half e_phnum;
2224        elf_addr_t e_shoff;
2225        elf_addr_t *vma_filesz = NULL;
2226
2227        /*
2228         * We no longer stop all VM operations.
2229         * 
2230         * This is because those processes that could possibly change the
2231         * map_count or the mmap / vma pages are now blocked in do_exit
2232         * until current finishes this core dump.
2233         *
2234         * Only ptrace can touch these memory addresses, but it doesn't change
2235         * the map_count or the pages allocated. So no possibility of crashing
2236         * exists while dumping the mm->vm_next areas to the core file.
2237         */
2238  
2239        /* alloc memory for large data structures: too large to be on stack */
2240        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2241        if (!elf)
2242                goto out;
2243        /*
2244         * The number of segs is recorded in the ELF header as a 16-bit value.
2245         * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
2246         */
2247        segs = current->mm->map_count;
2248        segs += elf_core_extra_phdrs();
2249
2250        gate_vma = get_gate_vma(current->mm);
2251        if (gate_vma != NULL)
2252                segs++;
2253
2254        /* for notes section */
2255        segs++;
2256
2257        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2258         * this, the kernel supports extended numbering. Have a look at
2259         * include/linux/elf.h for further information. */
2260        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2261
2262        /*
2263         * Collect all the non-memory information about the process for the
2264         * notes.  This also sets up the file header.
2265         */
2266        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2267                goto cleanup;
2268
2269        has_dumped = 1;
2270
2271        fs = get_fs();
2272        set_fs(KERNEL_DS);
2273
2274        offset += sizeof(*elf);                         /* Elf header */
2275        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2276
2277        /* Write notes phdr entry */
2278        {
2279                size_t sz = get_note_info_size(&info);
2280
2281                sz += elf_coredump_extra_notes_size();
2282
2283                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2284                if (!phdr4note)
2285                        goto end_coredump;
2286
2287                fill_elf_note_phdr(phdr4note, sz, offset);
2288                offset += sz;
2289        }
2290
2291        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2292
2293        if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2294                goto end_coredump;
2295        vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
2296                              GFP_KERNEL);
2297        if (ZERO_OR_NULL_PTR(vma_filesz))
2298                goto end_coredump;
2299
2300        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2301                        vma = next_vma(vma, gate_vma)) {
2302                unsigned long dump_size;
2303
2304                dump_size = vma_dump_size(vma, cprm->mm_flags);
2305                vma_filesz[i++] = dump_size;
2306                vma_data_size += dump_size;
2307        }
2308
2309        offset += vma_data_size;
2310        offset += elf_core_extra_data_size();
2311        e_shoff = offset;
2312
2313        if (e_phnum == PN_XNUM) {
2314                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2315                if (!shdr4extnum)
2316                        goto end_coredump;
2317                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2318        }
2319
2320        offset = dataoff;
2321
2322        if (!dump_emit(cprm, elf, sizeof(*elf)))
2323                goto end_coredump;
2324
2325        if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2326                goto end_coredump;
2327
2328        /* Write program headers for segments dump */
2329        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2330                        vma = next_vma(vma, gate_vma)) {
2331                struct elf_phdr phdr;
2332
2333                phdr.p_type = PT_LOAD;
2334                phdr.p_offset = offset;
2335                phdr.p_vaddr = vma->vm_start;
2336                phdr.p_paddr = 0;
2337                phdr.p_filesz = vma_filesz[i++];
2338                phdr.p_memsz = vma->vm_end - vma->vm_start;
2339                offset += phdr.p_filesz;
2340                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2341                if (vma->vm_flags & VM_WRITE)
2342                        phdr.p_flags |= PF_W;
2343                if (vma->vm_flags & VM_EXEC)
2344                        phdr.p_flags |= PF_X;
2345                phdr.p_align = ELF_EXEC_PAGESIZE;
2346
2347                if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2348                        goto end_coredump;
2349        }
2350
2351        if (!elf_core_write_extra_phdrs(cprm, offset))
2352                goto end_coredump;
2353
2354        /* write out the notes section */
2355        if (!write_note_info(&info, cprm))
2356                goto end_coredump;
2357
2358        if (elf_coredump_extra_notes_write(cprm))
2359                goto end_coredump;
2360
2361        /* Align to page */
2362        if (!dump_skip(cprm, dataoff - cprm->pos))
2363                goto end_coredump;
2364
2365        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2366                        vma = next_vma(vma, gate_vma)) {
2367                unsigned long addr;
2368                unsigned long end;
2369
2370                end = vma->vm_start + vma_filesz[i++];
2371
2372                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2373                        struct page *page;
2374                        int stop;
2375
2376                        page = get_dump_page(addr);
2377                        if (page) {
2378                                void *kaddr = kmap(page);
2379                                stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2380                                kunmap(page);
2381                                put_page(page);
2382                        } else
2383                                stop = !dump_skip(cprm, PAGE_SIZE);
2384                        if (stop)
2385                                goto end_coredump;
2386                }
2387        }
2388        dump_truncate(cprm);
2389
2390        if (!elf_core_write_extra_data(cprm))
2391                goto end_coredump;
2392
2393        if (e_phnum == PN_XNUM) {
2394                if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2395                        goto end_coredump;
2396        }
2397
2398end_coredump:
2399        set_fs(fs);
2400
2401cleanup:
2402        free_note_info(&info);
2403        kfree(shdr4extnum);
2404        kvfree(vma_filesz);
2405        kfree(phdr4note);
2406        kfree(elf);
2407out:
2408        return has_dumped;
2409}
2410
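/*
 * Illustrative sketch, not part of the kernel source: the file layout that
 * the two passes above produce:
 *
 *   ELF header | program headers | PT_NOTE data | pad to ELF_EXEC_PAGESIZE |
 *   PT_LOAD data, one chunk per dumped vma | extra arch data |
 *   extended-numbering section header (only when e_phnum == PN_XNUM)
 */
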
2411#endif          /* CONFIG_ELF_CORE */
2412
2413static int __init init_elf_binfmt(void)
2414{
2415        register_binfmt(&elf_format);
2416        return 0;
2417}
2418
2419static void __exit exit_elf_binfmt(void)
2420{
2421        /* Remove the ELF loader. */
2422        unregister_binfmt(&elf_format);
2423}
2424
2425core_initcall(init_elf_binfmt);
2426module_exit(exit_elf_binfmt);
2427MODULE_LICENSE("GPL");
2428