/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX System V Release 4 Programmer's Guide: ANSI C and Programming
 * Support Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

/* That's for binfmt_elf_fdpic to deal with */
#ifndef elf_check_fdpic
#define elf_check_fdpic(ex) false
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

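/*
 * Extend the brk area to cover [start, end), page-aligning both ends, and
 * record the new program break in the mm.  This backs the tail of the bss
 * with anonymous zero-filled pages.
 */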
static int set_brk(unsigned long start, unsigned long end, int prot)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                /*
                 * Map the last of the bss segment.
                 * If the header is requesting these pages to be
                 * executable, honour that (ppc32 needs this).
                 */
                int error = vm_brk_flags(start, end - start,
                                prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        return error;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should
   not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

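/*
 * Build the final userspace stack image for the new program: the argc word,
 * the argv and envp pointer arrays, and the ELF auxiliary vector, all laid
 * out above the strings that were copied in by the exec core.
 */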
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

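/*
 * Each auxv entry is an (id, value) pair; the vector is terminated by an
 * AT_NULL entry, which the memset below guarantees.
 */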
#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;

        /* Populate list of argv pointers back to argv strings. */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, sp++))
                return -EFAULT;
        current->mm->arg_end = p;

        /* Populate list of envp pointers back to envp strings. */
        current->mm->env_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, sp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, sp++))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

#ifndef elf_map

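/*
 * Map a PT_LOAD segment.  The address and file offset are aligned down to
 * an ELF_MIN_ALIGN boundary and the size rounded up accordingly, so the
 * fractional first page of the segment is mapped along with it.
 */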
static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        if ((type & MAP_FIXED_NOREPLACE) &&
            PTR_ERR((void *)map_addr) == -EEXIST)
                pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
                        task_pid_nr(current), current->comm, (void *)addr);

        return map_addr;
}

#endif /* !elf_map */

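/*
 * Return the size of the address range spanned by all PT_LOAD segments:
 * from the page-aligned start of the first one to the end of the last one.
 */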
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
                                       struct file *elf_file)
{
        struct elf_phdr *elf_phdata = NULL;
        int retval, size, err = -1;
        loff_t pos = elf_ex->e_phoff;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;

        /* Sanity check the number of program headers... */
        if (elf_ex->e_phnum < 1 ||
                elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* ...and their total size. */
        size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;

        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        /* Read in the program headers */
        retval = kernel_read(elf_file, elf_phdata, size, &pos);
        if (retval != size) {
                err = (retval < 0) ? retval : -EIO;
                goto out;
        }

        /* Success! */
        err = 0;
out:
        if (err) {
                kfree(elf_phdata);
                elf_phdata = NULL;
        }
        return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (i.e. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:       The main ELF header
 * @phdr:       The program header to check
 * @elf:        The open ELF file
 * @is_interp:  True if the phdr is from the interpreter of the ELF being
 *              loaded, else false.
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
                                   struct elf_phdr *phdr,
                                   struct file *elf, bool is_interp,
                                   struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:       The main ELF header
 * @has_interp: True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
                                 struct elfhdr *interp_ehdr,
                                 struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

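/*
 * Map the program interpreter (typically ld.so) into the address space.
 * On success the return value is the relocation (load bias) that was
 * applied to the interpreter's ELF vaddrs; on failure it is an error
 * value, or ~0UL if the headers failed the consistency checks.
 */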
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        int bss_prot = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int i;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex) ||
            elf_check_fdpic(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        total_size = total_mapping_size(interp_elf_phdata,
                                        interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out;
        }

        eppnt = interp_elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED_NOREPLACE;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
                        if (k > last_bss) {
                                last_bss = k;
                                bss_prot = elf_prot;
                        }
                }
        }

        /*
         * Now fill out the bss section: first pad the last page from
         * the file up to the page boundary, and zero it from elf_bss
         * up to the end of the page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out;
        }
        /*
         * Next, align both the file and mem bss up to the page size,
         * since this is where elf_bss was just zeroed up to, and where
         * last_bss will end after the vm_brk_flags() below.
         */
        elf_bss = ELF_PAGEALIGN(elf_bss);
        last_bss = ELF_PAGEALIGN(last_bss);
        /* Finally, if there is still more bss to allocate, do it. */
        if (last_bss > elf_bss) {
                error = vm_brk_flags(elf_bss, last_bss - elf_bss,
                                bss_prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        goto out;
        }

        error = load_addr;
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

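/*
 * Apply stack ASLR: when PF_RANDOMIZE is set, shift the page-aligned stack
 * top by a random amount of up to STACK_RND_MASK pages.
 */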
static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned long random_variable = 0;

        if (current->flags & PF_RANDOMIZE) {
                random_variable = get_random_long();
                random_variable &= STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

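/*
 * Load an ELF executable: validate the headers, locate any PT_INTERP
 * interpreter, flush the old mm, map the PT_LOAD segments, set up the
 * brk/bss, map the interpreter if there is one, build the stack tables,
 * and finally start the new thread at the entry point.
 */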
static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
        unsigned long elf_bss, elf_brk;
        int bss_prot = 0;
        int retval, i;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;
        struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
        loff_t pos;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (elf_check_fdpic(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
        if (!elf_phdata)
                goto out;

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        pos = elf_ppnt->p_offset;
                        retval = kernel_read(bprm->file, elf_interpreter,
                                             elf_ppnt->p_filesz, &pos);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NUL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        /* Get the exec headers */
                        pos = 0;
                        retval = kernel_read(interpreter, &loc->interp_elf_ex,
                                             sizeof(loc->interp_elf_ex), &pos);
                        if (retval != sizeof(loc->interp_elf_ex)) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                switch (elf_ppnt->p_type) {
                case PT_GNU_STACK:
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;

                case PT_LOPROC ... PT_HIPROC:
                        retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
                                                  bprm->file, false,
                                                  &arch_state);
                        if (retval)
                                goto out_free_dentry;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex) ||
                    elf_check_fdpic(&loc->interp_elf_ex))
                        goto out_free_dentry;

                /* Load the interpreter program headers */
                interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
                                                   interpreter);
                if (!interp_elf_phdata)
                        goto out_free_dentry;

                /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
                elf_ppnt = interp_elf_phdata;
                for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
                        switch (elf_ppnt->p_type) {
                        case PT_LOPROC ... PT_HIPROC:
                                retval = arch_elf_pt_proc(&loc->interp_elf_ex,
                                                          elf_ppnt, interpreter,
                                                          true, &arch_state);
                                if (retval)
                                        goto out_free_dentry;
                                break;
                        }
        }

        /*
         * Allow arch code to reject the ELF at this point, whilst it's
         * still possible to return an error to the code that invoked
         * the exec syscall.
         */
        retval = arch_check_elf(&loc->elf_ex,
                                !!interpreter, &loc->interp_elf_ex,
                                &arch_state);
        if (retval)
                goto out_free_dentry;

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY2(loc->elf_ex, &arch_state);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);
        install_exec_creds(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0)
                goto out_free_dentry;

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags, elf_fixed = MAP_FIXED_NOREPLACE;
                unsigned long k, vaddr;
                unsigned long total_size = 0;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias,
                                         bss_prot);
                        if (retval)
                                goto out_free_dentry;
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }

                        /*
                         * Some binaries have overlapping elf segments and then
                         * we have to forcefully map over an existing mapping
                         * e.g. over this newly established brk mapping.
                         */
                        elf_fixed = MAP_FIXED;
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                /*
                 * If we are loading ET_EXEC or we have already performed
                 * the ET_DYN load_addr calculations, proceed normally.
                 */
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= elf_fixed;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /*
                         * This logic is run once for the first LOAD Program
                         * Header for ET_DYN binaries to calculate the
                         * randomization (load_bias) for all the LOAD
                         * Program Headers, and to calculate the entire
                         * size of the ELF mapping (total_size). (Note that
                         * load_addr_set is set to true later once the
                         * initial mapping is performed.)
                         *
                         * There are effectively two types of ET_DYN
                         * binaries: programs (i.e. PIE: ET_DYN with INTERP)
                         * and loaders (ET_DYN without INTERP, since they
                         * _are_ the ELF interpreter). The loaders must
                         * be loaded away from programs since the program
                         * may otherwise collide with the loader (especially
                         * for ET_EXEC which does not have a randomized
                         * position). For example to handle invocations of
                         * "./ld.so someprog" to test out a new version of
                         * the loader, the subsequent program that the
                         * loader loads must avoid the loader itself, so
                         * they cannot share the same load range. Sufficient
                         * room for the brk must be allocated with the
                         * loader as well, since brk must be available with
                         * the loader.
                         *
                         * Therefore, programs are loaded offset from
                         * ELF_ET_DYN_BASE and loaders are loaded into the
                         * independently randomized mmap region (0 load_bias
                         * without MAP_FIXED).
                         */
                        if (elf_interpreter) {
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
                                elf_flags |= elf_fixed;
                        } else
                                load_bias = 0;

                        /*
                         * Since load_bias is used for all subsequent loading
                         * calculations, we must lower it by the first vaddr
                         * so that the remaining calculations based on the
                         * ELF vaddrs will be correctly offset. The result
                         * is then page aligned.
                         */
                        load_bias = ELF_PAGESTART(load_bias - vaddr);

                        total_size = total_mapping_size(elf_phdata,
                                                        loc->elf_ex.e_phnum);
                        if (!total_size) {
                                retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk) {
                        bss_prot = elf_prot;
                        elf_brk = k;
                }
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk, bss_prot);
        if (retval)
                goto out_free_dentry;
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias, interp_elf_phdata);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(interp_elf_phdata);
        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0)
                goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0)
                goto out;
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
                current->brk_randomized = 1;
#endif
        }

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        finalize_exec(bprm);
        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        kfree(interp_elf_phdata);
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out-style library that happens to have an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;
        loff_t pos = 0;

        error = -ENOEXEC;
        retval = kernel_read(file, &elf_ex, sizeof(elf_ex), &pos);
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                goto out;
        if (elf_check_fdpic(&elf_ex))
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        pos = elf_ex.e_phoff;
        retval = kernel_read(file, eppnt, j, &pos);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len) {
                error = vm_brk(len, bss - len);
                if (error)
                        goto out_free_ph;
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include the vDSO, vsyscall, and other
 * architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;

        /*
         * Assume that all vmas with a .name op should always be dumped.
         * If this changes, a new vm_ops field can easily be added.
         */
        if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
                return true;

        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide how much of a segment to dump: all of it, part of it, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

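        /*
         * The MMF_DUMP_* bits in mm_flags reflect the process's
         * /proc/<pid>/coredump_filter setting.
         */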
1326        /* always dump the vdso and vsyscall sections */
1327        if (always_dump_vma(vma))
1328                goto whole;
1329
1330        if (vma->vm_flags & VM_DONTDUMP)
1331                return 0;
1332
1333        /* support for DAX */
1334        if (vma_is_dax(vma)) {
1335                if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1336                        goto whole;
1337                if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1338                        goto whole;
1339                return 0;
1340        }
1341
1342        /* Hugetlb memory check */
1343        if (vma->vm_flags & VM_HUGETLB) {
1344                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1345                        goto whole;
1346                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1347                        goto whole;
1348                return 0;
1349        }
1350
1351        /* Do not dump I/O mapped devices or special mappings */
1352        if (vma->vm_flags & VM_IO)
1353                return 0;
1354
1355        /* By default, dump shared memory if mapped from an anonymous file. */
1356        if (vma->vm_flags & VM_SHARED) {
1357                if (file_inode(vma->vm_file)->i_nlink == 0 ?
1358                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1359                        goto whole;
1360                return 0;
1361        }
1362
1363        /* Dump segments that have been written to.  */
1364        if (vma->anon_vma && FILTER(ANON_PRIVATE))
1365                goto whole;
1366        if (vma->vm_file == NULL)
1367                return 0;
1368
1369        if (FILTER(MAPPED_PRIVATE))
1370                goto whole;
1371
1372        /*
1373         * If this looks like the beginning of a DSO or executable mapping,
1374         * check for an ELF header.  If we find one, dump the first page to
1375         * aid in determining what was mapped here.
1376         */
1377        if (FILTER(ELF_HEADERS) &&
1378            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1379                u32 __user *header = (u32 __user *) vma->vm_start;
1380                u32 word;
1381                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC, and on
                 * a little-endian machine magic.cmp comes out as 0x464c457f,
                 * i.e. "\177ELF".
                 */
1385                union {
1386                        u32 cmp;
1387                        char elfmag[SELFMAG];
1388                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof(word));
1390                magic.elfmag[EI_MAG0] = ELFMAG0;
1391                magic.elfmag[EI_MAG1] = ELFMAG1;
1392                magic.elfmag[EI_MAG2] = ELFMAG2;
1393                magic.elfmag[EI_MAG3] = ELFMAG3;
1394                /*
1395                 * Switch to the user "segment" for get_user(),
1396                 * then put back what elf_core_dump() had in place.
1397                 */
1398                set_fs(USER_DS);
1399                if (unlikely(get_user(word, header)))
1400                        word = 0;
1401                set_fs(fs);
1402                if (word == magic.cmp)
1403                        return PAGE_SIZE;
1404        }
1405
1406#undef  FILTER
1407
1408        return 0;
1409
1410whole:
1411        return vma->vm_end - vma->vm_start;
1412}
1413
1414/* An ELF note in memory */
1415struct memelfnote
1416{
1417        const char *name;
1418        int type;
1419        unsigned int datasz;
1420        void *data;
1421};
1422
1423static int notesize(struct memelfnote *en)
1424{
1425        int sz;
1426
1427        sz = sizeof(struct elf_note);
1428        sz += roundup(strlen(en->name) + 1, 4);
1429        sz += roundup(en->datasz, 4);
1430
1431        return sz;
1432}
1433
1434static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1435{
1436        struct elf_note en;
1437        en.n_namesz = strlen(men->name) + 1;
1438        en.n_descsz = men->datasz;
1439        en.n_type = men->type;
1440
1441        return dump_emit(cprm, &en, sizeof(en)) &&
1442            dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1443            dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1444}
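
/*
 * For illustration, the byte stream writenote() emits for a "CORE"/NT_PRSTATUS
 * note looks roughly like this (name and descriptor each padded out to a
 * 4-byte boundary):
 *
 *      struct elf_note { .n_namesz = 5, .n_descsz = datasz, .n_type = NT_PRSTATUS }
 *      "CORE\0" + 3 bytes of padding
 *      datasz bytes of prstatus + padding to a 4-byte boundary
 */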
1445
1446static void fill_elf_header(struct elfhdr *elf, int segs,
1447                            u16 machine, u32 flags)
1448{
1449        memset(elf, 0, sizeof(*elf));
1450
1451        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1452        elf->e_ident[EI_CLASS] = ELF_CLASS;
1453        elf->e_ident[EI_DATA] = ELF_DATA;
1454        elf->e_ident[EI_VERSION] = EV_CURRENT;
1455        elf->e_ident[EI_OSABI] = ELF_OSABI;
1456
1457        elf->e_type = ET_CORE;
1458        elf->e_machine = machine;
1459        elf->e_version = EV_CURRENT;
1460        elf->e_phoff = sizeof(struct elfhdr);
1461        elf->e_flags = flags;
1462        elf->e_ehsize = sizeof(struct elfhdr);
1463        elf->e_phentsize = sizeof(struct elf_phdr);
1464        elf->e_phnum = segs;
1467}
1468
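/*
 * The PT_NOTE segment exists only in the core file and is never mapped at
 * run time, which is why everything except p_type, p_offset and p_filesz
 * stays zero below.
 */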
1469static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1470{
1471        phdr->p_type = PT_NOTE;
1472        phdr->p_offset = offset;
1473        phdr->p_vaddr = 0;
1474        phdr->p_paddr = 0;
1475        phdr->p_filesz = sz;
1476        phdr->p_memsz = 0;
1477        phdr->p_flags = 0;
1478        phdr->p_align = 0;
1480}
1481
1482static void fill_note(struct memelfnote *note, const char *name, int type, 
1483                unsigned int sz, void *data)
1484{
1485        note->name = name;
1486        note->type = type;
1487        note->datasz = sz;
1488        note->data = data;
1490}
1491
/*
 * Fill in all the fields of prstatus from the given task_struct, except
 * the registers, which need to be filled in separately.
 */
1496static void fill_prstatus(struct elf_prstatus *prstatus,
1497                struct task_struct *p, long signr)
1498{
1499        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1500        prstatus->pr_sigpend = p->pending.signal.sig[0];
1501        prstatus->pr_sighold = p->blocked.sig[0];
1502        rcu_read_lock();
1503        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1504        rcu_read_unlock();
1505        prstatus->pr_pid = task_pid_vnr(p);
1506        prstatus->pr_pgrp = task_pgrp_vnr(p);
1507        prstatus->pr_sid = task_session_vnr(p);
1508        if (thread_group_leader(p)) {
1509                struct task_cputime cputime;
1510
1511                /*
1512                 * This is the record for the group leader.  It shows the
1513                 * group-wide total, not its individual thread total.
1514                 */
1515                thread_group_cputime(p, &cputime);
1516                prstatus->pr_utime = ns_to_timeval(cputime.utime);
1517                prstatus->pr_stime = ns_to_timeval(cputime.stime);
1518        } else {
1519                u64 utime, stime;
1520
1521                task_cputime(p, &utime, &stime);
1522                prstatus->pr_utime = ns_to_timeval(utime);
1523                prstatus->pr_stime = ns_to_timeval(stime);
1524        }
1525
1526        prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1527        prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
1528}
1529
1530static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1531                       struct mm_struct *mm)
1532{
1533        const struct cred *cred;
1534        unsigned int i, len;
1535        
1536        /* first copy the parameters from user space */
1537        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1538
1539        len = mm->arg_end - mm->arg_start;
1540        if (len >= ELF_PRARGSZ)
1541                len = ELF_PRARGSZ-1;
1542        if (copy_from_user(&psinfo->pr_psargs,
1543                           (const char __user *)mm->arg_start, len))
1544                return -EFAULT;
        for (i = 0; i < len; i++)
1546                if (psinfo->pr_psargs[i] == 0)
1547                        psinfo->pr_psargs[i] = ' ';
1548        psinfo->pr_psargs[len] = 0;
1549
1550        rcu_read_lock();
1551        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1552        rcu_read_unlock();
1553        psinfo->pr_pid = task_pid_vnr(p);
1554        psinfo->pr_pgrp = task_pgrp_vnr(p);
1555        psinfo->pr_sid = task_session_vnr(p);
1556
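        /*
         * Map the lowest set task-state bit to a ps(1)-style state letter:
         * 'R' (running), 'S' (sleeping), 'D' (disk sleep), 'T' (stopped),
         * 'Z' (zombie), 'W' (paging); anything beyond that shows up as '.'.
         */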
1557        i = p->state ? ffz(~p->state) + 1 : 0;
1558        psinfo->pr_state = i;
1559        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1560        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1561        psinfo->pr_nice = task_nice(p);
1562        psinfo->pr_flag = p->flags;
1563        rcu_read_lock();
1564        cred = __task_cred(p);
1565        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1566        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1567        rcu_read_unlock();
1568        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1569        
1570        return 0;
1571}
1572
1573static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1574{
1575        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1576        int i = 0;
1577        do
1578                i += 2;
1579        while (auxv[i - 2] != AT_NULL);
1580        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1581}
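
/*
 * A sketch of what the loop above walks: mm->saved_auxv holds the auxiliary
 * vector exactly as it was handed to the process at exec time, i.e. a flat
 * array of (type, value) pairs terminated by an AT_NULL entry:
 *
 *      { AT_PHDR, ... }, { AT_ENTRY, ... }, ..., { AT_NULL, 0 }
 *
 * so `i' ends up counting every word, terminator included.
 */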
1582
1583static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1584                const siginfo_t *siginfo)
1585{
1586        mm_segment_t old_fs = get_fs();
1587        set_fs(KERNEL_DS);
1588        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1589        set_fs(old_fs);
1590        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1591}
1592
1593#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1594/*
1595 * Format of NT_FILE note:
1596 *
1597 * long count     -- how many files are mapped
1598 * long page_size -- units for file_ofs
1599 * array of [COUNT] elements of
1600 *   long start
1601 *   long end
1602 *   long file_ofs
1603 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1604 */
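/*
 * For example, a process with two private mappings of a hypothetical
 * /lib/libfoo.so would produce a note whose payload is, in user_long_t units:
 *
 *      2, PAGE_SIZE,
 *      { start0, end0, pgoff0 }, { start1, end1, pgoff1 },
 *      "/lib/libfoo.so" NUL "/lib/libfoo.so" NUL
 *
 * Note that file_ofs is stored in page_size units (vm_pgoff), not in bytes.
 */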
1605static int fill_files_note(struct memelfnote *note)
1606{
1607        struct vm_area_struct *vma;
1608        unsigned count, size, names_ofs, remaining, n;
1609        user_long_t *data;
1610        user_long_t *start_end_ofs;
1611        char *name_base, *name_curpos;
1612
1613        /* *Estimated* file count and total data size needed */
1614        count = current->mm->map_count;
1615        if (count > UINT_MAX / 64)
1616                return -EINVAL;
1617        size = count * 64;
1618
1619        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1620 alloc:
1621        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1622                return -EINVAL;
1623        size = round_up(size, PAGE_SIZE);
1624        data = vmalloc(size);
1625        if (!data)
1626                return -ENOMEM;
1627
1628        start_end_ofs = data + 2;
1629        name_base = name_curpos = ((char *)data) + names_ofs;
1630        remaining = size - names_ofs;
1631        count = 0;
1632        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1633                struct file *file;
1634                const char *filename;
1635
1636                file = vma->vm_file;
1637                if (!file)
1638                        continue;
1639                filename = file_path(file, name_curpos, remaining);
1640                if (IS_ERR(filename)) {
1641                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1642                                vfree(data);
1643                                size = size * 5 / 4;
1644                                goto alloc;
1645                        }
1646                        continue;
1647                }
1648
1649                /* file_path() fills at the end, move name down */
1650                /* n = strlen(filename) + 1: */
1651                n = (name_curpos + remaining) - filename;
1652                remaining = filename - name_curpos;
1653                memmove(name_curpos, filename, n);
1654                name_curpos += n;
1655
1656                *start_end_ofs++ = vma->vm_start;
1657                *start_end_ofs++ = vma->vm_end;
1658                *start_end_ofs++ = vma->vm_pgoff;
1659                count++;
1660        }
1661
        /* Now we know the exact count of files and can store it */
1663        data[0] = count;
1664        data[1] = PAGE_SIZE;
        /*
         * The final count is usually less than current->mm->map_count,
         * so we need to move the filenames down to close the gap.
         */
1669        n = current->mm->map_count - count;
1670        if (n != 0) {
1671                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1672                memmove(name_base - shift_bytes, name_base,
1673                        name_curpos - name_base);
1674                name_curpos -= shift_bytes;
1675        }
1676
1677        size = name_curpos - (char *)data;
1678        fill_note(note, "CORE", NT_FILE, size, data);
1679        return 0;
1680}
1681
1682#ifdef CORE_DUMP_USE_REGSET
1683#include <linux/regset.h>
1684
1685struct elf_thread_core_info {
1686        struct elf_thread_core_info *next;
1687        struct task_struct *task;
1688        struct elf_prstatus prstatus;
        struct memelfnote notes[];
1690};
1691
1692struct elf_note_info {
1693        struct elf_thread_core_info *thread;
1694        struct memelfnote psinfo;
1695        struct memelfnote signote;
1696        struct memelfnote auxv;
1697        struct memelfnote files;
1698        user_siginfo_t csigdata;
1699        size_t size;
1700        int thread_notes;
1701};
1702
1703/*
1704 * When a regset has a writeback hook, we call it on each thread before
1705 * dumping user memory.  On register window machines, this makes sure the
1706 * user memory backing the register data is up to date before we read it.
1707 */
1708static void do_thread_regset_writeback(struct task_struct *task,
1709                                       const struct user_regset *regset)
1710{
1711        if (regset->writeback)
1712                regset->writeback(task, regset, 1);
1713}
1714
1715#ifndef PRSTATUS_SIZE
1716#define PRSTATUS_SIZE(S, R) sizeof(S)
1717#endif
1718
1719#ifndef SET_PR_FPVALID
1720#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1721#endif
1722
1723static int fill_thread_core_info(struct elf_thread_core_info *t,
1724                                 const struct user_regset_view *view,
1725                                 long signr, size_t *total)
1726{
1727        unsigned int i;
1728        unsigned int regset0_size = regset_size(t->task, &view->regsets[0]);
1729
1730        /*
1731         * NT_PRSTATUS is the one special case, because the regset data
1732         * goes into the pr_reg field inside the note contents, rather
         * than being the whole note contents.  We fill the rest in here.
1734         * We assume that regset 0 is NT_PRSTATUS.
1735         */
1736        fill_prstatus(&t->prstatus, t->task, signr);
1737        (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset0_size,
1738                                    &t->prstatus.pr_reg, NULL);
1739
1740        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1741                  PRSTATUS_SIZE(t->prstatus, regset0_size), &t->prstatus);
1742        *total += notesize(&t->notes[0]);
1743
1744        do_thread_regset_writeback(t->task, &view->regsets[0]);
1745
1746        /*
         * Every other regset may generate a note too.  For each regset
1748         * that has no core_note_type or is inactive, we leave t->notes[i]
1749         * all zero and we'll know to skip writing it later.
1750         */
1751        for (i = 1; i < view->n; ++i) {
1752                const struct user_regset *regset = &view->regsets[i];
1753                do_thread_regset_writeback(t->task, regset);
1754                if (regset->core_note_type && regset->get &&
1755                    (!regset->active || regset->active(t->task, regset))) {
1756                        int ret;
1757                        size_t size = regset_size(t->task, regset);
1758                        void *data = kmalloc(size, GFP_KERNEL);
1759                        if (unlikely(!data))
1760                                return 0;
1761                        ret = regset->get(t->task, regset,
1762                                          0, size, data, NULL);
1763                        if (unlikely(ret))
1764                                kfree(data);
1765                        else {
1766                                if (regset->core_note_type != NT_PRFPREG)
1767                                        fill_note(&t->notes[i], "LINUX",
1768                                                  regset->core_note_type,
1769                                                  size, data);
1770                                else {
1771                                        SET_PR_FPVALID(&t->prstatus,
1772                                                        1, regset0_size);
1773                                        fill_note(&t->notes[i], "CORE",
1774                                                  NT_PRFPREG, size, data);
1775                                }
1776                                *total += notesize(&t->notes[i]);
1777                        }
1778                }
1779        }
1780
1781        return 1;
1782}
1783
1784static int fill_note_info(struct elfhdr *elf, int phdrs,
1785                          struct elf_note_info *info,
1786                          const siginfo_t *siginfo, struct pt_regs *regs)
1787{
1788        struct task_struct *dump_task = current;
1789        const struct user_regset_view *view = task_user_regset_view(dump_task);
1790        struct elf_thread_core_info *t;
1791        struct elf_prpsinfo *psinfo;
1792        struct core_thread *ct;
1793        unsigned int i;
1794
1795        info->size = 0;
1796        info->thread = NULL;
1797
1798        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1799        if (psinfo == NULL) {
1800                info->psinfo.data = NULL; /* So we don't free this wrongly */
1801                return 0;
1802        }
1803
1804        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1805
1806        /*
1807         * Figure out how many notes we're going to need for each thread.
1808         */
1809        info->thread_notes = 0;
1810        for (i = 0; i < view->n; ++i)
1811                if (view->regsets[i].core_note_type != 0)
1812                        ++info->thread_notes;
1813
1814        /*
         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1816         * since it is our one special case.
1817         */
1818        if (unlikely(info->thread_notes == 0) ||
1819            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1820                WARN_ON(1);
1821                return 0;
1822        }
1823
1824        /*
1825         * Initialize the ELF file header.
1826         */
1827        fill_elf_header(elf, phdrs,
1828                        view->e_machine, view->e_flags);
1829
1830        /*
1831         * Allocate a structure for each thread.
1832         */
1833        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1834                t = kzalloc(offsetof(struct elf_thread_core_info,
1835                                     notes[info->thread_notes]),
1836                            GFP_KERNEL);
1837                if (unlikely(!t))
1838                        return 0;
1839
1840                t->task = ct->task;
1841                if (ct->task == dump_task || !info->thread) {
1842                        t->next = info->thread;
1843                        info->thread = t;
1844                } else {
1845                        /*
1846                         * Make sure to keep the original task at
1847                         * the head of the list.
1848                         */
1849                        t->next = info->thread->next;
1850                        info->thread->next = t;
1851                }
1852        }
1853
1854        /*
1855         * Now fill in each thread's information.
1856         */
1857        for (t = info->thread; t != NULL; t = t->next)
1858                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1859                        return 0;
1860
1861        /*
1862         * Fill in the two process-wide notes.
1863         */
1864        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1865        info->size += notesize(&info->psinfo);
1866
1867        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1868        info->size += notesize(&info->signote);
1869
1870        fill_auxv_note(&info->auxv, current->mm);
1871        info->size += notesize(&info->auxv);
1872
1873        if (fill_files_note(&info->files) == 0)
1874                info->size += notesize(&info->files);
1875
1876        return 1;
1877}
1878
1879static size_t get_note_info_size(struct elf_note_info *info)
1880{
1881        return info->size;
1882}
1883
1884/*
1885 * Write all the notes for each thread.  When writing the first thread, the
1886 * process-wide notes are interleaved after the first thread-specific note.
1887 */
1888static int write_note_info(struct elf_note_info *info,
1889                           struct coredump_params *cprm)
1890{
1891        bool first = true;
1892        struct elf_thread_core_info *t = info->thread;
1893
1894        do {
1895                int i;
1896
1897                if (!writenote(&t->notes[0], cprm))
1898                        return 0;
1899
1900                if (first && !writenote(&info->psinfo, cprm))
1901                        return 0;
1902                if (first && !writenote(&info->signote, cprm))
1903                        return 0;
1904                if (first && !writenote(&info->auxv, cprm))
1905                        return 0;
1906                if (first && info->files.data &&
1907                                !writenote(&info->files, cprm))
1908                        return 0;
1909
1910                for (i = 1; i < info->thread_notes; ++i)
1911                        if (t->notes[i].data &&
1912                            !writenote(&t->notes[i], cprm))
1913                                return 0;
1914
1915                first = false;
1916                t = t->next;
1917        } while (t);
1918
1919        return 1;
1920}
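
/*
 * Schematically, the resulting note order for a two-thread dump is:
 *
 *      NT_PRSTATUS (thread 1), NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE,
 *      <other regset notes for thread 1>,
 *      NT_PRSTATUS (thread 2), <other regset notes for thread 2>
 *
 * which lets consumers attribute the process-wide notes to the dumping thread.
 */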
1921
1922static void free_note_info(struct elf_note_info *info)
1923{
1924        struct elf_thread_core_info *threads = info->thread;
1925        while (threads) {
1926                unsigned int i;
1927                struct elf_thread_core_info *t = threads;
1928                threads = t->next;
1929                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1930                for (i = 1; i < info->thread_notes; ++i)
1931                        kfree(t->notes[i].data);
1932                kfree(t);
1933        }
1934        kfree(info->psinfo.data);
1935        vfree(info->files.data);
1936}
1937
1938#else
1939
/* Here is the structure in which the status of each thread is captured. */
1941struct elf_thread_status
1942{
1943        struct list_head list;
1944        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1945        elf_fpregset_t fpu;             /* NT_PRFPREG */
1946        struct task_struct *thread;
1947#ifdef ELF_CORE_COPY_XFPREGS
1948        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1949#endif
1950        struct memelfnote notes[3];
1951        int num_notes;
1952};
1953
/*
 * In order to add the per-thread information required by the ELF core file
 * format, we keep a linked list of every thread's prstatus and then write
 * a note for each of them into the single note segment of the final core
 * file.
 */
1959static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1960{
1961        int sz = 0;
1962        struct task_struct *p = t->thread;
1963        t->num_notes = 0;
1964
1965        fill_prstatus(&t->prstatus, p, signr);
1966        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1967        
1968        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1969                  &(t->prstatus));
1970        t->num_notes++;
1971        sz += notesize(&t->notes[0]);
1972
1973        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1974                                                                &t->fpu))) {
1975                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1976                          &(t->fpu));
1977                t->num_notes++;
1978                sz += notesize(&t->notes[1]);
1979        }
1980
1981#ifdef ELF_CORE_COPY_XFPREGS
1982        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1983                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1984                          sizeof(t->xfpu), &t->xfpu);
1985                t->num_notes++;
1986                sz += notesize(&t->notes[2]);
1987        }
1988#endif  
1989        return sz;
1990}
1991
1992struct elf_note_info {
1993        struct memelfnote *notes;
1994        struct memelfnote *notes_files;
1995        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1996        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1997        struct list_head thread_list;
1998        elf_fpregset_t *fpu;
1999#ifdef ELF_CORE_COPY_XFPREGS
2000        elf_fpxregset_t *xfpu;
2001#endif
2002        user_siginfo_t csigdata;
2003        int thread_status_size;
2004        int numnote;
2005};
2006
2007static int elf_note_info_init(struct elf_note_info *info)
2008{
2009        memset(info, 0, sizeof(*info));
2010        INIT_LIST_HEAD(&info->thread_list);
2011
2012        /* Allocate space for ELF notes */
2013        info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
2014        if (!info->notes)
2015                return 0;
2016        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
2017        if (!info->psinfo)
2018                return 0;
2019        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
2020        if (!info->prstatus)
2021                return 0;
2022        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
2023        if (!info->fpu)
2024                return 0;
2025#ifdef ELF_CORE_COPY_XFPREGS
2026        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
2027        if (!info->xfpu)
2028                return 0;
2029#endif
2030        return 1;
2031}
2032
2033static int fill_note_info(struct elfhdr *elf, int phdrs,
2034                          struct elf_note_info *info,
2035                          const siginfo_t *siginfo, struct pt_regs *regs)
2036{
2037        struct list_head *t;
2038        struct core_thread *ct;
2039        struct elf_thread_status *ets;
2040
2041        if (!elf_note_info_init(info))
2042                return 0;
2043
2044        for (ct = current->mm->core_state->dumper.next;
2045                                        ct; ct = ct->next) {
2046                ets = kzalloc(sizeof(*ets), GFP_KERNEL);
2047                if (!ets)
2048                        return 0;
2049
2050                ets->thread = ct->task;
2051                list_add(&ets->list, &info->thread_list);
2052        }
2053
2054        list_for_each(t, &info->thread_list) {
2055                int sz;
2056
2057                ets = list_entry(t, struct elf_thread_status, list);
2058                sz = elf_dump_thread_status(siginfo->si_signo, ets);
2059                info->thread_status_size += sz;
2060        }
        /* now collect the dump for the current task */
2062        memset(info->prstatus, 0, sizeof(*info->prstatus));
2063        fill_prstatus(info->prstatus, current, siginfo->si_signo);
2064        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
2065
2066        /* Set up header */
2067        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2068
2069        /*
2070         * Set up the notes in similar form to SVR4 core dumps made
2071         * with info from their /proc.
2072         */
2073
2074        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2075                  sizeof(*info->prstatus), info->prstatus);
2076        fill_psinfo(info->psinfo, current->group_leader, current->mm);
2077        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2078                  sizeof(*info->psinfo), info->psinfo);
2079
2080        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2081        fill_auxv_note(info->notes + 3, current->mm);
2082        info->numnote = 4;
2083
2084        if (fill_files_note(info->notes + info->numnote) == 0) {
2085                info->notes_files = info->notes + info->numnote;
2086                info->numnote++;
2087        }
2088
2089        /* Try to dump the FPU. */
2090        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2091                                                               info->fpu);
2092        if (info->prstatus->pr_fpvalid)
2093                fill_note(info->notes + info->numnote++,
2094                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2095#ifdef ELF_CORE_COPY_XFPREGS
2096        if (elf_core_copy_task_xfpregs(current, info->xfpu))
2097                fill_note(info->notes + info->numnote++,
2098                          "LINUX", ELF_CORE_XFPREG_TYPE,
2099                          sizeof(*info->xfpu), info->xfpu);
2100#endif
2101
2102        return 1;
2103}
2104
2105static size_t get_note_info_size(struct elf_note_info *info)
2106{
2107        int sz = 0;
2108        int i;
2109
2110        for (i = 0; i < info->numnote; i++)
2111                sz += notesize(info->notes + i);
2112
2113        sz += info->thread_status_size;
2114
2115        return sz;
2116}
2117
2118static int write_note_info(struct elf_note_info *info,
2119                           struct coredump_params *cprm)
2120{
2121        int i;
2122        struct list_head *t;
2123
2124        for (i = 0; i < info->numnote; i++)
2125                if (!writenote(info->notes + i, cprm))
2126                        return 0;
2127
2128        /* write out the thread status notes section */
2129        list_for_each(t, &info->thread_list) {
2130                struct elf_thread_status *tmp =
2131                                list_entry(t, struct elf_thread_status, list);
2132
2133                for (i = 0; i < tmp->num_notes; i++)
2134                        if (!writenote(&tmp->notes[i], cprm))
2135                                return 0;
2136        }
2137
2138        return 1;
2139}
2140
2141static void free_note_info(struct elf_note_info *info)
2142{
2143        while (!list_empty(&info->thread_list)) {
2144                struct list_head *tmp = info->thread_list.next;
2145                list_del(tmp);
2146                kfree(list_entry(tmp, struct elf_thread_status, list));
2147        }
2148
2149        /* Free data possibly allocated by fill_files_note(): */
2150        if (info->notes_files)
2151                vfree(info->notes_files->data);
2152
2153        kfree(info->prstatus);
2154        kfree(info->psinfo);
2155        kfree(info->notes);
2156        kfree(info->fpu);
2157#ifdef ELF_CORE_COPY_XFPREGS
2158        kfree(info->xfpu);
2159#endif
2160}
2161
#endif /* CORE_DUMP_USE_REGSET */
2163
2164static struct vm_area_struct *first_vma(struct task_struct *tsk,
2165                                        struct vm_area_struct *gate_vma)
2166{
2167        struct vm_area_struct *ret = tsk->mm->mmap;
2168
2169        if (ret)
2170                return ret;
2171        return gate_vma;
2172}

/*
2174 * Helper function for iterating across a vma list.  It ensures that the caller
2175 * will visit `gate_vma' prior to terminating the search.
2176 */
2177static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2178                                        struct vm_area_struct *gate_vma)
2179{
2180        struct vm_area_struct *ret;
2181
2182        ret = this_vma->vm_next;
2183        if (ret)
2184                return ret;
2185        if (this_vma == gate_vma)
2186                return NULL;
2187        return gate_vma;
2188}
2189
2190static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2191                             elf_addr_t e_shoff, int segs)
2192{
2193        elf->e_shoff = e_shoff;
2194        elf->e_shentsize = sizeof(*shdr4extnum);
2195        elf->e_shnum = 1;
2196        elf->e_shstrndx = SHN_UNDEF;
2197
2198        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2199
2200        shdr4extnum->sh_type = SHT_NULL;
2201        shdr4extnum->sh_size = elf->e_shnum;
2202        shdr4extnum->sh_link = elf->e_shstrndx;
2203        shdr4extnum->sh_info = segs;
2204}
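
/*
 * A sketch of how a consumer recovers the real segment count when extended
 * numbering is in use: if e_phnum reads as PN_XNUM, the actual count lives
 * in section header 0's sh_info, as filled in above.  Roughly:
 *
 *      segs = ehdr.e_phnum;
 *      if (segs == PN_XNUM)
 *              segs = shdr0.sh_info;
 */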
2205
2206/*
2207 * Actual dumper
2208 *
 * This is a two-pass process; first we find the offsets of all the bits,
 * then they are actually written out.  If we exceed the core size limit
 * we just truncate.
2212 */
2213static int elf_core_dump(struct coredump_params *cprm)
2214{
2215        int has_dumped = 0;
2216        mm_segment_t fs;
2217        int segs, i;
2218        size_t vma_data_size = 0;
2219        struct vm_area_struct *vma, *gate_vma;
2220        struct elfhdr *elf = NULL;
2221        loff_t offset = 0, dataoff;
2222        struct elf_note_info info = { };
2223        struct elf_phdr *phdr4note = NULL;
2224        struct elf_shdr *shdr4extnum = NULL;
2225        Elf_Half e_phnum;
2226        elf_addr_t e_shoff;
2227        elf_addr_t *vma_filesz = NULL;
2228
        /*
         * We no longer stop all VM operations.
         *
         * This is because those processes that could possibly change map_count
         * or the mmap / vma pages are now blocked in do_exit on current
         * finishing this core dump.
         *
         * Only ptrace can touch these memory addresses, but it doesn't change
         * the map_count or the pages allocated.  So no possibility of crashing
         * exists while dumping the mm->vm_next areas to the core file.
         */
2240  
2241        /* alloc memory for large data structures: too large to be on stack */
2242        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2243        if (!elf)
2244                goto out;
        /*
         * The number of segs is recorded in the ELF header as a 16-bit value.
         * Please check the DEFAULT_MAX_MAP_COUNT definition when you modify
         * this.
         */
2249        segs = current->mm->map_count;
2250        segs += elf_core_extra_phdrs();
2251
2252        gate_vma = get_gate_vma(current->mm);
2253        if (gate_vma != NULL)
2254                segs++;
2255
2256        /* for notes section */
2257        segs++;
2258
        /*
         * If segs > PN_XNUM (0xffff), then e_phnum overflows.  To avoid
         * this, the kernel supports extended numbering.  Have a look at
         * include/linux/elf.h for further information.
         */
2262        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2263
2264        /*
2265         * Collect all the non-memory information about the process for the
2266         * notes.  This also sets up the file header.
2267         */
2268        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2269                goto cleanup;
2270
2271        has_dumped = 1;
2272
2273        fs = get_fs();
2274        set_fs(KERNEL_DS);
2275
2276        offset += sizeof(*elf);                         /* Elf header */
2277        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2278
2279        /* Write notes phdr entry */
2280        {
2281                size_t sz = get_note_info_size(&info);
2282
2283                sz += elf_coredump_extra_notes_size();
2284
2285                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2286                if (!phdr4note)
2287                        goto end_coredump;
2288
2289                fill_elf_note_phdr(phdr4note, sz, offset);
2290                offset += sz;
2291        }
2292
2293        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
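
        /*
         * From here on the layout of the core file is fixed; a sketch,
         * ignoring any arch-specific extra phdrs and extra data:
         *
         *      [ ELF header ][ program headers ][ notes ]
         *          ... padding up to dataoff ...
         *      [ page-aligned segment data ][ extnum shdr, if e_phnum == PN_XNUM ]
         */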
2294
2295        if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2296                goto end_coredump;
2297        vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
2298        if (!vma_filesz)
2299                goto end_coredump;
2300
2301        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2302                        vma = next_vma(vma, gate_vma)) {
2303                unsigned long dump_size;
2304
2305                dump_size = vma_dump_size(vma, cprm->mm_flags);
2306                vma_filesz[i++] = dump_size;
2307                vma_data_size += dump_size;
2308        }
2309
2310        offset += vma_data_size;
2311        offset += elf_core_extra_data_size();
2312        e_shoff = offset;
2313
2314        if (e_phnum == PN_XNUM) {
2315                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2316                if (!shdr4extnum)
2317                        goto end_coredump;
2318                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2319        }
2320
2321        offset = dataoff;
2322
2323        if (!dump_emit(cprm, elf, sizeof(*elf)))
2324                goto end_coredump;
2325
2326        if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2327                goto end_coredump;
2328
2329        /* Write program headers for segments dump */
2330        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2331                        vma = next_vma(vma, gate_vma)) {
2332                struct elf_phdr phdr;
2333
2334                phdr.p_type = PT_LOAD;
2335                phdr.p_offset = offset;
2336                phdr.p_vaddr = vma->vm_start;
2337                phdr.p_paddr = 0;
2338                phdr.p_filesz = vma_filesz[i++];
2339                phdr.p_memsz = vma->vm_end - vma->vm_start;
2340                offset += phdr.p_filesz;
2341                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2342                if (vma->vm_flags & VM_WRITE)
2343                        phdr.p_flags |= PF_W;
2344                if (vma->vm_flags & VM_EXEC)
2345                        phdr.p_flags |= PF_X;
2346                phdr.p_align = ELF_EXEC_PAGESIZE;
2347
2348                if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2349                        goto end_coredump;
2350        }
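
        /*
         * Note that p_filesz may be smaller than p_memsz above: for segments
         * we chose to dump only partially (or not at all), the missing tail
         * simply never makes it into the file.
         */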
2351
2352        if (!elf_core_write_extra_phdrs(cprm, offset))
2353                goto end_coredump;
2354
2355        /* write out the notes section */
2356        if (!write_note_info(&info, cprm))
2357                goto end_coredump;
2358
2359        if (elf_coredump_extra_notes_write(cprm))
2360                goto end_coredump;
2361
2362        /* Align to page */
2363        if (!dump_skip(cprm, dataoff - cprm->pos))
2364                goto end_coredump;
2365
2366        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2367                        vma = next_vma(vma, gate_vma)) {
2368                unsigned long addr;
2369                unsigned long end;
2370
2371                end = vma->vm_start + vma_filesz[i++];
2372
2373                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2374                        struct page *page;
2375                        int stop;
2376
2377                        page = get_dump_page(addr);
2378                        if (page) {
2379                                void *kaddr = kmap(page);
2380                                stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2381                                kunmap(page);
2382                                put_page(page);
2383                        } else
2384                                stop = !dump_skip(cprm, PAGE_SIZE);
2385                        if (stop)
2386                                goto end_coredump;
2387                }
2388        }
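
        /*
         * Pages that could not be snapshotted above were skipped rather than
         * written, so on destinations that support seeking the core file
         * typically ends up sparse, with holes instead of runs of zeroes.
         */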
2389        dump_truncate(cprm);
2390
2391        if (!elf_core_write_extra_data(cprm))
2392                goto end_coredump;
2393
2394        if (e_phnum == PN_XNUM) {
2395                if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2396                        goto end_coredump;
2397        }
2398
2399end_coredump:
2400        set_fs(fs);
2401
2402cleanup:
2403        free_note_info(&info);
2404        kfree(shdr4extnum);
2405        vfree(vma_filesz);
2406        kfree(phdr4note);
2407        kfree(elf);
2408out:
2409        return has_dumped;
2410}
2411
2412#endif          /* CONFIG_ELF_CORE */
2413
2414static int __init init_elf_binfmt(void)
2415{
2416        register_binfmt(&elf_format);
2417        return 0;
2418}
2419
2420static void __exit exit_elf_binfmt(void)
2421{
        /* Remove the ELF loader. */
2423        unregister_binfmt(&elf_format);
2424}
2425
2426core_initcall(init_elf_binfmt);
2427module_exit(exit_elf_binfmt);
2428MODULE_LICENSE("GPL");
2429