linux/fs/binfmt_elf.c
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX System V Release 4 Programmer's Guide: ANSI C and Programming
 * Support Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/cred.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
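
/*
 * Worked example (assuming ELF_MIN_ALIGN == 4096):
 *   ELF_PAGESTART(0x1234)  == 0x1000   (round down to the page start)
 *   ELF_PAGEOFFSET(0x1234) == 0x234    (offset within the page)
 *   ELF_PAGEALIGN(0x1234)  == 0x2000   (round up to the next page boundary)
 */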

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

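/*
 * Example (assuming ELF_MIN_ALIGN == 4096): set_brk(0x601234, 0x601800, 0)
 * rounds both ends up to 0x602000, so no anonymous pages are mapped and
 * only start_brk/brk are updated, whereas set_brk(0x601234, 0x603000, 0)
 * maps one anonymous page at 0x602000.
 */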
static int set_brk(unsigned long start, unsigned long end, int prot)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                /*
                 * Map the last of the bss segment.
                 * If the header is requesting these pages to be
                 * executable, honour that (ppc32 needs this).
                 */
                int error = vm_brk_flags(start, end - start,
                                prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        return error;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/*
 * We need to explicitly zero any fractional pages after the data
 * section (i.e. bss).  These would otherwise contain junk from the
 * file that should not be in memory.
 */
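/*
 * Example (assuming ELF_MIN_ALIGN == 4096): padzero(0x601234) clears the
 * 0xdcc bytes from 0x601234 up to the 0x602000 page boundary.
 */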
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
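
/*
 * Illustration for the common downward-growing case: with sp == 0x7ffffff0,
 * STACK_ALLOC(sp, 16) moves sp down to 0x7fffffe0 and returns it, while
 * STACK_ROUND() rounds the final address down to a 16-byte boundary, as
 * most ABIs require for the initial stack pointer.
 */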

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

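/*
 * create_elf_tables() lays out the new program's initial stack.  On a
 * downward-growing stack the result, from low to high addresses, is
 * roughly:
 *
 *     argc
 *     argv[0] ... argv[argc - 1], NULL
 *     envp[0] ... envp[envc - 1], NULL
 *     auxv (AT_* id/value pairs), terminated by an AT_NULL entry
 *     argument and environment strings, platform strings, AT_RANDOM bytes
 */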
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

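/*
 * Each entry is an (id, value) pair; userspace typically retrieves these
 * with getauxval(3), e.g. getauxval(AT_PAGESZ).
 */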
#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image (since size < total_size).
         * So we first map the 'big' image and then unmap the remainder
         * at the end (the unmap is needed for ELF images with holes).
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

#endif /* !elf_map */

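/*
 * Example: given PT_LOAD segments at p_vaddr 0x400000 (p_memsz 0x800) and
 * p_vaddr 0x600e10 (p_memsz 0x430), total_mapping_size() returns
 * 0x600e10 + 0x430 - 0x400000 == 0x201240, i.e. the span from the first
 * segment's page start to the last segment's end.
 */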
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
                                       struct file *elf_file)
{
        struct elf_phdr *elf_phdata = NULL;
        int retval, size, err = -1;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;

        /* Sanity check the number of program headers... */
        if (elf_ex->e_phnum < 1 ||
                elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* ...and their total size. */
        size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;

        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        /* Read in the program headers */
        retval = kernel_read(elf_file, elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                err = (retval < 0) ? retval : -EIO;
                goto out;
        }

        /* Success! */
        err = 0;
out:
        if (err) {
                kfree(elf_phdata);
                elf_phdata = NULL;
        }
        return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:       The main ELF header
 * @phdr:       The program header to check
 * @elf:        The open ELF file
 * @is_interp:  True if the phdr is from the interpreter of the ELF being
 *              loaded, else false.
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
                                   struct elf_phdr *phdr,
                                   struct file *elf, bool is_interp,
                                   struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:       The main ELF header
 * @has_interp: True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:      Architecture-specific state preserved throughout the process
 *              of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
                                 struct elfhdr *interp_ehdr,
                                 struct arch_elf_state *state)
{
        /* Dummy implementation, always proceed */
        return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

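/*
 * Mapping layout handled below: the interpreter's file-backed pages end at
 * elf_bss, and the region from elf_bss up to last_bss is its bss, built
 * from the fractional page zeroed by padzero() plus anonymous pages mapped
 * by vm_brk_flags().
 */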
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        int bss_prot = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int i;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        total_size = total_mapping_size(interp_elf_phdata,
                                        interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out;
        }

        eppnt = interp_elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
                        if (k > last_bss) {
                                last_bss = k;
                                bss_prot = elf_prot;
                        }
                }
        }

        /*
         * Now fill out the bss section: first pad the last page from
         * the file up to the page boundary, and zero it from elf_bss
         * up to the end of the page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out;
        }
        /*
         * Next, align both the file and mem bss up to the page size,
         * since this is where elf_bss was just zeroed up to, and where
         * last_bss will end after the vm_brk_flags() below.
         */
        elf_bss = ELF_PAGEALIGN(elf_bss);
        last_bss = ELF_PAGEALIGN(last_bss);
        /* Finally, if there is still more bss to allocate, do it. */
        if (last_bss > elf_bss) {
                error = vm_brk_flags(elf_bss, last_bss - elf_bss,
                                bss_prot & PROT_EXEC ? VM_EXEC : 0);
                if (error)
                        goto out;
        }

        error = load_addr;
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

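/*
 * Worked example: with 4K pages (PAGE_SHIFT == 12) the default mask is
 * 0x7ff, so randomize_stack_top() below can shift the stack top down by
 * up to 0x7ff << 12 bytes, i.e. just under 8MB.
 */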
static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned long random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_long();
                random_variable &= STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
        unsigned long elf_bss, elf_brk;
        int bss_prot = 0;
        int retval, i;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;
        struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
        if (!elf_phdata)
                goto out;

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        /* Get the exec headers */
                        retval = kernel_read(interpreter, 0,
                                             (void *)&loc->interp_elf_ex,
                                             sizeof(loc->interp_elf_ex));
                        if (retval != sizeof(loc->interp_elf_ex)) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                switch (elf_ppnt->p_type) {
                case PT_GNU_STACK:
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;

                case PT_LOPROC ... PT_HIPROC:
                        retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
                                                  bprm->file, false,
                                                  &arch_state);
                        if (retval)
                                goto out_free_dentry;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;

                /* Load the interpreter program headers */
                interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
                                                   interpreter);
                if (!interp_elf_phdata)
                        goto out_free_dentry;

                /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
                elf_ppnt = interp_elf_phdata;
                for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
                        switch (elf_ppnt->p_type) {
                        case PT_LOPROC ... PT_HIPROC:
                                retval = arch_elf_pt_proc(&loc->interp_elf_ex,
                                                          elf_ppnt, interpreter,
                                                          true, &arch_state);
                                if (retval)
                                        goto out_free_dentry;
                                break;
                        }
        }

        /*
         * Allow arch code to reject the ELF at this point, whilst it's
         * still possible to return an error to the code that invoked
         * the exec syscall.
         */
        retval = arch_check_elf(&loc->elf_ex,
                                !!interpreter, &loc->interp_elf_ex,
                                &arch_state);
        if (retval)
                goto out_free_dentry;

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY2(loc->elf_ex, &arch_state);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);
        install_exec_creds(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0)
                goto out_free_dentry;

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;
                unsigned long total_size = 0;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias,
                                         bss_prot);
                        if (retval)
                                goto out_free_dentry;
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
                        load_bias = ELF_ET_DYN_BASE - vaddr;
                        if (current->flags & PF_RANDOMIZE)
                                load_bias += arch_mmap_rnd();
                        load_bias = ELF_PAGESTART(load_bias);
                        total_size = total_mapping_size(elf_phdata,
                                                        loc->elf_ex.e_phnum);
                        if (!total_size) {
                                retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }
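
                /*
                 * Illustrative sketch (exact values are per-arch): for a PIE
                 * whose first PT_LOAD has p_vaddr 0, the first mapping lands
                 * at ELF_PAGESTART(ELF_ET_DYN_BASE + arch_mmap_rnd()) when
                 * PF_RANDOMIZE is set.
                 */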

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk) {
                        bss_prot = elf_prot;
                        elf_brk = k;
                }
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk, bss_prot);
        if (retval)
                goto out_free_dentry;
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias, interp_elf_phdata);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(interp_elf_phdata);
        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0)
                goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0)
                goto out;
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
                current->brk_randomized = 1;
#endif
        }

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386, %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        kfree(interp_elf_phdata);
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len) {
                error = vm_brk(len, bss - len);
                if (error)
                        goto out_free_ph;
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include - vDSO, vsyscall, and other
 * architecture specific mappings
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;

        /*
         * Assume that all vmas with a .name op should always be dumped.
         * If this changes, a new vm_ops field can easily be added.
         */
        if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
                return true;

        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment: part, all, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
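        /*
         * The MMF_DUMP_* bits tested via FILTER() are the ones userspace
         * controls through /proc/<pid>/coredump_filter.
         */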

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* support for DAX */
        if (vma_is_dax(vma)) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}

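/*
 * Example: a note named "CORE" with a 4-byte payload takes
 * sizeof(struct elf_note) + roundup(5, 4) + roundup(4, 4) == 12 + 8 + 4
 * bytes; both the name and the payload are padded to 4-byte boundaries,
 * matching the alignment writenote() emits below.
 */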
1367static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1368{
1369        struct elf_note en;
1370        en.n_namesz = strlen(men->name) + 1;
1371        en.n_descsz = men->datasz;
1372        en.n_type = men->type;
1373
1374        return dump_emit(cprm, &en, sizeof(en)) &&
1375            dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1376            dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1377}
1378
1379static void fill_elf_header(struct elfhdr *elf, int segs,
1380                            u16 machine, u32 flags)
1381{
1382        memset(elf, 0, sizeof(*elf));
1383
1384        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1385        elf->e_ident[EI_CLASS] = ELF_CLASS;
1386        elf->e_ident[EI_DATA] = ELF_DATA;
1387        elf->e_ident[EI_VERSION] = EV_CURRENT;
1388        elf->e_ident[EI_OSABI] = ELF_OSABI;
1389
1390        elf->e_type = ET_CORE;
1391        elf->e_machine = machine;
1392        elf->e_version = EV_CURRENT;
1393        elf->e_phoff = sizeof(struct elfhdr);
1394        elf->e_flags = flags;
1395        elf->e_ehsize = sizeof(struct elfhdr);
1396        elf->e_phentsize = sizeof(struct elf_phdr);
1397        elf->e_phnum = segs;
1398
1399        return;
1400}
1401
1402static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1403{
1404        phdr->p_type = PT_NOTE;
1405        phdr->p_offset = offset;
1406        phdr->p_vaddr = 0;
1407        phdr->p_paddr = 0;
1408        phdr->p_filesz = sz;
1409        phdr->p_memsz = 0;
1410        phdr->p_flags = 0;
1411        phdr->p_align = 0;
1413}
1414
1415static void fill_note(struct memelfnote *note, const char *name, int type, 
1416                unsigned int sz, void *data)
1417{
1418        note->name = name;
1419        note->type = type;
1420        note->datasz = sz;
1421        note->data = data;
1423}
1424
1425/*
1426 * Fill in all the fields in prstatus from the given task struct, except
1427 * the registers, which need to be filled in separately.
1428 */
1429static void fill_prstatus(struct elf_prstatus *prstatus,
1430                struct task_struct *p, long signr)
1431{
1432        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1433        prstatus->pr_sigpend = p->pending.signal.sig[0];
1434        prstatus->pr_sighold = p->blocked.sig[0];
1435        rcu_read_lock();
1436        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1437        rcu_read_unlock();
1438        prstatus->pr_pid = task_pid_vnr(p);
1439        prstatus->pr_pgrp = task_pgrp_vnr(p);
1440        prstatus->pr_sid = task_session_vnr(p);
1441        if (thread_group_leader(p)) {
1442                struct task_cputime cputime;
1443
1444                /*
1445                 * This is the record for the group leader.  It shows the
1446                 * group-wide total, not its individual thread total.
1447                 */
1448                thread_group_cputime(p, &cputime);
1449                prstatus->pr_utime = ns_to_timeval(cputime.utime);
1450                prstatus->pr_stime = ns_to_timeval(cputime.stime);
1451        } else {
1452                u64 utime, stime;
1453
1454                task_cputime(p, &utime, &stime);
1455                prstatus->pr_utime = ns_to_timeval(utime);
1456                prstatus->pr_stime = ns_to_timeval(stime);
1457        }
1458
1459        prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1460        prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
1461}
1462
1463static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1464                       struct mm_struct *mm)
1465{
1466        const struct cred *cred;
1467        unsigned int i, len;
1468        
1469        /* first copy the parameters from user space */
1470        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1471
1472        len = mm->arg_end - mm->arg_start;
1473        if (len >= ELF_PRARGSZ)
1474                len = ELF_PRARGSZ-1;
1475        if (copy_from_user(&psinfo->pr_psargs,
1476                           (const char __user *)mm->arg_start, len))
1477                return -EFAULT;
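            /*
             * argv strings are NUL-separated in memory; turn the
             * separators into spaces so pr_psargs reads as one
             * command line.
             */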
1478        for(i = 0; i < len; i++)
1479                if (psinfo->pr_psargs[i] == 0)
1480                        psinfo->pr_psargs[i] = ' ';
1481        psinfo->pr_psargs[len] = 0;
1482
1483        rcu_read_lock();
1484        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1485        rcu_read_unlock();
1486        psinfo->pr_pid = task_pid_vnr(p);
1487        psinfo->pr_pgrp = task_pgrp_vnr(p);
1488        psinfo->pr_sid = task_session_vnr(p);
1489
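            /* Map the lowest set state bit to a ps(1)-style state letter. */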
1490        i = p->state ? ffz(~p->state) + 1 : 0;
1491        psinfo->pr_state = i;
1492        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1493        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1494        psinfo->pr_nice = task_nice(p);
1495        psinfo->pr_flag = p->flags;
1496        rcu_read_lock();
1497        cred = __task_cred(p);
1498        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1499        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1500        rcu_read_unlock();
1501        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1502        
1503        return 0;
1504}
1505
1506static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1507{
1508        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1509        int i = 0;
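            /*
             * saved_auxv is a list of (a_type, a_val) pairs ending with
             * an AT_NULL entry; count the entries including that
             * terminator so the note reproduces it as well.
             */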
1510        do {
1511                i += 2;
1512        } while (auxv[i - 2] != AT_NULL);
1513        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1514}
1515
1516static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1517                const siginfo_t *siginfo)
1518{
1519        mm_segment_t old_fs = get_fs();
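            /*
             * copy_siginfo_to_user() expects a user pointer; switch to
             * KERNEL_DS briefly so it can fill the kernel-side csigdata
             * buffer instead.
             */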
1520        set_fs(KERNEL_DS);
1521        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1522        set_fs(old_fs);
1523        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1524}
1525
1526#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1527/*
1528 * Format of NT_FILE note:
1529 *
1530 * long count     -- how many files are mapped
1531 * long page_size -- units for file_ofs
1532 * array of [COUNT] elements of
1533 *   long start
1534 *   long end
1535 *   long file_ofs
1536 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1537 */
1538static int fill_files_note(struct memelfnote *note)
1539{
1540        struct vm_area_struct *vma;
1541        unsigned count, size, names_ofs, remaining, n;
1542        user_long_t *data;
1543        user_long_t *start_end_ofs;
1544        char *name_base, *name_curpos;
1545
1546        /* *Estimated* file count and total data size needed */
1547        count = current->mm->map_count;
1548        size = count * 64;
1549
1550        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1551 alloc:
1552        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1553                return -EINVAL;
1554        size = round_up(size, PAGE_SIZE);
1555        data = vmalloc(size);
1556        if (!data)
1557                return -ENOMEM;
1558
1559        start_end_ofs = data + 2;
1560        name_base = name_curpos = ((char *)data) + names_ofs;
1561        remaining = size - names_ofs;
1562        count = 0;
1563        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1564                struct file *file;
1565                const char *filename;
1566
1567                file = vma->vm_file;
1568                if (!file)
1569                        continue;
1570                filename = file_path(file, name_curpos, remaining);
1571                if (IS_ERR(filename)) {
1572                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1573                                vfree(data);
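                                    /* Name didn't fit: grow the buffer
                                     * by 25% and retry. */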
1574                                size = size * 5 / 4;
1575                                goto alloc;
1576                        }
1577                        continue;
1578                }
1579
1580                /* file_path() fills at the end, move name down */
1581                /* n = strlen(filename) + 1: */
1582                n = (name_curpos + remaining) - filename;
1583                remaining = filename - name_curpos;
1584                memmove(name_curpos, filename, n);
1585                name_curpos += n;
1586
1587                *start_end_ofs++ = vma->vm_start;
1588                *start_end_ofs++ = vma->vm_end;
1589                *start_end_ofs++ = vma->vm_pgoff;
1590                count++;
1591        }
1592
1593        /* Now we know the exact count of files, so we can store it */
1594        data[0] = count;
1595        data[1] = PAGE_SIZE;
1596        /*
1597         * The count is usually less than current->mm->map_count,
1598         * so we need to move the filenames down.
1599         */
1600        n = current->mm->map_count - count;
1601        if (n != 0) {
1602                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1603                memmove(name_base - shift_bytes, name_base,
1604                        name_curpos - name_base);
1605                name_curpos -= shift_bytes;
1606        }
1607
1608        size = name_curpos - (char *)data;
1609        fill_note(note, "CORE", NT_FILE, size, data);
1610        return 0;
1611}
1612
1613#ifdef CORE_DUMP_USE_REGSET
1614#include <linux/regset.h>
1615
1616struct elf_thread_core_info {
1617        struct elf_thread_core_info *next;
1618        struct task_struct *task;
1619        struct elf_prstatus prstatus;
1620        struct memelfnote notes[];
1621};
1622
1623struct elf_note_info {
1624        struct elf_thread_core_info *thread;
1625        struct memelfnote psinfo;
1626        struct memelfnote signote;
1627        struct memelfnote auxv;
1628        struct memelfnote files;
1629        user_siginfo_t csigdata;
1630        size_t size;
1631        int thread_notes;
1632};
1633
1634/*
1635 * When a regset has a writeback hook, we call it on each thread before
1636 * dumping user memory.  On register window machines, this makes sure the
1637 * user memory backing the register data is up to date before we read it.
1638 */
1639static void do_thread_regset_writeback(struct task_struct *task,
1640                                       const struct user_regset *regset)
1641{
1642        if (regset->writeback)
1643                regset->writeback(task, regset, 1);
1644}
1645
1646#ifndef PRSTATUS_SIZE
1647#define PRSTATUS_SIZE(S, R) sizeof(S)
1648#endif
1649
1650#ifndef SET_PR_FPVALID
1651#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
1652#endif
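
    /*
     * An arch may override PRSTATUS_SIZE and SET_PR_FPVALID above when
     * its exported prstatus layout (and the location of pr_fpvalid)
     * depends on the regset size, e.g. for compat tasks; the defaults
     * just use the native struct.
     */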
1653
1654static int fill_thread_core_info(struct elf_thread_core_info *t,
1655                                 const struct user_regset_view *view,
1656                                 long signr, size_t *total)
1657{
1658        unsigned int i;
1659        unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
1660
1661        /*
1662         * NT_PRSTATUS is the one special case, because the regset data
1663         * goes into the pr_reg field inside the note contents, rather
1664         * than being the whole note contents.  We fill the rest in here.
1665         * We assume that regset 0 is NT_PRSTATUS.
1666         */
1667        fill_prstatus(&t->prstatus, t->task, signr);
1668        (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
1669                                    &t->prstatus.pr_reg, NULL);
1670
1671        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1672                  PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
1673        *total += notesize(&t->notes[0]);
1674
1675        do_thread_regset_writeback(t->task, &view->regsets[0]);
1676
1677        /*
1678         * Each of the other regsets may generate a note too.  For each regset
1679         * that has no core_note_type or is inactive, we leave t->notes[i]
1680         * all zero and we'll know to skip writing it later.
1681         */
1682        for (i = 1; i < view->n; ++i) {
1683                const struct user_regset *regset = &view->regsets[i];
1684                do_thread_regset_writeback(t->task, regset);
1685                if (regset->core_note_type && regset->get &&
1686                    (!regset->active || regset->active(t->task, regset))) {
1687                        int ret;
1688                        size_t size = regset->n * regset->size;
1689                        void *data = kmalloc(size, GFP_KERNEL);
1690                        if (unlikely(!data))
1691                                return 0;
1692                        ret = regset->get(t->task, regset,
1693                                          0, size, data, NULL);
1694                        if (unlikely(ret))
1695                                kfree(data);
1696                        else {
1697                                if (regset->core_note_type != NT_PRFPREG)
1698                                        fill_note(&t->notes[i], "LINUX",
1699                                                  regset->core_note_type,
1700                                                  size, data);
1701                                else {
1702                                        SET_PR_FPVALID(&t->prstatus,
1703                                                        1, regset_size);
1704                                        fill_note(&t->notes[i], "CORE",
1705                                                  NT_PRFPREG, size, data);
1706                                }
1707                                *total += notesize(&t->notes[i]);
1708                        }
1709                }
1710        }
1711
1712        return 1;
1713}
1714
1715static int fill_note_info(struct elfhdr *elf, int phdrs,
1716                          struct elf_note_info *info,
1717                          const siginfo_t *siginfo, struct pt_regs *regs)
1718{
1719        struct task_struct *dump_task = current;
1720        const struct user_regset_view *view = task_user_regset_view(dump_task);
1721        struct elf_thread_core_info *t;
1722        struct elf_prpsinfo *psinfo;
1723        struct core_thread *ct;
1724        unsigned int i;
1725
1726        info->size = 0;
1727        info->thread = NULL;
1728
1729        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1730        if (psinfo == NULL) {
1731                info->psinfo.data = NULL; /* So we don't free this wrongly */
1732                return 0;
1733        }
1734
1735        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1736
1737        /*
1738         * Figure out how many notes we're going to need for each thread.
1739         */
1740        info->thread_notes = 0;
1741        for (i = 0; i < view->n; ++i)
1742                if (view->regsets[i].core_note_type != 0)
1743                        ++info->thread_notes;
1744
1745        /*
1746         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1747         * since it is our one special case.
1748         */
1749        if (unlikely(info->thread_notes == 0) ||
1750            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1751                WARN_ON(1);
1752                return 0;
1753        }
1754
1755        /*
1756         * Initialize the ELF file header.
1757         */
1758        fill_elf_header(elf, phdrs,
1759                        view->e_machine, view->e_flags);
1760
1761        /*
1762         * Allocate a structure for each thread.
1763         */
1764        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1765                t = kzalloc(offsetof(struct elf_thread_core_info,
1766                                     notes[info->thread_notes]),
1767                            GFP_KERNEL);
1768                if (unlikely(!t))
1769                        return 0;
1770
1771                t->task = ct->task;
1772                if (ct->task == dump_task || !info->thread) {
1773                        t->next = info->thread;
1774                        info->thread = t;
1775                } else {
1776                        /*
1777                         * Make sure to keep the original task at
1778                         * the head of the list.
1779                         */
1780                        t->next = info->thread->next;
1781                        info->thread->next = t;
1782                }
1783        }
1784
1785        /*
1786         * Now fill in each thread's information.
1787         */
1788        for (t = info->thread; t != NULL; t = t->next)
1789                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1790                        return 0;
1791
1792        /*
1793         * Fill in the two process-wide notes.
1794         */
1795        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1796        info->size += notesize(&info->psinfo);
1797
1798        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1799        info->size += notesize(&info->signote);
1800
1801        fill_auxv_note(&info->auxv, current->mm);
1802        info->size += notesize(&info->auxv);
1803
1804        if (fill_files_note(&info->files) == 0)
1805                info->size += notesize(&info->files);
1806
1807        return 1;
1808}
1809
1810static size_t get_note_info_size(struct elf_note_info *info)
1811{
1812        return info->size;
1813}
1814
1815/*
1816 * Write all the notes for each thread.  When writing the first thread, the
1817 * process-wide notes are interleaved after the first thread-specific note.
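 *
 * The resulting note order is, roughly:
 *   thread 0: NT_PRSTATUS, NT_PRPSINFO, NT_SIGINFO, NT_AUXV, [NT_FILE],
 *             then this thread's remaining regset notes
 *   thread 1..n: NT_PRSTATUS, then that thread's remaining regset notes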
1818 */
1819static int write_note_info(struct elf_note_info *info,
1820                           struct coredump_params *cprm)
1821{
1822        bool first = true;
1823        struct elf_thread_core_info *t = info->thread;
1824
1825        do {
1826                int i;
1827
1828                if (!writenote(&t->notes[0], cprm))
1829                        return 0;
1830
1831                if (first && !writenote(&info->psinfo, cprm))
1832                        return 0;
1833                if (first && !writenote(&info->signote, cprm))
1834                        return 0;
1835                if (first && !writenote(&info->auxv, cprm))
1836                        return 0;
1837                if (first && info->files.data &&
1838                                !writenote(&info->files, cprm))
1839                        return 0;
1840
1841                for (i = 1; i < info->thread_notes; ++i)
1842                        if (t->notes[i].data &&
1843                            !writenote(&t->notes[i], cprm))
1844                                return 0;
1845
1846                first = false;
1847                t = t->next;
1848        } while (t);
1849
1850        return 1;
1851}
1852
1853static void free_note_info(struct elf_note_info *info)
1854{
1855        struct elf_thread_core_info *threads = info->thread;
1856        while (threads) {
1857                unsigned int i;
1858                struct elf_thread_core_info *t = threads;
1859                threads = t->next;
1860                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1861                for (i = 1; i < info->thread_notes; ++i)
1862                        kfree(t->notes[i].data);
1863                kfree(t);
1864        }
1865        kfree(info->psinfo.data);
1866        vfree(info->files.data);
1867}
1868
1869#else
1870
1871/* Here is the structure in which status of each thread is captured. */
1872struct elf_thread_status
1873{
1874        struct list_head list;
1875        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1876        elf_fpregset_t fpu;             /* NT_PRFPREG */
1877        struct task_struct *thread;
1878#ifdef ELF_CORE_COPY_XFPREGS
1879        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1880#endif
1881        struct memelfnote notes[3];
1882        int num_notes;
1883};
1884
1885/*
1886 * In order to add the specific thread information for the ELF file format,
1887 * we need to keep a linked list of every thread's pr_status and then create
1888 * a single section for them in the final core file.
1889 */
1890static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1891{
1892        int sz = 0;
1893        struct task_struct *p = t->thread;
1894        t->num_notes = 0;
1895
1896        fill_prstatus(&t->prstatus, p, signr);
1897        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1898
1899        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1900                  &(t->prstatus));
1901        t->num_notes++;
1902        sz += notesize(&t->notes[0]);
1903
1904        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1905                                                                &t->fpu))) {
1906                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1907                          &(t->fpu));
1908                t->num_notes++;
1909                sz += notesize(&t->notes[1]);
1910        }
1911
1912#ifdef ELF_CORE_COPY_XFPREGS
1913        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1914                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1915                          sizeof(t->xfpu), &t->xfpu);
1916                t->num_notes++;
1917                sz += notesize(&t->notes[2]);
1918        }
1919#endif  
1920        return sz;
1921}
1922
1923struct elf_note_info {
1924        struct memelfnote *notes;
1925        struct memelfnote *notes_files;
1926        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1927        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1928        struct list_head thread_list;
1929        elf_fpregset_t *fpu;
1930#ifdef ELF_CORE_COPY_XFPREGS
1931        elf_fpxregset_t *xfpu;
1932#endif
1933        user_siginfo_t csigdata;
1934        int thread_status_size;
1935        int numnote;
1936};
1937
1938static int elf_note_info_init(struct elf_note_info *info)
1939{
1940        memset(info, 0, sizeof(*info));
1941        INIT_LIST_HEAD(&info->thread_list);
1942
1943        /* Allocate space for ELF notes */
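            /* (8 slots cover NT_PRSTATUS, NT_PRPSINFO, NT_SIGINFO, NT_AUXV,
             * NT_FILE, NT_PRFPREG and the optional xfpu note, with slack) */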
1944        info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1945        if (!info->notes)
1946                return 0;
1947        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1948        if (!info->psinfo)
1949                return 0;
1950        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1951        if (!info->prstatus)
1952                return 0;
1953        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1954        if (!info->fpu)
1955                return 0;
1956#ifdef ELF_CORE_COPY_XFPREGS
1957        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1958        if (!info->xfpu)
1959                return 0;
1960#endif
1961        return 1;
1962}
1963
1964static int fill_note_info(struct elfhdr *elf, int phdrs,
1965                          struct elf_note_info *info,
1966                          const siginfo_t *siginfo, struct pt_regs *regs)
1967{
1968        struct list_head *t;
1969        struct core_thread *ct;
1970        struct elf_thread_status *ets;
1971
1972        if (!elf_note_info_init(info))
1973                return 0;
1974
1975        for (ct = current->mm->core_state->dumper.next;
1976                                        ct; ct = ct->next) {
1977                ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1978                if (!ets)
1979                        return 0;
1980
1981                ets->thread = ct->task;
1982                list_add(&ets->list, &info->thread_list);
1983        }
1984
1985        list_for_each(t, &info->thread_list) {
1986                int sz;
1987
1988                ets = list_entry(t, struct elf_thread_status, list);
1989                sz = elf_dump_thread_status(siginfo->si_signo, ets);
1990                info->thread_status_size += sz;
1991        }
1992        /* now collect the dump for the current task */
1993        memset(info->prstatus, 0, sizeof(*info->prstatus));
1994        fill_prstatus(info->prstatus, current, siginfo->si_signo);
1995        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1996
1997        /* Set up header */
1998        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1999
2000        /*
2001         * Set up the notes in similar form to SVR4 core dumps made
2002         * with info from their /proc.
2003         */
2004
2005        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2006                  sizeof(*info->prstatus), info->prstatus);
2007        fill_psinfo(info->psinfo, current->group_leader, current->mm);
2008        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2009                  sizeof(*info->psinfo), info->psinfo);
2010
2011        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2012        fill_auxv_note(info->notes + 3, current->mm);
2013        info->numnote = 4;
2014
2015        if (fill_files_note(info->notes + info->numnote) == 0) {
2016                info->notes_files = info->notes + info->numnote;
2017                info->numnote++;
2018        }
2019
2020        /* Try to dump the FPU. */
2021        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2022                                                               info->fpu);
2023        if (info->prstatus->pr_fpvalid)
2024                fill_note(info->notes + info->numnote++,
2025                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2026#ifdef ELF_CORE_COPY_XFPREGS
2027        if (elf_core_copy_task_xfpregs(current, info->xfpu))
2028                fill_note(info->notes + info->numnote++,
2029                          "LINUX", ELF_CORE_XFPREG_TYPE,
2030                          sizeof(*info->xfpu), info->xfpu);
2031#endif
2032
2033        return 1;
2034}
2035
2036static size_t get_note_info_size(struct elf_note_info *info)
2037{
2038        int sz = 0;
2039        int i;
2040
2041        for (i = 0; i < info->numnote; i++)
2042                sz += notesize(info->notes + i);
2043
2044        sz += info->thread_status_size;
2045
2046        return sz;
2047}
2048
2049static int write_note_info(struct elf_note_info *info,
2050                           struct coredump_params *cprm)
2051{
2052        int i;
2053        struct list_head *t;
2054
2055        for (i = 0; i < info->numnote; i++)
2056                if (!writenote(info->notes + i, cprm))
2057                        return 0;
2058
2059        /* write out the thread status notes section */
2060        list_for_each(t, &info->thread_list) {
2061                struct elf_thread_status *tmp =
2062                                list_entry(t, struct elf_thread_status, list);
2063
2064                for (i = 0; i < tmp->num_notes; i++)
2065                        if (!writenote(&tmp->notes[i], cprm))
2066                                return 0;
2067        }
2068
2069        return 1;
2070}
2071
2072static void free_note_info(struct elf_note_info *info)
2073{
2074        while (!list_empty(&info->thread_list)) {
2075                struct list_head *tmp = info->thread_list.next;
2076                list_del(tmp);
2077                kfree(list_entry(tmp, struct elf_thread_status, list));
2078        }
2079
2080        /* Free data possibly allocated by fill_files_note(): */
2081        if (info->notes_files)
2082                vfree(info->notes_files->data);
2083
2084        kfree(info->prstatus);
2085        kfree(info->psinfo);
2086        kfree(info->notes);
2087        kfree(info->fpu);
2088#ifdef ELF_CORE_COPY_XFPREGS
2089        kfree(info->xfpu);
2090#endif
2091}
2092
2093#endif
2094
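    /*
     * The "gate" VMA (e.g. the x86-64 vsyscall page) does not live on
     * the mm's vma list, so the two helpers below splice it onto the
     * end of the walk by hand.
     */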
2095static struct vm_area_struct *first_vma(struct task_struct *tsk,
2096                                        struct vm_area_struct *gate_vma)
2097{
2098        struct vm_area_struct *ret = tsk->mm->mmap;
2099
2100        if (ret)
2101                return ret;
2102        return gate_vma;
2103}
2104/*
2105 * Helper function for iterating across a vma list.  It ensures that the caller
2106 * will visit `gate_vma' prior to terminating the search.
2107 */
2108static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2109                                        struct vm_area_struct *gate_vma)
2110{
2111        struct vm_area_struct *ret;
2112
2113        ret = this_vma->vm_next;
2114        if (ret)
2115                return ret;
2116        if (this_vma == gate_vma)
2117                return NULL;
2118        return gate_vma;
2119}
2120
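    /*
     * When there are more than PN_XNUM (0xffff) segments, e_phnum cannot
     * hold the real count: it is set to PN_XNUM and the count is stored
     * in the sh_info field of a single, otherwise empty section header
     * (the ELF "extended numbering" scheme).
     */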
2121static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2122                             elf_addr_t e_shoff, int segs)
2123{
2124        elf->e_shoff = e_shoff;
2125        elf->e_shentsize = sizeof(*shdr4extnum);
2126        elf->e_shnum = 1;
2127        elf->e_shstrndx = SHN_UNDEF;
2128
2129        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2130
2131        shdr4extnum->sh_type = SHT_NULL;
2132        shdr4extnum->sh_size = elf->e_shnum;
2133        shdr4extnum->sh_link = elf->e_shstrndx;
2134        shdr4extnum->sh_info = segs;
2135}
2136
2137/*
2138 * Actual dumper
2139 *
2140 * This is a two-pass process; first we find the offsets of the bits,
2141 * and then they are actually written out.  If we run out of core limit,
2142 * we just truncate.
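 *
 * The resulting file layout is, roughly:
 *
 *   ELF header | PT_NOTE phdr | PT_LOAD phdrs | extra phdrs | notes
 *   | pad to ELF_EXEC_PAGESIZE | vma contents | extra data
 *   | extended-numbering shdr (only when e_phnum == PN_XNUM)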
2143 */
2144static int elf_core_dump(struct coredump_params *cprm)
2145{
2146        int has_dumped = 0;
2147        mm_segment_t fs;
2148        int segs, i;
2149        size_t vma_data_size = 0;
2150        struct vm_area_struct *vma, *gate_vma;
2151        struct elfhdr *elf = NULL;
2152        loff_t offset = 0, dataoff;
2153        struct elf_note_info info = { };
2154        struct elf_phdr *phdr4note = NULL;
2155        struct elf_shdr *shdr4extnum = NULL;
2156        Elf_Half e_phnum;
2157        elf_addr_t e_shoff;
2158        elf_addr_t *vma_filesz = NULL;
2159
2160        /*
2161         * We no longer stop all VM operations.
2162         *
2163         * This is because those processes that could possibly change map_count
2164         * or the mmap / vma pages are now blocked in do_exit on current
2165         * finishing this core dump.
2166         *
2167         * Only ptrace can touch these memory addresses, but it doesn't change
2168         * the map_count or the pages allocated. So no possibility of crashing
2169         * exists while dumping the mm->vm_next areas to the core file.
2170         */
2171  
2172        /* alloc memory for large data structures: too large to be on stack */
2173        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2174        if (!elf)
2175                goto out;
2176        /*
2177         * The number of segs is recorded in the ELF header as a 16-bit value.
2178         * Please check the DEFAULT_MAX_MAP_COUNT definition if you modify this.
2179         */
2180        segs = current->mm->map_count;
2181        segs += elf_core_extra_phdrs();
2182
2183        gate_vma = get_gate_vma(current->mm);
2184        if (gate_vma != NULL)
2185                segs++;
2186
2187        /* for notes section */
2188        segs++;
2189
2190        /* If segs > PN_XNUM(0xffff), then e_phnum overflows.  To avoid
2191         * this, the kernel supports extended numbering.  Have a look at
2192         * include/linux/elf.h for further information. */
2193        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2194
2195        /*
2196         * Collect all the non-memory information about the process for the
2197         * notes.  This also sets up the file header.
2198         */
2199        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2200                goto cleanup;
2201
2202        has_dumped = 1;
2203
2204        fs = get_fs();
2205        set_fs(KERNEL_DS);
2206
2207        offset += sizeof(*elf);                         /* Elf header */
2208        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2209
2210        /* Write notes phdr entry */
2211        {
2212                size_t sz = get_note_info_size(&info);
2213
2214                sz += elf_coredump_extra_notes_size();
2215
2216                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2217                if (!phdr4note)
2218                        goto end_coredump;
2219
2220                fill_elf_note_phdr(phdr4note, sz, offset);
2221                offset += sz;
2222        }
2223
2224        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2225
2226        if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2227                goto end_coredump;
2228        vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
2229        if (!vma_filesz)
2230                goto end_coredump;
2231
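        /*
         * First pass over the vmas: decide how much of each will be
         * dumped, so the PT_LOAD headers written below can carry the
         * final p_filesz values before any data goes out.
         */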
2232        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2233                        vma = next_vma(vma, gate_vma)) {
2234                unsigned long dump_size;
2235
2236                dump_size = vma_dump_size(vma, cprm->mm_flags);
2237                vma_filesz[i++] = dump_size;
2238                vma_data_size += dump_size;
2239        }
2240
2241        offset += vma_data_size;
2242        offset += elf_core_extra_data_size();
2243        e_shoff = offset;
2244
2245        if (e_phnum == PN_XNUM) {
2246                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2247                if (!shdr4extnum)
2248                        goto end_coredump;
2249                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2250        }
2251
2252        offset = dataoff;
2253
2254        if (!dump_emit(cprm, elf, sizeof(*elf)))
2255                goto end_coredump;
2256
2257        if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2258                goto end_coredump;
2259
2260        /* Write program headers for segments dump */
2261        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2262                        vma = next_vma(vma, gate_vma)) {
2263                struct elf_phdr phdr;
2264
2265                phdr.p_type = PT_LOAD;
2266                phdr.p_offset = offset;
2267                phdr.p_vaddr = vma->vm_start;
2268                phdr.p_paddr = 0;
2269                phdr.p_filesz = vma_filesz[i++];
2270                phdr.p_memsz = vma->vm_end - vma->vm_start;
2271                offset += phdr.p_filesz;
2272                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2273                if (vma->vm_flags & VM_WRITE)
2274                        phdr.p_flags |= PF_W;
2275                if (vma->vm_flags & VM_EXEC)
2276                        phdr.p_flags |= PF_X;
2277                phdr.p_align = ELF_EXEC_PAGESIZE;
2278
2279                if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2280                        goto end_coredump;
2281        }
2282
2283        if (!elf_core_write_extra_phdrs(cprm, offset))
2284                goto end_coredump;
2285
2286        /* write out the notes section */
2287        if (!write_note_info(&info, cprm))
2288                goto end_coredump;
2289
2290        if (elf_coredump_extra_notes_write(cprm))
2291                goto end_coredump;
2292
2293        /* Align to page */
2294        if (!dump_skip(cprm, dataoff - cprm->pos))
2295                goto end_coredump;
2296
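        /*
         * Now stream the selected vma contents.  Pages that cannot be
         * fetched (e.g. never faulted in) become holes via dump_skip().
         */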
2297        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2298                        vma = next_vma(vma, gate_vma)) {
2299                unsigned long addr;
2300                unsigned long end;
2301
2302                end = vma->vm_start + vma_filesz[i++];
2303
2304                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2305                        struct page *page;
2306                        int stop;
2307
2308                        page = get_dump_page(addr);
2309                        if (page) {
2310                                void *kaddr = kmap(page);
2311                                stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2312                                kunmap(page);
2313                                put_page(page);
2314                        } else
2315                                stop = !dump_skip(cprm, PAGE_SIZE);
2316                        if (stop)
2317                                goto end_coredump;
2318                }
2319        }
2320        dump_truncate(cprm);
2321
2322        if (!elf_core_write_extra_data(cprm))
2323                goto end_coredump;
2324
2325        if (e_phnum == PN_XNUM) {
2326                if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2327                        goto end_coredump;
2328        }
2329
2330end_coredump:
2331        set_fs(fs);
2332
2333cleanup:
2334        free_note_info(&info);
2335        kfree(shdr4extnum);
2336        vfree(vma_filesz);
2337        kfree(phdr4note);
2338        kfree(elf);
2339out:
2340        return has_dumped;
2341}
2342
2343#endif          /* CONFIG_ELF_CORE */
2344
2345static int __init init_elf_binfmt(void)
2346{
2347        register_binfmt(&elf_format);
2348        return 0;
2349}
2350
2351static void __exit exit_elf_binfmt(void)
2352{
2353        /* Remove the ELF loader. */
2354        unregister_binfmt(&elf_format);
2355}
2356
2357core_initcall(init_elf_binfmt);
2358module_exit(exit_elf_binfmt);
2359MODULE_LICENSE("GPL");
2360