/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
   6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
   7 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

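/*
 * Map anonymous zero pages over the page-aligned [start, end) range via
 * vm_brk() and record the new program break in the mm.  Returns 0 on
 * success, or the bad address when the mapping fails.
 */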
static int set_brk(unsigned long start, unsigned long end)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		unsigned long addr;
		addr = vm_brk(start, end - start);
		if (BAD_ADDR(addr))
			return addr;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would otherwise
   contain junk from the file that should not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

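/*
 * Lay out the initial process stack: the optional platform strings and
 * the AT_RANDOM seed bytes, then the auxiliary vector, argc, and the
 * argv[] and envp[] pointer arrays that the new program will see.
 */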
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry.  */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	argv = sp;
	envp = argv + argc + 1;

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image. (since size < total_size)
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end. (which unmap is needed for ELF images with holes.)
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	return map_addr;
}

#endif /* !elf_map */

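/*
 * Return the extent of the address range covered by the PT_LOAD headers:
 * from the page-aligned start of the first PT_LOAD segment to the end of
 * the last one, or 0 if there are no PT_LOAD headers at all.
 */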
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
				       struct file *elf_file)
{
	struct elf_phdr *elf_phdata = NULL;
	int retval, size, err = -1;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;

	/* Sanity check the number of program headers... */
	if (elf_ex->e_phnum < 1 ||
		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* ...and their total size. */
	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;

	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	/* Read in the program headers */
	retval = kernel_read(elf_file, elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		err = (retval < 0) ? retval : -EIO;
		goto out;
	}

	/* Success! */
	err = 0;
out:
	if (err) {
		kfree(elf_phdata);
		elf_phdata = NULL;
	}
	return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr is from the interpreter of the ELF being
 *		loaded, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_map_addr,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op->mmap)
		goto out;

	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			if (!*interp_map_addr)
				*interp_map_addr = map_addr;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
			if (k > last_bss)
				last_bss = k;
		}
	}

	if (last_bss > elf_bss) {
		/*
		 * Now fill out the bss section.  First pad the last page up
		 * to the page boundary, and then perform a mmap to make sure
		 * that there are zero-mapped pages up to and including the
		 * last bss page.
		 */
		if (padzero(elf_bss)) {
			error = -EFAULT;
			goto out;
		}

		/* What we have mapped so far */
		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

		/* Map the last of the bss segment */
		error = vm_brk(elf_bss, last_bss - elf_bss);
		if (BAD_ADDR(error))
			goto out;
	}

	error = load_addr;
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif

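/*
 * Pick the stack top for the new image: page-align stack_top and, when
 * ASLR is in effect for this task, shift it by up to STACK_RND_MASK
 * pages of randomness.
 */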
static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned int random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_int() & STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

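/*
 * Load an ELF executable: parse and sanity-check its headers, read in any
 * PT_INTERP interpreter, flush the old mm, map the PT_LOAD segments (and
 * the interpreter, if present), set up the brk, stack and auxiliary
 * vector, and finally start the new thread at the chosen entry point.
 */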
static int load_elf_binary(struct linux_binprm *bprm)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char *elf_interpreter = NULL;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
	unsigned long elf_bss, elf_brk;
	int retval, i;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	struct pt_regs *regs = current_pt_regs();
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
	} *loc;
	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op->mmap)
		goto out;

	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
	if (!elf_phdata)
		goto out;

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_ph;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_ph;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			would_dump(bprm, interpreter);

			retval = kernel_read(interpreter, 0, bprm->buf,
					     BINPRM_BUF_SIZE);
			if (retval != BINPRM_BUF_SIZE) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			/* Get the exec headers */
			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		switch (elf_ppnt->p_type) {
		case PT_GNU_STACK:
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;

		case PT_LOPROC ... PT_HIPROC:
			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
						  bprm->file, false,
						  &arch_state);
			if (retval)
				goto out_free_dentry;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;

		/* Load the interpreter program headers */
		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
						   interpreter);
		if (!interp_elf_phdata)
			goto out_free_dentry;

		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
		elf_ppnt = interp_elf_phdata;
		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
			switch (elf_ppnt->p_type) {
			case PT_LOPROC ... PT_HIPROC:
				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
							  elf_ppnt, interpreter,
							  true, &arch_state);
				if (retval)
					goto out_free_dentry;
				break;
			}
	}

	/*
	 * Allow arch code to reject the ELF at this point, whilst it's
	 * still possible to return an error to the code that invoked
	 * the exec syscall.
	 */
	retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
	if (retval)
		goto out_free_dentry;

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY2(loc->elf_ex, &arch_state);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0)
		goto out_free_dentry;

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for (i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely(elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias);
			if (retval)
				goto out_free_dentry;
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
			/* Memory randomization might have been switched off
			 * in runtime via sysctl or explicit setting of
			 * personality flags.
			 * If that is the case, retain the original non-zero
			 * load_bias value in order to establish proper
			 * non-randomized mappings.
			 */
			if (current->flags & PF_RANDOMIZE)
				load_bias = 0;
			else
				load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
			load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, 0);
		if (BAD_ADDR(error)) {
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void *)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
					     ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk)
			elf_brk = k;
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk);
	if (retval)
		goto out_free_dentry;
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		unsigned long interp_map_addr = 0;

		elf_entry = load_elf_interp(&loc->interp_elf_ex,
					    interpreter,
					    &interp_map_addr,
					    load_bias, interp_elf_phdata);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += loc->interp_elf_ex.e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(interp_elf_phdata);
	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
	if (retval < 0)
		goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	install_exec_creds(bprm);
	retval = create_elf_tables(bprm, &loc->elf_ex,
			  load_addr, interp_load_addr);
	if (retval < 0)
		goto out;
	/* N.B. passed_fileno might not be initialized? */
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		current->mm->brk = current->mm->start_brk =
			arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
		current->brk_randomized = 1;
#endif
	}
#endif

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	kfree(interp_elf_phdata);
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;

	/* Now use mmap to map the library into memory. */
	error = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len)
		vm_brk(len, bss - len);
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include the vDSO, vsyscall, and other
 * architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
	/* Any vsyscall mappings? */
	if (vma == get_gate_vma(vma->vm_mm))
		return true;

	/*
	 * Assume that all vmas with a .name op should always be dumped.
	 * If this changes, a new vm_ops field can easily be added.
	 */
	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
		return true;

	/*
	 * arch_vma_name() returns non-NULL for special architecture mappings,
	 * such as vDSO sections.
	 */
	if (arch_vma_name(vma))
		return true;

	return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))

	/* always dump the vdso and vsyscall sections */
	if (always_dump_vma(vma))
		goto whole;

	if (vma->vm_flags & VM_DONTDUMP)
		return 0;

	/* Hugetlb memory check */
	if (vma->vm_flags & VM_HUGETLB) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
			goto whole;
		return 0;
	}

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & VM_IO)
		return 0;

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (file_inode(vma->vm_file)->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to.  */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) &&
	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		mm_segment_t fs = get_fs();
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		/*
		 * Switch to the user "segment" for get_user(),
		 * then put back what elf_core_dump() had in place.
		 */
		set_fs(USER_DS);
		if (unlikely(get_user(word, header)))
			word = 0;
		set_fs(fs);
		if (word == magic.cmp)
			return PAGE_SIZE;
	}

#undef	FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

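/*
 * Size of a note on disk: the elf_note header plus the name and the
 * descriptor data, each padded to a 4-byte boundary.
 */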
static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}

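/*
 * Emit one note to the core dump: the elf_note header, then the name and
 * the data, each followed by padding to a 4-byte boundary.  Returns
 * non-zero on success and zero on failure, matching dump_emit().
 */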
static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	return dump_emit(cprm, &en, sizeof(en)) &&
	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}

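/*
 * Fill in the fixed fields of an ET_CORE ELF header describing a dump
 * with 'segs' program headers.
 */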
static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;

	return;
}

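/*
 * Describe the notes area with a PT_NOTE program header: 'sz' bytes of
 * file data at 'offset', with no in-memory image.
 */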
static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
	} else {
		cputime_t utime, stime;

		task_cputime(p, &utime, &stime);
		cputime_to_timeval(utime, &prstatus->pr_utime);
		cputime_to_timeval(stime, &prstatus->pr_stime);
	}
	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

1430static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1431                       struct mm_struct *mm)
1432{
1433        const struct cred *cred;
1434        unsigned int i, len;
1435        
1436        /* first copy the parameters from user space */
1437        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1438
1439        len = mm->arg_end - mm->arg_start;
1440        if (len >= ELF_PRARGSZ)
1441                len = ELF_PRARGSZ-1;
1442        if (copy_from_user(&psinfo->pr_psargs,
1443                           (const char __user *)mm->arg_start, len))
1444                return -EFAULT;
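        /* argv strings are NUL-separated; join them with spaces */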
1445        for (i = 0; i < len; i++)
1446                if (psinfo->pr_psargs[i] == 0)
1447                        psinfo->pr_psargs[i] = ' ';
1448        psinfo->pr_psargs[len] = 0;
1449
1450        rcu_read_lock();
1451        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1452        rcu_read_unlock();
1453        psinfo->pr_pid = task_pid_vnr(p);
1454        psinfo->pr_pgrp = task_pgrp_vnr(p);
1455        psinfo->pr_sid = task_session_vnr(p);
1456
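        /* Derive the ps(1)-style state letter (R, S, D, T, Z or W) */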
1457        i = p->state ? ffz(~p->state) + 1 : 0;
1458        psinfo->pr_state = i;
1459        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1460        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1461        psinfo->pr_nice = task_nice(p);
1462        psinfo->pr_flag = p->flags;
1463        rcu_read_lock();
1464        cred = __task_cred(p);
1465        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1466        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1467        rcu_read_unlock();
1468        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1469        
1470        return 0;
1471}
1472
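/*
 * The auxv note is the task's saved auxv copied verbatim, up to and
 * including the terminating AT_NULL entry.
 */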
1473static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1474{
1475        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1476        int i = 0;
1477        do
1478                i += 2;
1479        while (auxv[i - 2] != AT_NULL);
1480        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1481}
1482
1483static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1484                const siginfo_t *siginfo)
1485{
1486        mm_segment_t old_fs = get_fs();
1487        set_fs(KERNEL_DS);
1488        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1489        set_fs(old_fs);
1490        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1491}
1492
1493#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1494/*
1495 * Format of NT_FILE note:
1496 *
1497 * long count     -- how many files are mapped
1498 * long page_size -- units for file_ofs
1499 * array of [COUNT] elements of
1500 *   long start
1501 *   long end
1502 *   long file_ofs
1503 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1504 */
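/*
 * A purely illustrative instance, for a hypothetical /bin/foo mapped
 * twice:
 *
 *   count = 2, page_size = PAGE_SIZE
 *   0x400000 0x401000 0      <- start, end, file_ofs of the first map
 *   0x600000 0x601000 1
 *   "/bin/foo\0/bin/foo\0"
 *
 * file_ofs comes from vma->vm_pgoff, so it is expressed in pages.
 */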
1505static int fill_files_note(struct memelfnote *note)
1506{
1507        struct vm_area_struct *vma;
1508        unsigned count, size, names_ofs, remaining, n;
1509        user_long_t *data;
1510        user_long_t *start_end_ofs;
1511        char *name_base, *name_curpos;
1512
1513        /* *Estimated* file count and total data size needed */
1514        count = current->mm->map_count;
1515        size = count * 64;
1516
1517        names_ofs = (2 + 3 * count) * sizeof(data[0]);
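        /*
         * If d_path() runs out of room below, the buffer is grown by
         * 25% and the vma walk restarts from here.
         */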
1518 alloc:
1519        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1520                return -EINVAL;
1521        size = round_up(size, PAGE_SIZE);
1522        data = vmalloc(size);
1523        if (!data)
1524                return -ENOMEM;
1525
1526        start_end_ofs = data + 2;
1527        name_base = name_curpos = ((char *)data) + names_ofs;
1528        remaining = size - names_ofs;
1529        count = 0;
1530        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1531                struct file *file;
1532                const char *filename;
1533
1534                file = vma->vm_file;
1535                if (!file)
1536                        continue;
1537                filename = d_path(&file->f_path, name_curpos, remaining);
1538                if (IS_ERR(filename)) {
1539                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1540                                vfree(data);
1541                                size = size * 5 / 4;
1542                                goto alloc;
1543                        }
1544                        continue;
1545                }
1546
1547                /* d_path() fills at the end, move name down */
1548                /* n = strlen(filename) + 1: */
1549                n = (name_curpos + remaining) - filename;
1550                remaining = filename - name_curpos;
1551                memmove(name_curpos, filename, n);
1552                name_curpos += n;
1553
1554                *start_end_ofs++ = vma->vm_start;
1555                *start_end_ofs++ = vma->vm_end;
1556                *start_end_ofs++ = vma->vm_pgoff;
1557                count++;
1558        }
1559
1560        /* Now we know the exact file count and can store it */
1561        data[0] = count;
1562        data[1] = PAGE_SIZE;
1563        /*
1564         * The count is usually less than current->mm->map_count,
1565         * so we need to move the filenames down.
1566         */
1567        n = current->mm->map_count - count;
1568        if (n != 0) {
1569                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1570                memmove(name_base - shift_bytes, name_base,
1571                        name_curpos - name_base);
1572                name_curpos -= shift_bytes;
1573        }
1574
1575        size = name_curpos - (char *)data;
1576        fill_note(note, "CORE", NT_FILE, size, data);
1577        return 0;
1578}
1579
1580#ifdef CORE_DUMP_USE_REGSET
1581#include <linux/regset.h>
1582
1583struct elf_thread_core_info {
1584        struct elf_thread_core_info *next;
1585        struct task_struct *task;
1586        struct elf_prstatus prstatus;
1587        struct memelfnote notes[0];
1588};
1589
1590struct elf_note_info {
1591        struct elf_thread_core_info *thread;
1592        struct memelfnote psinfo;
1593        struct memelfnote signote;
1594        struct memelfnote auxv;
1595        struct memelfnote files;
1596        user_siginfo_t csigdata;
1597        size_t size;
1598        int thread_notes;
1599};
1600
1601/*
1602 * When a regset has a writeback hook, we call it on each thread before
1603 * dumping user memory.  On register window machines, this makes sure the
1604 * user memory backing the register data is up to date before we read it.
1605 */
1606static void do_thread_regset_writeback(struct task_struct *task,
1607                                       const struct user_regset *regset)
1608{
1609        if (regset->writeback)
1610                regset->writeback(task, regset, 1);
1611}
1612
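/*
 * Default definitions of the prstatus accessors; an architecture may
 * override these when the prstatus layout it exports in core dumps
 * differs from the generic one.
 */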
1613#ifndef PR_REG_SIZE
1614#define PR_REG_SIZE(S) sizeof(S)
1615#endif
1616
1617#ifndef PRSTATUS_SIZE
1618#define PRSTATUS_SIZE(S) sizeof(S)
1619#endif
1620
1621#ifndef PR_REG_PTR
1622#define PR_REG_PTR(S) (&((S)->pr_reg))
1623#endif
1624
1625#ifndef SET_PR_FPVALID
1626#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1627#endif
1628
1629static int fill_thread_core_info(struct elf_thread_core_info *t,
1630                                 const struct user_regset_view *view,
1631                                 long signr, size_t *total)
1632{
1633        unsigned int i;
1634
1635        /*
1636         * NT_PRSTATUS is the one special case, because the regset data
1637         * goes into the pr_reg field inside the note contents, rather
1638         * than being the whole note contents.  We fill the rest in here.
1639         * We assume that regset 0 is NT_PRSTATUS.
1640         */
1641        fill_prstatus(&t->prstatus, t->task, signr);
1642        (void) view->regsets[0].get(t->task, &view->regsets[0],
1643                                    0, PR_REG_SIZE(t->prstatus.pr_reg),
1644                                    PR_REG_PTR(&t->prstatus), NULL);
1645
1646        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1647                  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1648        *total += notesize(&t->notes[0]);
1649
1650        do_thread_regset_writeback(t->task, &view->regsets[0]);
1651
1652        /*
1653         * Each other regset might generate a note too.  For each regset
1654         * that has no core_note_type or is inactive, we leave t->notes[i]
1655         * all zero and we'll know to skip writing it later.
1656         */
1657        for (i = 1; i < view->n; ++i) {
1658                const struct user_regset *regset = &view->regsets[i];
1659                do_thread_regset_writeback(t->task, regset);
1660                if (regset->core_note_type && regset->get &&
1661                    (!regset->active || regset->active(t->task, regset))) {
1662                        int ret;
1663                        size_t size = regset->n * regset->size;
1664                        void *data = kmalloc(size, GFP_KERNEL);
1665                        if (unlikely(!data))
1666                                return 0;
1667                        ret = regset->get(t->task, regset,
1668                                          0, size, data, NULL);
1669                        if (unlikely(ret))
1670                                kfree(data);
1671                        else {
1672                                if (regset->core_note_type != NT_PRFPREG)
1673                                        fill_note(&t->notes[i], "LINUX",
1674                                                  regset->core_note_type,
1675                                                  size, data);
1676                                else {
1677                                        SET_PR_FPVALID(&t->prstatus, 1);
1678                                        fill_note(&t->notes[i], "CORE",
1679                                                  NT_PRFPREG, size, data);
1680                                }
1681                                *total += notesize(&t->notes[i]);
1682                        }
1683                }
1684        }
1685
1686        return 1;
1687}
1688
1689static int fill_note_info(struct elfhdr *elf, int phdrs,
1690                          struct elf_note_info *info,
1691                          const siginfo_t *siginfo, struct pt_regs *regs)
1692{
1693        struct task_struct *dump_task = current;
1694        const struct user_regset_view *view = task_user_regset_view(dump_task);
1695        struct elf_thread_core_info *t;
1696        struct elf_prpsinfo *psinfo;
1697        struct core_thread *ct;
1698        unsigned int i;
1699
1700        info->size = 0;
1701        info->thread = NULL;
1702
1703        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1704        if (psinfo == NULL) {
1705                info->psinfo.data = NULL; /* So we don't free this wrongly */
1706                return 0;
1707        }
1708
1709        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1710
1711        /*
1712         * Figure out how many notes we're going to need for each thread.
1713         */
1714        info->thread_notes = 0;
1715        for (i = 0; i < view->n; ++i)
1716                if (view->regsets[i].core_note_type != 0)
1717                        ++info->thread_notes;
1718
1719        /*
1720         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1721         * since it is our one special case.
1722         */
1723        if (unlikely(info->thread_notes == 0) ||
1724            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1725                WARN_ON(1);
1726                return 0;
1727        }
1728
1729        /*
1730         * Initialize the ELF file header.
1731         */
1732        fill_elf_header(elf, phdrs,
1733                        view->e_machine, view->e_flags);
1734
1735        /*
1736         * Allocate a structure for each thread.
1737         */
1738        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1739                t = kzalloc(offsetof(struct elf_thread_core_info,
1740                                     notes[info->thread_notes]),
1741                            GFP_KERNEL);
1742                if (unlikely(!t))
1743                        return 0;
1744
1745                t->task = ct->task;
1746                if (ct->task == dump_task || !info->thread) {
1747                        t->next = info->thread;
1748                        info->thread = t;
1749                } else {
1750                        /*
1751                         * Make sure to keep the original task at
1752                         * the head of the list.
1753                         */
1754                        t->next = info->thread->next;
1755                        info->thread->next = t;
1756                }
1757        }
1758
1759        /*
1760         * Now fill in each thread's information.
1761         */
1762        for (t = info->thread; t != NULL; t = t->next)
1763                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1764                        return 0;
1765
1766        /*
1767         * Fill in the two process-wide notes.
1768         */
1769        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1770        info->size += notesize(&info->psinfo);
1771
1772        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1773        info->size += notesize(&info->signote);
1774
1775        fill_auxv_note(&info->auxv, current->mm);
1776        info->size += notesize(&info->auxv);
1777
1778        if (fill_files_note(&info->files) == 0)
1779                info->size += notesize(&info->files);
1780
1781        return 1;
1782}
1783
1784static size_t get_note_info_size(struct elf_note_info *info)
1785{
1786        return info->size;
1787}
1788
1789/*
1790 * Write all the notes for each thread.  When writing the first thread, the
1791 * process-wide notes are interleaved after the first thread-specific note.
1792 */
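/*
 * For a two-thread dump, for example, the notes come out as:
 *
 *   NT_PRSTATUS (thread 0), NT_PRPSINFO, NT_SIGINFO, NT_AUXV,
 *   NT_FILE (when present), thread 0's remaining regset notes,
 *   NT_PRSTATUS (thread 1), thread 1's remaining regset notes.
 */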
1793static int write_note_info(struct elf_note_info *info,
1794                           struct coredump_params *cprm)
1795{
1796        bool first = true;
1797        struct elf_thread_core_info *t = info->thread;
1798
1799        do {
1800                int i;
1801
1802                if (!writenote(&t->notes[0], cprm))
1803                        return 0;
1804
1805                if (first && !writenote(&info->psinfo, cprm))
1806                        return 0;
1807                if (first && !writenote(&info->signote, cprm))
1808                        return 0;
1809                if (first && !writenote(&info->auxv, cprm))
1810                        return 0;
1811                if (first && info->files.data &&
1812                                !writenote(&info->files, cprm))
1813                        return 0;
1814
1815                for (i = 1; i < info->thread_notes; ++i)
1816                        if (t->notes[i].data &&
1817                            !writenote(&t->notes[i], cprm))
1818                                return 0;
1819
1820                first = false;
1821                t = t->next;
1822        } while (t);
1823
1824        return 1;
1825}
1826
1827static void free_note_info(struct elf_note_info *info)
1828{
1829        struct elf_thread_core_info *threads = info->thread;
1830        while (threads) {
1831                unsigned int i;
1832                struct elf_thread_core_info *t = threads;
1833                threads = t->next;
1834                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1835                for (i = 1; i < info->thread_notes; ++i)
1836                        kfree(t->notes[i].data);
1837                kfree(t);
1838        }
1839        kfree(info->psinfo.data);
1840        vfree(info->files.data);
1841}
1842
1843#else
1844
1845/* Here is the structure in which status of each thread is captured. */
1846struct elf_thread_status
1847{
1848        struct list_head list;
1849        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1850        elf_fpregset_t fpu;             /* NT_PRFPREG */
1851        struct task_struct *thread;
1852#ifdef ELF_CORE_COPY_XFPREGS
1853        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1854#endif
1855        struct memelfnote notes[3];
1856        int num_notes;
1857};
1858
1859/*
1860 * In order to add the specific thread information for the ELF file format,
1861 * we need to keep a linked list of every thread's prstatus and then create
1862 * a single section for them in the final core file.
1863 */
1864static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1865{
1866        int sz = 0;
1867        struct task_struct *p = t->thread;
1868        t->num_notes = 0;
1869
1870        fill_prstatus(&t->prstatus, p, signr);
1871        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1872        
1873        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1874                  &(t->prstatus));
1875        t->num_notes++;
1876        sz += notesize(&t->notes[0]);
1877
1878        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1879                                                                &t->fpu))) {
1880                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1881                          &(t->fpu));
1882                t->num_notes++;
1883                sz += notesize(&t->notes[1]);
1884        }
1885
1886#ifdef ELF_CORE_COPY_XFPREGS
1887        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1888                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1889                          sizeof(t->xfpu), &t->xfpu);
1890                t->num_notes++;
1891                sz += notesize(&t->notes[2]);
1892        }
1893#endif  
1894        return sz;
1895}
1896
1897struct elf_note_info {
1898        struct memelfnote *notes;
1899        struct memelfnote *notes_files;
1900        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1901        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1902        struct list_head thread_list;
1903        elf_fpregset_t *fpu;
1904#ifdef ELF_CORE_COPY_XFPREGS
1905        elf_fpxregset_t *xfpu;
1906#endif
1907        user_siginfo_t csigdata;
1908        int thread_status_size;
1909        int numnote;
1910};
1911
1912static int elf_note_info_init(struct elf_note_info *info)
1913{
1914        memset(info, 0, sizeof(*info));
1915        INIT_LIST_HEAD(&info->thread_list);
1916
1917        /* Allocate space for ELF notes */
1918        info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1919        if (!info->notes)
1920                return 0;
1921        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1922        if (!info->psinfo)
1923                return 0;
1924        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1925        if (!info->prstatus)
1926                return 0;
1927        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1928        if (!info->fpu)
1929                return 0;
1930#ifdef ELF_CORE_COPY_XFPREGS
1931        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1932        if (!info->xfpu)
1933                return 0;
1934#endif
1935        return 1;
1936}
1937
1938static int fill_note_info(struct elfhdr *elf, int phdrs,
1939                          struct elf_note_info *info,
1940                          const siginfo_t *siginfo, struct pt_regs *regs)
1941{
1942        struct list_head *t;
1943        struct core_thread *ct;
1944        struct elf_thread_status *ets;
1945
1946        if (!elf_note_info_init(info))
1947                return 0;
1948
1949        for (ct = current->mm->core_state->dumper.next;
1950                                        ct; ct = ct->next) {
1951                ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1952                if (!ets)
1953                        return 0;
1954
1955                ets->thread = ct->task;
1956                list_add(&ets->list, &info->thread_list);
1957        }
1958
1959        list_for_each(t, &info->thread_list) {
1960                int sz;
1961
1962                ets = list_entry(t, struct elf_thread_status, list);
1963                sz = elf_dump_thread_status(siginfo->si_signo, ets);
1964                info->thread_status_size += sz;
1965        }
1966        /* Now collect the dump for the current task */
1967        memset(info->prstatus, 0, sizeof(*info->prstatus));
1968        fill_prstatus(info->prstatus, current, siginfo->si_signo);
1969        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1970
1971        /* Set up header */
1972        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1973
1974        /*
1975         * Set up the notes in similar form to SVR4 core dumps made
1976         * with info from their /proc.
1977         */
1978
1979        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1980                  sizeof(*info->prstatus), info->prstatus);
1981        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1982        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1983                  sizeof(*info->psinfo), info->psinfo);
1984
1985        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1986        fill_auxv_note(info->notes + 3, current->mm);
1987        info->numnote = 4;
1988
1989        if (fill_files_note(info->notes + info->numnote) == 0) {
1990                info->notes_files = info->notes + info->numnote;
1991                info->numnote++;
1992        }
1993
1994        /* Try to dump the FPU. */
1995        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1996                                                               info->fpu);
1997        if (info->prstatus->pr_fpvalid)
1998                fill_note(info->notes + info->numnote++,
1999                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2000#ifdef ELF_CORE_COPY_XFPREGS
2001        if (elf_core_copy_task_xfpregs(current, info->xfpu))
2002                fill_note(info->notes + info->numnote++,
2003                          "LINUX", ELF_CORE_XFPREG_TYPE,
2004                          sizeof(*info->xfpu), info->xfpu);
2005#endif
2006
2007        return 1;
2008}
2009
2010static size_t get_note_info_size(struct elf_note_info *info)
2011{
2012        int sz = 0;
2013        int i;
2014
2015        for (i = 0; i < info->numnote; i++)
2016                sz += notesize(info->notes + i);
2017
2018        sz += info->thread_status_size;
2019
2020        return sz;
2021}
2022
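/*
 * Unlike the regset flavour above, this writes all of the process-wide
 * notes first, followed by each thread's status notes.
 */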
2023static int write_note_info(struct elf_note_info *info,
2024                           struct coredump_params *cprm)
2025{
2026        int i;
2027        struct list_head *t;
2028
2029        for (i = 0; i < info->numnote; i++)
2030                if (!writenote(info->notes + i, cprm))
2031                        return 0;
2032
2033        /* write out the thread status notes section */
2034        list_for_each(t, &info->thread_list) {
2035                struct elf_thread_status *tmp =
2036                                list_entry(t, struct elf_thread_status, list);
2037
2038                for (i = 0; i < tmp->num_notes; i++)
2039                        if (!writenote(&tmp->notes[i], cprm))
2040                                return 0;
2041        }
2042
2043        return 1;
2044}
2045
2046static void free_note_info(struct elf_note_info *info)
2047{
2048        while (!list_empty(&info->thread_list)) {
2049                struct list_head *tmp = info->thread_list.next;
2050                list_del(tmp);
2051                kfree(list_entry(tmp, struct elf_thread_status, list));
2052        }
2053
2054        /* Free data possibly allocated by fill_files_note(): */
2055        if (info->notes_files)
2056                vfree(info->notes_files->data);
2057
2058        kfree(info->prstatus);
2059        kfree(info->psinfo);
2060        kfree(info->notes);
2061        kfree(info->fpu);
2062#ifdef ELF_CORE_COPY_XFPREGS
2063        kfree(info->xfpu);
2064#endif
2065}
2066
2067#endif
2068
2069static struct vm_area_struct *first_vma(struct task_struct *tsk,
2070                                        struct vm_area_struct *gate_vma)
2071{
2072        struct vm_area_struct *ret = tsk->mm->mmap;
2073
2074        if (ret)
2075                return ret;
2076        return gate_vma;
2077}
2078/*
2079 * Helper function for iterating across a vma list.  It ensures that the caller
2080 * will visit `gate_vma' prior to terminating the search.
2081 */
2082static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2083                                        struct vm_area_struct *gate_vma)
2084{
2085        struct vm_area_struct *ret;
2086
2087        ret = this_vma->vm_next;
2088        if (ret)
2089                return ret;
2090        if (this_vma == gate_vma)
2091                return NULL;
2092        return gate_vma;
2093}
2094
2095static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2096                             elf_addr_t e_shoff, int segs)
2097{
2098        elf->e_shoff = e_shoff;
2099        elf->e_shentsize = sizeof(*shdr4extnum);
2100        elf->e_shnum = 1;
2101        elf->e_shstrndx = SHN_UNDEF;
2102
2103        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2104
2105        shdr4extnum->sh_type = SHT_NULL;
2106        shdr4extnum->sh_size = elf->e_shnum;
2107        shdr4extnum->sh_link = elf->e_shstrndx;
2108        shdr4extnum->sh_info = segs;
2109}
2110
2111/*
2112 * Actual dumper
2113 *
2114 * This is a two-pass process; first we find the offsets of the bits,
2115 * and then they are actually written out.  If we hit the core size
2116 * limit we just truncate.
2117 */
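/*
 * The resulting file is laid out roughly as follows:
 *
 *   ELF header
 *   program headers: the PT_NOTE entry, one PT_LOAD per vma, and any
 *     extra arch phdrs
 *   note data
 *   padding up to ELF_EXEC_PAGESIZE
 *   vma contents, in the order their headers were emitted
 *   extra arch data and, only when e_phnum == PN_XNUM, the single
 *     extended-numbering section header
 */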
2118static int elf_core_dump(struct coredump_params *cprm)
2119{
2120        int has_dumped = 0;
2121        mm_segment_t fs;
2122        int segs, i;
2123        size_t vma_data_size = 0;
2124        struct vm_area_struct *vma, *gate_vma;
2125        struct elfhdr *elf = NULL;
2126        loff_t offset = 0, dataoff;
2127        struct elf_note_info info = { };
2128        struct elf_phdr *phdr4note = NULL;
2129        struct elf_shdr *shdr4extnum = NULL;
2130        Elf_Half e_phnum;
2131        elf_addr_t e_shoff;
2132        elf_addr_t *vma_filesz = NULL;
2133
2134        /*
2135         * We no longer stop all VM operations.
2136         *
2137         * This is because those processes that could possibly change map_count
2138         * or the mmap / vma pages are now blocked in do_exit until current
2139         * finishes this core dump.
2140         *
2141         * Only ptrace can touch these memory addresses, but it doesn't change
2142         * the map_count or the pages allocated, so there is no possibility of
2143         * crashing while dumping the mm->vm_next areas to the core file.
2144         */
2145
2146        /* alloc memory for large data structures: too large to be on stack */
2147        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2148        if (!elf)
2149                goto out;
2150        /*
2151         * The number of segs is recorded in the ELF header as a 16-bit value.
2152         * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
2153         */
2154        segs = current->mm->map_count;
2155        segs += elf_core_extra_phdrs();
2156
2157        gate_vma = get_gate_vma(current->mm);
2158        if (gate_vma != NULL)
2159                segs++;
2160
2161        /* for notes section */
2162        segs++;
2163
2164        /* If segs > PN_XNUM (0xffff), then e_phnum overflows. To avoid
2165         * this, the kernel supports extended numbering. Have a look at
2166         * include/linux/elf.h for further information. */
2167        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
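        /*
         * When that happens, a reader recovers the real segment count
         * from the sh_info field of the extra section header that
         * fill_extnum_info() emits at the end of the file.
         */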
2168
2169        /*
2170         * Collect all the non-memory information about the process for the
2171         * notes.  This also sets up the file header.
2172         */
2173        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2174                goto cleanup;
2175
2176        has_dumped = 1;
2177
2178        fs = get_fs();
2179        set_fs(KERNEL_DS);
2180
2181        offset += sizeof(*elf);                         /* Elf header */
2182        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2183
2184        /* Write notes phdr entry */
2185        {
2186                size_t sz = get_note_info_size(&info);
2187
2188                sz += elf_coredump_extra_notes_size();
2189
2190                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2191                if (!phdr4note)
2192                        goto end_coredump;
2193
2194                fill_elf_note_phdr(phdr4note, sz, offset);
2195                offset += sz;
2196        }
2197
2198        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2199
2200        vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2201        if (!vma_filesz)
2202                goto end_coredump;
2203
2204        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2205                        vma = next_vma(vma, gate_vma)) {
2206                unsigned long dump_size;
2207
2208                dump_size = vma_dump_size(vma, cprm->mm_flags);
2209                vma_filesz[i++] = dump_size;
2210                vma_data_size += dump_size;
2211        }
2212
2213        offset += vma_data_size;
2214        offset += elf_core_extra_data_size();
2215        e_shoff = offset;
2216
2217        if (e_phnum == PN_XNUM) {
2218                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2219                if (!shdr4extnum)
2220                        goto end_coredump;
2221                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2222        }
2223
2224        offset = dataoff;
2225
2226        if (!dump_emit(cprm, elf, sizeof(*elf)))
2227                goto end_coredump;
2228
2229        if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2230                goto end_coredump;
2231
2232        /* Write program headers for segments dump */
2233        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2234                        vma = next_vma(vma, gate_vma)) {
2235                struct elf_phdr phdr;
2236
2237                phdr.p_type = PT_LOAD;
2238                phdr.p_offset = offset;
2239                phdr.p_vaddr = vma->vm_start;
2240                phdr.p_paddr = 0;
2241                phdr.p_filesz = vma_filesz[i++];
2242                phdr.p_memsz = vma->vm_end - vma->vm_start;
2243                offset += phdr.p_filesz;
2244                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2245                if (vma->vm_flags & VM_WRITE)
2246                        phdr.p_flags |= PF_W;
2247                if (vma->vm_flags & VM_EXEC)
2248                        phdr.p_flags |= PF_X;
2249                phdr.p_align = ELF_EXEC_PAGESIZE;
2250
2251                if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2252                        goto end_coredump;
2253        }
2254
2255        if (!elf_core_write_extra_phdrs(cprm, offset))
2256                goto end_coredump;
2257
2258        /* write out the notes section */
2259        if (!write_note_info(&info, cprm))
2260                goto end_coredump;
2261
2262        if (elf_coredump_extra_notes_write(cprm))
2263                goto end_coredump;
2264
2265        /* Align to page */
2266        if (!dump_skip(cprm, dataoff - cprm->written))
2267                goto end_coredump;
2268
2269        for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2270                        vma = next_vma(vma, gate_vma)) {
2271                unsigned long addr;
2272                unsigned long end;
2273
2274                end = vma->vm_start + vma_filesz[i++];
2275
2276                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2277                        struct page *page;
2278                        int stop;
2279
2280                        page = get_dump_page(addr);
2281                        if (page) {
2282                                void *kaddr = kmap(page);
2283                                stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2284                                kunmap(page);
2285                                page_cache_release(page);
2286                        } else
2287                                stop = !dump_skip(cprm, PAGE_SIZE);
2288                        if (stop)
2289                                goto end_coredump;
2290                }
2291        }
2292
2293        if (!elf_core_write_extra_data(cprm))
2294                goto end_coredump;
2295
2296        if (e_phnum == PN_XNUM) {
2297                if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2298                        goto end_coredump;
2299        }
2300
2301end_coredump:
2302        set_fs(fs);
2303
2304cleanup:
2305        free_note_info(&info);
2306        kfree(shdr4extnum);
2307        kfree(vma_filesz);
2308        kfree(phdr4note);
2309        kfree(elf);
2310out:
2311        return has_dumped;
2312}
2313
2314#endif          /* CONFIG_ELF_CORE */
2315
2316static int __init init_elf_binfmt(void)
2317{
2318        register_binfmt(&elf_format);
2319        return 0;
2320}
2321
2322static void __exit exit_elf_binfmt(void)
2323{
2324        /* Remove the ELF loader. */
2325        unregister_binfmt(&elf_format);
2326}
2327
2328core_initcall(init_elf_binfmt);
2329module_exit(exit_elf_binfmt);
2330MODULE_LICENSE("GPL");
2331