/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
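
/*
 * Worked example (assuming ELF_MIN_ALIGN == 4096, i.e. 0x1000):
 *   ELF_PAGESTART(0x401234)  == 0x401000  (round down to the page start)
 *   ELF_PAGEOFFSET(0x401234) == 0x234     (offset within the page)
 *   ELF_PAGEALIGN(0x401234)  == 0x402000  (round up to the next boundary)
 */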

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}
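
/*
 * For example (a sketch, assuming 4 KiB pages): set_brk(0x601234, 0x603000)
 * page-aligns both ends to 0x602000 and 0x603000, maps the one anonymous
 * page in between with vm_brk(), and leaves mm->brk at 0x603000.
 */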

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  They would
   otherwise contain junk from the file that should
   not be in memory
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
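
/*
 * Illustration for the common grows-down case: if sp == 0x7fff0010, then
 * STACK_ALLOC(sp, 16) moves sp down to 0x7fff0000 and yields that address,
 * while STACK_ROUND(sp, items) reserves room for items pointer-sized slots
 * and rounds the result down to a 16-byte boundary, as ABI stack alignment
 * rules generally expect.
 */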

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)
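
/*
 * Each NEW_AUX_ENT() emits one (id, value) pair of the auxiliary vector
 * that the dynamic linker and C library later walk; userspace can read a
 * given entry with, e.g., getauxval(AT_PAGESZ) from <sys/auxv.h>.
 */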

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif

        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
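
/*
 * The initial stack that results (grows-down case, low to high addresses):
 *
 *   argc
 *   argv[0] ... argv[argc-1], NULL
 *   envp[0] ... envp[envc-1], NULL
 *   auxiliary vector ((id, value) pairs, terminated by AT_NULL)
 *   ... argument/environment strings, random bytes and platform
 *       strings sit further up, below the stack top ...
 */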

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image (since size < total_size).
         * So we first map the 'big' image - and unmap the remainder at
         * the end (the unmap is needed for ELF images with holes).
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

#endif /* !elf_map */
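
/*
 * Worked example (assuming 4 KiB pages and zero bias): a PT_LOAD with
 * p_vaddr 0x400100, p_offset 0x100 and p_filesz 0xf00 maps from addr
 * 0x400000 at file offset 0 with size ELF_PAGEALIGN(0xf00 + 0x100) ==
 * 0x1000, so the in-page offsets of the file and of the virtual address
 * stay congruent.
 */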

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
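
/*
 * For example, an image with two PT_LOAD segments, one at p_vaddr 0x0 with
 * p_memsz 0x1000 and one at p_vaddr 0x200000 with p_memsz 0x1500, has a
 * total mapping size of 0x201500: the span from the page start of the
 * first segment to the end of the last one, holes included.
 */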

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
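
/*
 * Worked example (assuming 4 KiB pages, PAGE_SHIFT == 12): STACK_RND_MASK
 * is 0x7ff, so random_variable is at most 0x7ff << 12, i.e. 8 MiB - 4 KiB,
 * and the stack top is shifted down (or up, on grows-up stacks) by that
 * page-aligned random amount.
 */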

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char * elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, this is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl or an explicit setting of
                         * personality flags.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization of
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include - vDSO, vsyscall, and other
 * architecture specific mappings
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
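
/*
 * For example, a note named "CORE" with (say) a 336-byte prstatus
 * descriptor occupies sizeof(struct elf_note) + roundup(5, 4) +
 * roundup(336, 4) == 12 + 8 + 336 == 356 bytes in the core file.
 */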

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE
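
/*
 * On-disk layout of a record written by writenote(): the elf_note header
 * (n_namesz, n_descsz, n_type), then the name including its NUL padded to
 * a 4-byte boundary, then the descriptor data, also padded to 4 bytes.
 */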
1257
1258static void fill_elf_header(struct elfhdr *elf, int segs,
1259                            u16 machine, u32 flags)
1260{
1261        memset(elf, 0, sizeof(*elf));
1262
1263        memcpy(elf->e_ident, ELFMAG, SELFMAG);
1264        elf->e_ident[EI_CLASS] = ELF_CLASS;
1265        elf->e_ident[EI_DATA] = ELF_DATA;
1266        elf->e_ident[EI_VERSION] = EV_CURRENT;
1267        elf->e_ident[EI_OSABI] = ELF_OSABI;
1268
1269        elf->e_type = ET_CORE;
1270        elf->e_machine = machine;
1271        elf->e_version = EV_CURRENT;
1272        elf->e_phoff = sizeof(struct elfhdr);
1273        elf->e_flags = flags;
1274        elf->e_ehsize = sizeof(struct elfhdr);
1275        elf->e_phentsize = sizeof(struct elf_phdr);
1276        elf->e_phnum = segs;
1277
1278        return;
1279}
1280
1281static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1282{
1283        phdr->p_type = PT_NOTE;
1284        phdr->p_offset = offset;
1285        phdr->p_vaddr = 0;
1286        phdr->p_paddr = 0;
1287        phdr->p_filesz = sz;
1288        phdr->p_memsz = 0;
1289        phdr->p_flags = 0;
1290        phdr->p_align = 0;
1291        return;
1292}
1293
1294static void fill_note(struct memelfnote *note, const char *name, int type, 
1295                unsigned int sz, void *data)
1296{
1297        note->name = name;
1298        note->type = type;
1299        note->datasz = sz;
1300        note->data = data;
1301        return;
1302}
1303
1304/*
1305 * fill up all the fields in prstatus from the given task struct, except
1306 * registers which need to be filled up separately.
1307 */
1308static void fill_prstatus(struct elf_prstatus *prstatus,
1309                struct task_struct *p, long signr)
1310{
1311        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1312        prstatus->pr_sigpend = p->pending.signal.sig[0];
1313        prstatus->pr_sighold = p->blocked.sig[0];
1314        rcu_read_lock();
1315        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1316        rcu_read_unlock();
1317        prstatus->pr_pid = task_pid_vnr(p);
1318        prstatus->pr_pgrp = task_pgrp_vnr(p);
1319        prstatus->pr_sid = task_session_vnr(p);
1320        if (thread_group_leader(p)) {
1321                struct task_cputime cputime;
1322
1323                /*
1324                 * This is the record for the group leader.  It shows the
1325                 * group-wide total, not its individual thread total.
1326                 */
1327                thread_group_cputime(p, &cputime);
1328                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1329                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1330        } else {
1331                cputime_t utime, stime;
1332
1333                task_cputime(p, &utime, &stime);
1334                cputime_to_timeval(utime, &prstatus->pr_utime);
1335                cputime_to_timeval(stime, &prstatus->pr_stime);
1336        }
1337        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1338        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1339}
1340
1341static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1342                       struct mm_struct *mm)
1343{
1344        const struct cred *cred;
1345        unsigned int i, len;
1346        
1347        /* first copy the parameters from user space */
1348        memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1349
1350        len = mm->arg_end - mm->arg_start;
1351        if (len >= ELF_PRARGSZ)
1352                len = ELF_PRARGSZ-1;
1353        if (copy_from_user(&psinfo->pr_psargs,
1354                           (const char __user *)mm->arg_start, len))
1355                return -EFAULT;
1356        for(i = 0; i < len; i++)
1357                if (psinfo->pr_psargs[i] == 0)
1358                        psinfo->pr_psargs[i] = ' ';
1359        psinfo->pr_psargs[len] = 0;
1360
1361        rcu_read_lock();
1362        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1363        rcu_read_unlock();
1364        psinfo->pr_pid = task_pid_vnr(p);
1365        psinfo->pr_pgrp = task_pgrp_vnr(p);
1366        psinfo->pr_sid = task_session_vnr(p);
1367
1368        i = p->state ? ffz(~p->state) + 1 : 0;
1369        psinfo->pr_state = i;
1370        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1371        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1372        psinfo->pr_nice = task_nice(p);
1373        psinfo->pr_flag = p->flags;
1374        rcu_read_lock();
1375        cred = __task_cred(p);
1376        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1377        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1378        rcu_read_unlock();
1379        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1380        
1381        return 0;
1382}
1383
1384static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1385{
1386        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1387        int i = 0;
1388        do
1389                i += 2;
1390        while (auxv[i - 2] != AT_NULL);
1391        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1392}
1393
1394static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1395                siginfo_t *siginfo)
1396{
1397        mm_segment_t old_fs = get_fs();
1398        set_fs(KERNEL_DS);
1399        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1400        set_fs(old_fs);
1401        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1402}
1403
1404#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1405/*
1406 * Format of NT_FILE note:
1407 *
1408 * long count     -- how many files are mapped
1409 * long page_size -- units for file_ofs
1410 * array of [COUNT] elements of
1411 *   long start
1412 *   long end
1413 *   long file_ofs
1414 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1415 */
1416static void fill_files_note(struct memelfnote *note)
1417{
1418        struct vm_area_struct *vma;
1419        unsigned count, size, names_ofs, remaining, n;
1420        user_long_t *data;
1421        user_long_t *start_end_ofs;
1422        char *name_base, *name_curpos;
1423
1424        /* *Estimated* file count and total data size needed */
1425        count = current->mm->map_count;
1426        size = count * 64;
1427
1428        names_ofs = (2 + 3 * count) * sizeof(data[0]);
1429 alloc:
1430        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1431                return -EINVAL;
1432        size = round_up(size, PAGE_SIZE);
1433        data = vmalloc(size);
1434        if (!data)
1435                return -ENOMEM;
1436
1437        start_end_ofs = data + 2;
1438        name_base = name_curpos = ((char *)data) + names_ofs;
1439        remaining = size - names_ofs;
1440        count = 0;
1441        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1442                struct file *file;
1443                const char *filename;
1444
1445                file = vma->vm_file;
1446                if (!file)
1447                        continue;
1448                filename = d_path(&file->f_path, name_curpos, remaining);
1449                if (IS_ERR(filename)) {
1450                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
1451                                vfree(data);
1452                                size = size * 5 / 4;
1453                                goto alloc;
1454                        }
1455                        continue;
1456                }
1457
1458                /* d_path() fills at the end, move name down */
1459                /* n = strlen(filename) + 1: */
1460                n = (name_curpos + remaining) - filename;
1461                remaining = filename - name_curpos;
1462                memmove(name_curpos, filename, n);
1463                name_curpos += n;
1464
1465                *start_end_ofs++ = vma->vm_start;
1466                *start_end_ofs++ = vma->vm_end;
1467                *start_end_ofs++ = vma->vm_pgoff;
1468                count++;
1469        }
1470
1471        /* Now we know the exact count of files, so we can store it */
1472        data[0] = count;
1473        data[1] = PAGE_SIZE;
1474        /*
1475         * The final count is usually less than current->mm->map_count,
1476         * so we need to move the filenames down to close the gap.
1477         */
1478        n = current->mm->map_count - count;
1479        if (n != 0) {
1480                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1481                memmove(name_base - shift_bytes, name_base,
1482                        name_curpos - name_base);
1483                name_curpos -= shift_bytes;
1484        }
1485
1486        size = name_curpos - (char *)data;
1487        fill_note(note, "CORE", NT_FILE, size, data);
1488        return 0;
1489}
1490
1491#ifdef CORE_DUMP_USE_REGSET
1492#include <linux/regset.h>
1493
1494struct elf_thread_core_info {
1495        struct elf_thread_core_info *next;
1496        struct task_struct *task;
1497        struct elf_prstatus prstatus;
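        /* flexible array: one slot per regset note (thread_notes total) */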
1498        struct memelfnote notes[0];
1499};
1500
1501struct elf_note_info {
1502        struct elf_thread_core_info *thread;
1503        struct memelfnote psinfo;
1504        struct memelfnote signote;
1505        struct memelfnote auxv;
1506        struct memelfnote files;
1507        user_siginfo_t csigdata;
1508        size_t size;
1509        int thread_notes;
1510};
1511
1512/*
1513 * When a regset has a writeback hook, we call it on each thread before
1514 * dumping user memory.  On register window machines, this makes sure the
1515 * user memory backing the register data is up to date before we read it.
1516 */
1517static void do_thread_regset_writeback(struct task_struct *task,
1518                                       const struct user_regset *regset)
1519{
1520        if (regset->writeback)
1521                regset->writeback(task, regset, 1);
1522}
1523
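/*
 * These defaults describe the native prstatus layout.  An architecture
 * can predefine them to substitute a different register-block size or
 * location, e.g. when dumping a compat task.
 */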
1524#ifndef PR_REG_SIZE
1525#define PR_REG_SIZE(S) sizeof(S)
1526#endif
1527
1528#ifndef PRSTATUS_SIZE
1529#define PRSTATUS_SIZE(S) sizeof(S)
1530#endif
1531
1532#ifndef PR_REG_PTR
1533#define PR_REG_PTR(S) (&((S)->pr_reg))
1534#endif
1535
1536#ifndef SET_PR_FPVALID
1537#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1538#endif
1539
1540static int fill_thread_core_info(struct elf_thread_core_info *t,
1541                                 const struct user_regset_view *view,
1542                                 long signr, size_t *total)
1543{
1544        unsigned int i;
1545
1546        /*
1547         * NT_PRSTATUS is the one special case, because the regset data
1548         * goes into the pr_reg field inside the note contents, rather
1549         * than being the whole note contents.  We fill the rest in here.
1550         * We assume that regset 0 is NT_PRSTATUS.
1551         */
1552        fill_prstatus(&t->prstatus, t->task, signr);
1553        (void) view->regsets[0].get(t->task, &view->regsets[0],
1554                                    0, PR_REG_SIZE(t->prstatus.pr_reg),
1555                                    PR_REG_PTR(&t->prstatus), NULL);
1556
1557        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1558                  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1559        *total += notesize(&t->notes[0]);
1560
1561        do_thread_regset_writeback(t->task, &view->regsets[0]);
1562
1563        /*
1564         * Each other regset might generate a note too.  For each regset
1565         * that has no core_note_type or is inactive, we leave t->notes[i]
1566         * all zero and we'll know to skip writing it later.
1567         */
1568        for (i = 1; i < view->n; ++i) {
1569                const struct user_regset *regset = &view->regsets[i];
1570                do_thread_regset_writeback(t->task, regset);
1571                if (regset->core_note_type && regset->get &&
1572                    (!regset->active || regset->active(t->task, regset))) {
1573                        int ret;
1574                        size_t size = regset->n * regset->size;
1575                        void *data = kmalloc(size, GFP_KERNEL);
1576                        if (unlikely(!data))
1577                                return 0;
1578                        ret = regset->get(t->task, regset,
1579                                          0, size, data, NULL);
1580                        if (unlikely(ret))
1581                                kfree(data);
1582                        else {
1583                                if (regset->core_note_type != NT_PRFPREG)
1584                                        fill_note(&t->notes[i], "LINUX",
1585                                                  regset->core_note_type,
1586                                                  size, data);
1587                                else {
1588                                        SET_PR_FPVALID(&t->prstatus, 1);
1589                                        fill_note(&t->notes[i], "CORE",
1590                                                  NT_PRFPREG, size, data);
1591                                }
1592                                *total += notesize(&t->notes[i]);
1593                        }
1594                }
1595        }
1596
1597        return 1;
1598}
1599
1600static int fill_note_info(struct elfhdr *elf, int phdrs,
1601                          struct elf_note_info *info,
1602                          siginfo_t *siginfo, struct pt_regs *regs)
1603{
1604        struct task_struct *dump_task = current;
1605        const struct user_regset_view *view = task_user_regset_view(dump_task);
1606        struct elf_thread_core_info *t;
1607        struct elf_prpsinfo *psinfo;
1608        struct core_thread *ct;
1609        unsigned int i;
1610
1611        info->size = 0;
1612        info->thread = NULL;
1613
1614        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1615        if (psinfo == NULL) {
1616                info->psinfo.data = NULL; /* So we don't free this wrongly */
1617                return 0;
1618        }
1619
1620        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1621
1622        /*
1623         * Figure out how many notes we're going to need for each thread.
1624         */
1625        info->thread_notes = 0;
1626        for (i = 0; i < view->n; ++i)
1627                if (view->regsets[i].core_note_type != 0)
1628                        ++info->thread_notes;
1629
1630        /*
1631         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1632         * since it is our one special case.
1633         */
1634        if (unlikely(info->thread_notes == 0) ||
1635            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1636                WARN_ON(1);
1637                return 0;
1638        }
1639
1640        /*
1641         * Initialize the ELF file header.
1642         */
1643        fill_elf_header(elf, phdrs,
1644                        view->e_machine, view->e_flags);
1645
1646        /*
1647         * Allocate a structure for each thread.
1648         */
1649        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1650                t = kzalloc(offsetof(struct elf_thread_core_info,
1651                                     notes[info->thread_notes]),
1652                            GFP_KERNEL);
1653                if (unlikely(!t))
1654                        return 0;
1655
1656                t->task = ct->task;
1657                if (ct->task == dump_task || !info->thread) {
1658                        t->next = info->thread;
1659                        info->thread = t;
1660                } else {
1661                        /*
1662                         * Make sure to keep the original task at
1663                         * the head of the list.
1664                         */
1665                        t->next = info->thread->next;
1666                        info->thread->next = t;
1667                }
1668        }
1669
1670        /*
1671         * Now fill in each thread's information.
1672         */
1673        for (t = info->thread; t != NULL; t = t->next)
1674                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1675                        return 0;
1676
1677        /*
1678         * Fill in the two process-wide notes.
1679         */
1680        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1681        info->size += notesize(&info->psinfo);
1682
1683        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1684        info->size += notesize(&info->signote);
1685
1686        fill_auxv_note(&info->auxv, current->mm);
1687        info->size += notesize(&info->auxv);
1688
        /* The NT_FILE note is best-effort: skip it if it cannot be built */
1689        if (fill_files_note(&info->files) == 0)
1690                info->size += notesize(&info->files);
1691
1692        return 1;
1693}
1694
1695static size_t get_note_info_size(struct elf_note_info *info)
1696{
1697        return info->size;
1698}
1699
1700/*
1701 * Write all the notes for each thread.  When writing the first thread, the
1702 * process-wide notes are interleaved after the first thread-specific note.
1703 */
1704static int write_note_info(struct elf_note_info *info,
1705                           struct file *file, loff_t *foffset)
1706{
1707        bool first = true;
1708        struct elf_thread_core_info *t = info->thread;
1709
1710        do {
1711                int i;
1712
1713                if (!writenote(&t->notes[0], file, foffset))
1714                        return 0;
1715
1716                if (first && !writenote(&info->psinfo, file, foffset))
1717                        return 0;
1718                if (first && !writenote(&info->signote, file, foffset))
1719                        return 0;
1720                if (first && !writenote(&info->auxv, file, foffset))
1721                        return 0;
1722                if (first && info->files.data &&
                    !writenote(&info->files, file, foffset))
1723                        return 0;
1724
1725                for (i = 1; i < info->thread_notes; ++i)
1726                        if (t->notes[i].data &&
1727                            !writenote(&t->notes[i], file, foffset))
1728                                return 0;
1729
1730                first = false;
1731                t = t->next;
1732        } while (t);
1733
1734        return 1;
1735}
1736
1737static void free_note_info(struct elf_note_info *info)
1738{
1739        struct elf_thread_core_info *threads = info->thread;
1740        while (threads) {
1741                unsigned int i;
1742                struct elf_thread_core_info *t = threads;
1743                threads = t->next;
1744                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1745                for (i = 1; i < info->thread_notes; ++i)
1746                        kfree(t->notes[i].data);
1747                kfree(t);
1748        }
1749        kfree(info->psinfo.data);
1750        vfree(info->files.data);
1751}
1752
1753#else
1754
1755/* Here is the structure in which the status of each thread is captured. */
1756struct elf_thread_status
1757{
1758        struct list_head list;
1759        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1760        elf_fpregset_t fpu;             /* NT_PRFPREG */
1761        struct task_struct *thread;
1762#ifdef ELF_CORE_COPY_XFPREGS
1763        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1764#endif
1765        struct memelfnote notes[3];
1766        int num_notes;
1767};
1768
1769/*
1770 * In order to add the specific thread information to the ELF core file,
1771 * we need to keep a linked list of every thread's pr_status and then create
1772 * a single section for them in the final core file.
1773 */
1774static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1775{
1776        int sz = 0;
1777        struct task_struct *p = t->thread;
1778        t->num_notes = 0;
1779
1780        fill_prstatus(&t->prstatus, p, signr);
1781        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1782        
1783        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1784                  &(t->prstatus));
1785        t->num_notes++;
1786        sz += notesize(&t->notes[0]);
1787
1788        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1789                                                                &t->fpu))) {
1790                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1791                          &(t->fpu));
1792                t->num_notes++;
1793                sz += notesize(&t->notes[1]);
1794        }
1795
1796#ifdef ELF_CORE_COPY_XFPREGS
1797        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1798                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1799                          sizeof(t->xfpu), &t->xfpu);
1800                t->num_notes++;
1801                sz += notesize(&t->notes[2]);
1802        }
1803#endif  
1804        return sz;
1805}
1806
1807struct elf_note_info {
1808        struct memelfnote *notes;
        struct memelfnote *notes_files; /* NT_FILE note slot, if filled */
1809        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1810        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1811        struct list_head thread_list;
1812        elf_fpregset_t *fpu;
1813#ifdef ELF_CORE_COPY_XFPREGS
1814        elf_fpxregset_t *xfpu;
1815#endif
1816        user_siginfo_t csigdata;
1817        int thread_status_size;
1818        int numnote;
1819};
1820
1821static int elf_note_info_init(struct elf_note_info *info)
1822{
1823        memset(info, 0, sizeof(*info));
1824        INIT_LIST_HEAD(&info->thread_list);
1825
1826        /* Allocate space for ELF notes */
1827        info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1828        if (!info->notes)
1829                return 0;
1830        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1831        if (!info->psinfo)
1832                return 0;
1833        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1834        if (!info->prstatus)
1835                return 0;
1836        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1837        if (!info->fpu)
1838                return 0;
1839#ifdef ELF_CORE_COPY_XFPREGS
1840        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1841        if (!info->xfpu)
1842                return 0;
1843#endif
1844        return 1;
1845}
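/*
 * On failure the caller bails out to free_note_info(), which copes
 * with this partially-initialized state: the memset() above leaves
 * every pointer NULL, and kfree(NULL) is a no-op.
 */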
1846
1847static int fill_note_info(struct elfhdr *elf, int phdrs,
1848                          struct elf_note_info *info,
1849                          siginfo_t *siginfo, struct pt_regs *regs)
1850{
1851        struct list_head *t;
1852
1853        if (!elf_note_info_init(info))
1854                return 0;
1855
1856        if (siginfo->si_signo) {
1857                struct core_thread *ct;
1858                struct elf_thread_status *ets;
1859
1860                for (ct = current->mm->core_state->dumper.next;
1861                                                ct; ct = ct->next) {
1862                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1863                        if (!ets)
1864                                return 0;
1865
1866                        ets->thread = ct->task;
1867                        list_add(&ets->list, &info->thread_list);
1868                }
1869
1870                list_for_each(t, &info->thread_list) {
1871                        int sz;
1872
1873                        ets = list_entry(t, struct elf_thread_status, list);
1874                        sz = elf_dump_thread_status(siginfo->si_signo, ets);
1875                        info->thread_status_size += sz;
1876                }
1877        }
1878        /* now collect the dump for the current task */
1879        memset(info->prstatus, 0, sizeof(*info->prstatus));
1880        fill_prstatus(info->prstatus, current, siginfo->si_signo);
1881        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1882
1883        /* Set up header */
1884        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1885
1886        /*
1887         * Set up the notes in similar form to SVR4 core dumps made
1888         * with info from their /proc.
1889         */
1890
1891        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1892                  sizeof(*info->prstatus), info->prstatus);
1893        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1894        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1895                  sizeof(*info->psinfo), info->psinfo);
1896
1897        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1898        fill_auxv_note(info->notes + 3, current->mm);
1899        info->numnote = 4;
1900
        /* NT_FILE is best-effort: remember its slot so that
         * free_note_info() only frees what was actually allocated */
1901        if (fill_files_note(info->notes + info->numnote) == 0) {
                info->notes_files = info->notes + info->numnote;
                info->numnote++;
        }
1902
1903        /* Try to dump the FPU. */
1904        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1905                                                               info->fpu);
1906        if (info->prstatus->pr_fpvalid)
1907                fill_note(info->notes + info->numnote++,
1908                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1909#ifdef ELF_CORE_COPY_XFPREGS
1910        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1911                fill_note(info->notes + info->numnote++,
1912                          "LINUX", ELF_CORE_XFPREG_TYPE,
1913                          sizeof(*info->xfpu), info->xfpu);
1914#endif
1915
1916        return 1;
1917}
1918
1919static size_t get_note_info_size(struct elf_note_info *info)
1920{
1921        int sz = 0;
1922        int i;
1923
1924        for (i = 0; i < info->numnote; i++)
1925                sz += notesize(info->notes + i);
1926
1927        sz += info->thread_status_size;
1928
1929        return sz;
1930}
1931
1932static int write_note_info(struct elf_note_info *info,
1933                           struct file *file, loff_t *foffset)
1934{
1935        int i;
1936        struct list_head *t;
1937
1938        for (i = 0; i < info->numnote; i++)
1939                if (!writenote(info->notes + i, file, foffset))
1940                        return 0;
1941
1942        /* write out the thread status notes section */
1943        list_for_each(t, &info->thread_list) {
1944                struct elf_thread_status *tmp =
1945                                list_entry(t, struct elf_thread_status, list);
1946
1947                for (i = 0; i < tmp->num_notes; i++)
1948                        if (!writenote(&tmp->notes[i], file, foffset))
1949                                return 0;
1950        }
1951
1952        return 1;
1953}
1954
1955static void free_note_info(struct elf_note_info *info)
1956{
1957        while (!list_empty(&info->thread_list)) {
1958                struct list_head *tmp = info->thread_list.next;
1959                list_del(tmp);
1960                kfree(list_entry(tmp, struct elf_thread_status, list));
1961        }
1962
1963        /* Free data allocated by fill_files_note(), if any: */
1964        if (info->notes_files)
                vfree(info->notes_files->data);
1965
1966        kfree(info->prstatus);
1967        kfree(info->psinfo);
1968        kfree(info->notes);
1969        kfree(info->fpu);
1970#ifdef ELF_CORE_COPY_XFPREGS
1971        kfree(info->xfpu);
1972#endif
1973}
1974
1975#endif
1976
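/*
 * The "gate" vma (e.g. the x86 vsyscall page) is not linked into
 * mm->mmap, so the dump loops below use first_vma()/next_vma() to
 * visit it as the final entry of the walk.
 */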
1977static struct vm_area_struct *first_vma(struct task_struct *tsk,
1978                                        struct vm_area_struct *gate_vma)
1979{
1980        struct vm_area_struct *ret = tsk->mm->mmap;
1981
1982        if (ret)
1983                return ret;
1984        return gate_vma;
1985}
1986/*
1987 * Helper function for iterating across a vma list.  It ensures that the caller
1988 * will visit `gate_vma' prior to terminating the search.
1989 */
1990static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1991                                        struct vm_area_struct *gate_vma)
1992{
1993        struct vm_area_struct *ret;
1994
1995        ret = this_vma->vm_next;
1996        if (ret)
1997                return ret;
1998        if (this_vma == gate_vma)
1999                return NULL;
2000        return gate_vma;
2001}
2002
2003static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2004                             elf_addr_t e_shoff, int segs)
2005{
2006        elf->e_shoff = e_shoff;
2007        elf->e_shentsize = sizeof(*shdr4extnum);
2008        elf->e_shnum = 1;
2009        elf->e_shstrndx = SHN_UNDEF;
2010
2011        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2012
2013        shdr4extnum->sh_type = SHT_NULL;
2014        shdr4extnum->sh_size = elf->e_shnum;
2015        shdr4extnum->sh_link = elf->e_shstrndx;
2016        shdr4extnum->sh_info = segs;
2017}
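/*
 * Per the ELF gABI, a consumer that finds e_phnum == PN_XNUM reads the
 * real program header count from sh_info of section header 0; that is
 * the only reason a lone SHT_NULL section header is emitted here.
 */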
2018
2019static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2020                                     unsigned long mm_flags)
2021{
2022        struct vm_area_struct *vma;
2023        size_t size = 0;
2024
2025        for (vma = first_vma(current, gate_vma); vma != NULL;
2026             vma = next_vma(vma, gate_vma))
2027                size += vma_dump_size(vma, mm_flags);
2028        return size;
2029}
2030
2031/*
2032 * Actual dumper
2033 *
2034 * This is a two-pass process; first we find the offsets of the bits,
2035 * and then they are actually written out.  If we run out of core limit
2036 * we just truncate.
2037 */
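/*
 * The resulting file layout, in order of increasing file offset
 * (a sketch):
 *
 *   ELF header
 *   program headers (one PT_NOTE + one PT_LOAD per vma + extras)
 *   note data
 *   <padding up to ELF_EXEC_PAGESIZE>
 *   segment data, dumped one page at a time
 *   extra arch data, then the extended-numbering section header
 *   (only if e_phnum overflowed)
 */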
2038static int elf_core_dump(struct coredump_params *cprm)
2039{
2040        int has_dumped = 0;
2041        mm_segment_t fs;
2042        int segs;
2043        size_t size = 0;
2044        struct vm_area_struct *vma, *gate_vma;
2045        struct elfhdr *elf = NULL;
2046        loff_t offset = 0, dataoff, foffset;
2047        struct elf_note_info info = { };
2048        struct elf_phdr *phdr4note = NULL;
2049        struct elf_shdr *shdr4extnum = NULL;
2050        Elf_Half e_phnum;
2051        elf_addr_t e_shoff;
2052
2053        /*
2054         * We no longer stop all VM operations.
2055         *
2056         * This is because the processes that could possibly change map_count
2057         * or the mmap / vma pages are now blocked in do_exit, waiting for
2058         * current to finish this core dump.
2059         *
2060         * Only ptrace can touch these memory addresses, but it doesn't change
2061         * the map_count or the pages allocated. So no possibility of crashing
2062         * exists while dumping the mm->vm_next areas to the core file.
2063         */
2064  
2065        /* alloc memory for large data structures: too large to be on stack */
2066        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2067        if (!elf)
2068                goto out;
2069        /*
2070         * The number of segs is recorded in the ELF header as a 16-bit value.
2071         * Please check the DEFAULT_MAX_MAP_COUNT definition when you modify this.
2072         */
2073        segs = current->mm->map_count;
2074        segs += elf_core_extra_phdrs();
2075
2076        gate_vma = get_gate_vma(current->mm);
2077        if (gate_vma != NULL)
2078                segs++;
2079
2080        /* for notes section */
2081        segs++;
2082
2083        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2084         * this, the kernel supports extended numbering. Have a look at
2085         * include/linux/elf.h for further information. */
2086        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2087
2088        /*
2089         * Collect all the non-memory information about the process for the
2090         * notes.  This also sets up the file header.
2091         */
2092        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2093                goto cleanup;
2094
2095        has_dumped = 1;
2096
2097        fs = get_fs();
2098        set_fs(KERNEL_DS);
2099
2100        offset += sizeof(*elf);                         /* Elf header */
2101        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2102        foffset = offset;
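        /* offset is each piece's position computed in this first pass;
         * foffset tracks how much has actually been written so far. */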
2103
2104        /* Write notes phdr entry */
2105        {
2106                size_t sz = get_note_info_size(&info);
2107
2108                sz += elf_coredump_extra_notes_size();
2109
2110                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2111                if (!phdr4note)
2112                        goto end_coredump;
2113
2114                fill_elf_note_phdr(phdr4note, sz, offset);
2115                offset += sz;
2116        }
2117
2118        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2119
2120        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2121        offset += elf_core_extra_data_size();
2122        e_shoff = offset;
2123
2124        if (e_phnum == PN_XNUM) {
2125                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2126                if (!shdr4extnum)
2127                        goto end_coredump;
2128                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2129        }
2130
2131        offset = dataoff;
2132
2133        size += sizeof(*elf);
2134        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2135                goto end_coredump;
2136
2137        size += sizeof(*phdr4note);
2138        if (size > cprm->limit
2139            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2140                goto end_coredump;
2141
2142        /* Write program headers for segments dump */
2143        for (vma = first_vma(current, gate_vma); vma != NULL;
2144                        vma = next_vma(vma, gate_vma)) {
2145                struct elf_phdr phdr;
2146
2147                phdr.p_type = PT_LOAD;
2148                phdr.p_offset = offset;
2149                phdr.p_vaddr = vma->vm_start;
2150                phdr.p_paddr = 0;
2151                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2152                phdr.p_memsz = vma->vm_end - vma->vm_start;
2153                offset += phdr.p_filesz;
2154                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2155                if (vma->vm_flags & VM_WRITE)
2156                        phdr.p_flags |= PF_W;
2157                if (vma->vm_flags & VM_EXEC)
2158                        phdr.p_flags |= PF_X;
2159                phdr.p_align = ELF_EXEC_PAGESIZE;
2160
2161                size += sizeof(phdr);
2162                if (size > cprm->limit
2163                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2164                        goto end_coredump;
2165        }
2166
2167        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2168                goto end_coredump;
2169
2170        /* write out the notes section */
2171        if (!write_note_info(&info, cprm->file, &foffset))
2172                goto end_coredump;
2173
2174        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2175                goto end_coredump;
2176
2177        /* Align to page */
2178        if (!dump_seek(cprm->file, dataoff - foffset))
2179                goto end_coredump;
2180
2181        for (vma = first_vma(current, gate_vma); vma != NULL;
2182                        vma = next_vma(vma, gate_vma)) {
2183                unsigned long addr;
2184                unsigned long end;
2185
2186                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2187
2188                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2189                        struct page *page;
2190                        int stop;
2191
2192                        page = get_dump_page(addr);
2193                        if (page) {
2194                                void *kaddr = kmap(page);
2195                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
2196                                        !dump_write(cprm->file, kaddr,
2197                                                    PAGE_SIZE);
2198                                kunmap(page);
2199                                page_cache_release(page);
2200                        } else
2201                                stop = !dump_seek(cprm->file, PAGE_SIZE);
2202                        if (stop)
2203                                goto end_coredump;
2204                }
2205        }
2206
2207        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2208                goto end_coredump;
2209
2210        if (e_phnum == PN_XNUM) {
2211                size += sizeof(*shdr4extnum);
2212                if (size > cprm->limit
2213                    || !dump_write(cprm->file, shdr4extnum,
2214                                   sizeof(*shdr4extnum)))
2215                        goto end_coredump;
2216        }
2217
2218end_coredump:
2219        set_fs(fs);
2220
2221cleanup:
2222        free_note_info(&info);
2223        kfree(shdr4extnum);
2224        kfree(phdr4note);
2225        kfree(elf);
2226out:
2227        return has_dumped;
2228}
2229
2230#endif          /* CONFIG_ELF_CORE */
2231
2232static int __init init_elf_binfmt(void)
2233{
2234        register_binfmt(&elf_format);
2235        return 0;
2236}
2237
2238static void __exit exit_elf_binfmt(void)
2239{
2240        /* Remove the ELF loader. */
2241        unregister_binfmt(&elf_format);
2242}
2243
2244core_initcall(init_elf_binfmt);
2245module_exit(exit_elf_binfmt);
2246MODULE_LICENSE("GPL");
2247