linux/fs/binfmt_elf.c
/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmer's Guide: ANSI C and Programming
 * Support Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

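/*
 * Editor's note (illustrative, not part of the original source): with
 * ELF_MIN_ALIGN == 0x1000, the three macros above behave as follows for
 * a sample value _v == 0x12345:
 *
 *   ELF_PAGESTART(0x12345)  == 0x12000  (round down to the page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345    (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000  (round up to the next page)
 */
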
static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}
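
/*
 * Editor's worked example (assuming ELF_MIN_ALIGN == 0x1000): a call
 * set_brk(0x0804a123, 0x0804c010) rounds both ends up, giving
 * start == 0x0804b000 and end == 0x0804d000, vm_brk()s the 0x2000 bytes
 * in between, and leaves mm->start_brk == mm->brk == 0x0804d000.
 */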

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should
   not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
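
/*
 * Editor's worked example (assuming ELF_MIN_ALIGN == 0x1000): for
 * elf_bss == 0x0804a123, ELF_PAGEOFFSET(elf_bss) == 0x123, so padzero()
 * clears the 0x1000 - 0x123 == 0xedd bytes from 0x0804a123 up to the
 * page boundary at 0x0804b000.
 */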

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
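
/*
 * Editor's illustrative sketch (never compiled, not in the original
 * file): on the usual grows-down stack, STACK_ALLOC() moves sp down
 * first and returns the new, lower address, while STACK_ROUND() masks
 * down to a 16-byte boundary, as most ABIs expect of the initial sp.
 */
#if 0
static void stack_alloc_example(void)
{
        unsigned long sp = 0xbfff0000;
        void *p = (void *)STACK_ALLOC(sp, 16);  /* sp == p == 0xbffefff0 */
}
#endif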

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
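
/*
 * Editor's summary sketch of the initial stack that the function above
 * builds on a grows-down stack (lowest address first); illustrative
 * only, not normative:
 *
 *   sp -> [ argc                                  ]
 *         [ argv[0] ... argv[argc - 1] ] [ NULL   ]
 *         [ envp[0] ... envp[envc - 1] ] [ NULL   ]
 *         [ auxv: (AT_* id, value) pairs, AT_NULL ]
 *         [ ... argument/environment strings,     ]
 *         [ platform strings, AT_RANDOM bytes ... ]
 */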

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return(map_addr);
}

#endif /* !elf_map */
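
/*
 * Editor's note (illustrative): if total_size == 0x5000 but this first
 * segment only needs size == 0x2000, the code above maps the full
 * 0x5000 so the kernel picks an address with room for the whole image,
 * then immediately unmaps the trailing 0x3000; this ensures a
 * randomized base leaves room for the entire interpreter image rather
 * than just its first segment.
 */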

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
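
/*
 * Editor's worked example: for two PT_LOAD entries with
 * p_vaddr == 0x0000 / p_memsz == 0x1800 and p_vaddr == 0x2000 /
 * p_memsz == 0x500, the function above returns
 * 0x2000 + 0x500 - ELF_PAGESTART(0x0000) == 0x2500: the span from the
 * first segment's page start to the end of the last segment.
 */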


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
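
/*
 * Editor's arithmetic note: with PAGE_SHIFT == 12, the default
 * STACK_RND_MASK is 0x7ff, so random_variable is at most
 * 0x7ff << 12 == 0x7ff000 bytes -- just under 8MB of jitter, subtracted
 * from the page-aligned stack top on grows-down architectures.
 */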

static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char * elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, This is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl or an explicit setting of
                         * personality flags.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g. PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include - vDSO, vsyscall, and other
 * architecture specific mappings
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}
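
/*
 * Editor's note (illustrative): each FILTER(type) test above checks one
 * MMF_DUMP_* bit in mm_flags; these are the per-process bits that
 * userspace toggles through /proc/<pid>/coredump_filter, so e.g.
 * clearing the ANON_PRIVATE bit makes vma_dump_size() return 0 for
 * private anonymous mappings.
 */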

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
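
/*
 * Editor's worked example: for a note named "CORE" (n_namesz == 5,
 * padded to 8) with a 4-byte-aligned payload of datasz == 336,
 * notesize() returns sizeof(struct elf_note) + 8 + 336, i.e. 356 with
 * the usual 12-byte (three 32-bit words) note header.
 */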

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_t utime, stime;

                task_cputime(p, &utime, &stime);
                cputime_to_timeval(utime, &prstatus->pr_utime);
                cputime_to_timeval(stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}
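
/*
 * Editor's note (illustrative): the do/while above walks saved_auxv in
 * (id, value) pairs and stops one pair *past* AT_NULL, e.g. for
 * { AT_HWCAP, x, AT_NULL, 0 } it leaves i == 4, so the note covers the
 * terminating AT_NULL pair as well.
 */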

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
                siginfo_t *siginfo)
{
        mm_segment_t old_fs = get_fs();
        set_fs(KERNEL_DS);
        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
        set_fs(old_fs);
        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
static void fill_files_note(struct memelfnote *note)
{
        struct vm_area_struct *vma;
        unsigned count, size, names_ofs, remaining, n;
        user_long_t *data;
        user_long_t *start_end_ofs;
        char *name_base, *name_curpos;

        /* *Estimated* file count and total data size needed */
        count = current->mm->map_count;
        size = count * 64;

        names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
        if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
                goto err;
        size = round_up(size, PAGE_SIZE);
        data = vmalloc(size);
        if (!data)
                goto err;

        start_end_ofs = data + 2;
        name_base = name_curpos = ((char *)data) + names_ofs;
        remaining = size - names_ofs;
        count = 0;
        for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
                struct file *file;
                const char *filename;

                file = vma->vm_file;
                if (!file)
                        continue;
                filename = d_path(&file->f_path, name_curpos, remaining);
                if (IS_ERR(filename)) {
                        if (PTR_ERR(filename) == -ENAMETOOLONG) {
                                vfree(data);
                                size = size * 5 / 4;
                                goto alloc;
                        }
                        continue;
                }

                /* d_path() fills at the end, move name down */
                /* n = strlen(filename) + 1: */
                n = (name_curpos + remaining) - filename;
                remaining = filename - name_curpos;
                memmove(name_curpos, filename, n);
                name_curpos += n;

                *start_end_ofs++ = vma->vm_start;
                *start_end_ofs++ = vma->vm_end;
                *start_end_ofs++ = vma->vm_pgoff;
                count++;
        }
1473        /* Now we know the exact count of files, so we can store it */
1474        data[0] = count;
1475        data[1] = PAGE_SIZE;
1476        /*
1477         * The final count is usually less than current->mm->map_count
1478         * (file-less vmas are skipped), so move the filenames down.
1479         */
1480        n = current->mm->map_count - count;
1481        if (n != 0) {
1482                unsigned shift_bytes = n * 3 * sizeof(data[0]);
1483                memmove(name_base - shift_bytes, name_base,
1484                        name_curpos - name_base);
1485                name_curpos -= shift_bytes;
1486        }
1487
1488        size = name_curpos - (char *)data;
1489        fill_note(note, "CORE", NT_FILE, size, data);
1490 err: ;
1491}
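
/*
 * Illustrative user-space sketch (not part of this file): walking an
 * NT_FILE note descriptor laid out as documented above.  "data" is a
 * hypothetical pointer to the start of the note payload; the element
 * type mirrors the dumper's user_long_t.
 */
#include <stdio.h>
#include <string.h>

static void print_nt_file(const long *data)
{
        long count = data[0];
        long page_size = data[1];
        const long *ent = data + 2;
        const char *name = (const char *)(ent + 3 * count);
        long i;

        for (i = 0; i < count; i++, ent += 3) {
                /* ent[0]=start, ent[1]=end, ent[2]=file_ofs in pages */
                printf("%lx-%lx @ %lx: %s\n",
                       (unsigned long)ent[0], (unsigned long)ent[1],
                       (unsigned long)(ent[2] * page_size), name);
                name += strlen(name) + 1;       /* names are NUL-separated */
        }
}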
1492
1493#ifdef CORE_DUMP_USE_REGSET
1494#include <linux/regset.h>
1495
1496struct elf_thread_core_info {
1497        struct elf_thread_core_info *next;
1498        struct task_struct *task;
1499        struct elf_prstatus prstatus;
1500        struct memelfnote notes[];
1501};
1502
1503struct elf_note_info {
1504        struct elf_thread_core_info *thread;
1505        struct memelfnote psinfo;
1506        struct memelfnote signote;
1507        struct memelfnote auxv;
1508        struct memelfnote files;
1509        user_siginfo_t csigdata;
1510        size_t size;
1511        int thread_notes;
1512};
1513
1514/*
1515 * When a regset has a writeback hook, we call it on each thread before
1516 * dumping user memory.  On register window machines, this makes sure the
1517 * user memory backing the register data is up to date before we read it.
1518 */
1519static void do_thread_regset_writeback(struct task_struct *task,
1520                                       const struct user_regset *regset)
1521{
1522        if (regset->writeback)
1523                regset->writeback(task, regset, 1);
1524}
1525
1526#ifndef PR_REG_SIZE
1527#define PR_REG_SIZE(S) sizeof(S)
1528#endif
1529
1530#ifndef PRSTATUS_SIZE
1531#define PRSTATUS_SIZE(S) sizeof(S)
1532#endif
1533
1534#ifndef PR_REG_PTR
1535#define PR_REG_PTR(S) (&((S)->pr_reg))
1536#endif
1537
1538#ifndef SET_PR_FPVALID
1539#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1540#endif
1541
1542static int fill_thread_core_info(struct elf_thread_core_info *t,
1543                                 const struct user_regset_view *view,
1544                                 long signr, size_t *total)
1545{
1546        unsigned int i;
1547
1548        /*
1549         * NT_PRSTATUS is the one special case, because the regset data
1550         * goes into the pr_reg field inside the note contents, rather
1551         * than being the whole note contents.  We fill the rest in here.
1552         * We assume that regset 0 is NT_PRSTATUS.
1553         */
1554        fill_prstatus(&t->prstatus, t->task, signr);
1555        (void) view->regsets[0].get(t->task, &view->regsets[0],
1556                                    0, PR_REG_SIZE(t->prstatus.pr_reg),
1557                                    PR_REG_PTR(&t->prstatus), NULL);
1558
1559        fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1560                  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1561        *total += notesize(&t->notes[0]);
1562
1563        do_thread_regset_writeback(t->task, &view->regsets[0]);
1564
1565        /*
1566         * Every other regset might generate a note too.  For each regset
1567         * that has no core_note_type or is inactive, we leave t->notes[i]
1568         * all zero and we'll know to skip writing it later.
1569         */
1570        for (i = 1; i < view->n; ++i) {
1571                const struct user_regset *regset = &view->regsets[i];
1572                do_thread_regset_writeback(t->task, regset);
1573                if (regset->core_note_type && regset->get &&
1574                    (!regset->active || regset->active(t->task, regset))) {
1575                        int ret;
1576                        size_t size = regset->n * regset->size;
1577                        void *data = kmalloc(size, GFP_KERNEL);
1578                        if (unlikely(!data))
1579                                return 0;
1580                        ret = regset->get(t->task, regset,
1581                                          0, size, data, NULL);
1582                        if (unlikely(ret))
1583                                kfree(data);
1584                        else {
1585                                if (regset->core_note_type != NT_PRFPREG)
1586                                        fill_note(&t->notes[i], "LINUX",
1587                                                  regset->core_note_type,
1588                                                  size, data);
1589                                else {
1590                                        SET_PR_FPVALID(&t->prstatus, 1);
1591                                        fill_note(&t->notes[i], "CORE",
1592                                                  NT_PRFPREG, size, data);
1593                                }
1594                                *total += notesize(&t->notes[i]);
1595                        }
1596                }
1597        }
1598
1599        return 1;
1600}
1601
1602static int fill_note_info(struct elfhdr *elf, int phdrs,
1603                          struct elf_note_info *info,
1604                          siginfo_t *siginfo, struct pt_regs *regs)
1605{
1606        struct task_struct *dump_task = current;
1607        const struct user_regset_view *view = task_user_regset_view(dump_task);
1608        struct elf_thread_core_info *t;
1609        struct elf_prpsinfo *psinfo;
1610        struct core_thread *ct;
1611        unsigned int i;
1612
1613        info->size = 0;
1614        info->thread = NULL;
1615
1616        psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1617        if (psinfo == NULL) {
1618                info->psinfo.data = NULL; /* So we don't free this wrongly */
1619                return 0;
1620        }
1621
1622        fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1623
1624        /*
1625         * Figure out how many notes we're going to need for each thread.
1626         */
1627        info->thread_notes = 0;
1628        for (i = 0; i < view->n; ++i)
1629                if (view->regsets[i].core_note_type != 0)
1630                        ++info->thread_notes;
1631
1632        /*
1633         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1634         * since it is our one special case.
1635         */
1636        if (unlikely(info->thread_notes == 0) ||
1637            unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1638                WARN_ON(1);
1639                return 0;
1640        }
1641
1642        /*
1643         * Initialize the ELF file header.
1644         */
1645        fill_elf_header(elf, phdrs,
1646                        view->e_machine, view->e_flags);
1647
1648        /*
1649         * Allocate a structure for each thread.
1650         */
1651        for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1652                t = kzalloc(offsetof(struct elf_thread_core_info,
1653                                     notes[info->thread_notes]),
1654                            GFP_KERNEL);
1655                if (unlikely(!t))
1656                        return 0;
1657
1658                t->task = ct->task;
1659                if (ct->task == dump_task || !info->thread) {
1660                        t->next = info->thread;
1661                        info->thread = t;
1662                } else {
1663                        /*
1664                         * Make sure to keep the original task at
1665                         * the head of the list.
1666                         */
1667                        t->next = info->thread->next;
1668                        info->thread->next = t;
1669                }
1670        }
1671
1672        /*
1673         * Now fill in each thread's information.
1674         */
1675        for (t = info->thread; t != NULL; t = t->next)
1676                if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1677                        return 0;
1678
1679        /*
1680         * Fill in the two process-wide notes.
1681         */
1682        fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1683        info->size += notesize(&info->psinfo);
1684
1685        fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1686        info->size += notesize(&info->signote);
1687
1688        fill_auxv_note(&info->auxv, current->mm);
1689        info->size += notesize(&info->auxv);
1690
1691        fill_files_note(&info->files);
1692        info->size += notesize(&info->files);
1693
1694        return 1;
1695}
1696
1697static size_t get_note_info_size(struct elf_note_info *info)
1698{
1699        return info->size;
1700}
1701
1702/*
1703 * Write all the notes for each thread.  When writing the first thread, the
1704 * process-wide notes are interleaved after the first thread-specific note.
1705 */
1706static int write_note_info(struct elf_note_info *info,
1707                           struct file *file, loff_t *foffset)
1708{
1709        bool first = true;
1710        struct elf_thread_core_info *t = info->thread;
1711
1712        do {
1713                int i;
1714
1715                if (!writenote(&t->notes[0], file, foffset))
1716                        return 0;
1717
1718                if (first && !writenote(&info->psinfo, file, foffset))
1719                        return 0;
1720                if (first && !writenote(&info->signote, file, foffset))
1721                        return 0;
1722                if (first && !writenote(&info->auxv, file, foffset))
1723                        return 0;
1724                if (first && !writenote(&info->files, file, foffset))
1725                        return 0;
1726
1727                for (i = 1; i < info->thread_notes; ++i)
1728                        if (t->notes[i].data &&
1729                            !writenote(&t->notes[i], file, foffset))
1730                                return 0;
1731
1732                first = false;
1733                t = t->next;
1734        } while (t);
1735
1736        return 1;
1737}
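
/*
 * For concreteness, the note order write_note_info() produces for a
 * hypothetical two-thread process whose threads each emitted one extra
 * regset note besides NT_PRSTATUS:
 *
 *   NT_PRSTATUS                                (thread 1)
 *   NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE  (process-wide)
 *   extra regset note                          (thread 1)
 *   NT_PRSTATUS                                (thread 2)
 *   extra regset note                          (thread 2)
 */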
1738
1739static void free_note_info(struct elf_note_info *info)
1740{
1741        struct elf_thread_core_info *threads = info->thread;
1742        while (threads) {
1743                unsigned int i;
1744                struct elf_thread_core_info *t = threads;
1745                threads = t->next;
1746                WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1747                for (i = 1; i < info->thread_notes; ++i)
1748                        kfree(t->notes[i].data);
1749                kfree(t);
1750        }
1751        kfree(info->psinfo.data);
1752        vfree(info->files.data);
1753}
1754
1755#else
1756
1757/* Here is the structure in which status of each thread is captured. */
1758struct elf_thread_status
1759{
1760        struct list_head list;
1761        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1762        elf_fpregset_t fpu;             /* NT_PRFPREG */
1763        struct task_struct *thread;
1764#ifdef ELF_CORE_COPY_XFPREGS
1765        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1766#endif
1767        struct memelfnote notes[3];
1768        int num_notes;
1769};
1770
1771/*
1772 * In order to add the thread-specific information to the ELF core file,
1773 * we need to keep a linked list of every thread's pr_status and then create
1774 * a single section for them in the final core file.
1775 */
1776static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1777{
1778        int sz = 0;
1779        struct task_struct *p = t->thread;
1780        t->num_notes = 0;
1781
1782        fill_prstatus(&t->prstatus, p, signr);
1783        elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1784        
1785        fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1786                  &(t->prstatus));
1787        t->num_notes++;
1788        sz += notesize(&t->notes[0]);
1789
1790        if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1791                                                                &t->fpu))) {
1792                fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1793                          &(t->fpu));
1794                t->num_notes++;
1795                sz += notesize(&t->notes[1]);
1796        }
1797
1798#ifdef ELF_CORE_COPY_XFPREGS
1799        if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1800                fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1801                          sizeof(t->xfpu), &t->xfpu);
1802                t->num_notes++;
1803                sz += notesize(&t->notes[2]);
1804        }
1805#endif  
1806        return sz;
1807}
1808
1809struct elf_note_info {
1810        struct memelfnote *notes;
1811        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1812        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1813        struct list_head thread_list;
1814        elf_fpregset_t *fpu;
1815#ifdef ELF_CORE_COPY_XFPREGS
1816        elf_fpxregset_t *xfpu;
1817#endif
1818        user_siginfo_t csigdata;
1819        int thread_status_size;
1820        int numnote;
1821};
1822
1823static int elf_note_info_init(struct elf_note_info *info)
1824{
1825        memset(info, 0, sizeof(*info));
1826        INIT_LIST_HEAD(&info->thread_list);
1827
1828        /* Allocate space for ELF notes */
1829        info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1830        if (!info->notes)
1831                return 0;
1832        info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1833        if (!info->psinfo)
1834                return 0;
1835        info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1836        if (!info->prstatus)
1837                return 0;
1838        info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1839        if (!info->fpu)
1840                return 0;
1841#ifdef ELF_CORE_COPY_XFPREGS
1842        info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1843        if (!info->xfpu)
1844                return 0;
1845#endif
1846        return 1;
1847}
1848
1849static int fill_note_info(struct elfhdr *elf, int phdrs,
1850                          struct elf_note_info *info,
1851                          siginfo_t *siginfo, struct pt_regs *regs)
1852{
1853        struct list_head *t;
1854
1855        if (!elf_note_info_init(info))
1856                return 0;
1857
1858        if (siginfo->si_signo) {
1859                struct core_thread *ct;
1860                struct elf_thread_status *ets;
1861
1862                for (ct = current->mm->core_state->dumper.next;
1863                                                ct; ct = ct->next) {
1864                        ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1865                        if (!ets)
1866                                return 0;
1867
1868                        ets->thread = ct->task;
1869                        list_add(&ets->list, &info->thread_list);
1870                }
1871
1872                list_for_each(t, &info->thread_list) {
1873                        int sz;
1874
1875                        ets = list_entry(t, struct elf_thread_status, list);
1876                        sz = elf_dump_thread_status(siginfo->si_signo, ets);
1877                        info->thread_status_size += sz;
1878                }
1879        }
1880        /* now collect the dump for the current task */
1881        memset(info->prstatus, 0, sizeof(*info->prstatus));
1882        fill_prstatus(info->prstatus, current, siginfo->si_signo);
1883        elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1884
1885        /* Set up header */
1886        fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1887
1888        /*
1889         * Set up the notes in similar form to SVR4 core dumps made
1890         * with info from their /proc.
1891         */
1892
1893        fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1894                  sizeof(*info->prstatus), info->prstatus);
1895        fill_psinfo(info->psinfo, current->group_leader, current->mm);
1896        fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1897                  sizeof(*info->psinfo), info->psinfo);
1898
1899        fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1900        fill_auxv_note(info->notes + 3, current->mm);
1901        fill_files_note(info->notes + 4);
1902
1903        info->numnote = 5;
1904
1905        /* Try to dump the FPU. */
1906        info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1907                                                               info->fpu);
1908        if (info->prstatus->pr_fpvalid)
1909                fill_note(info->notes + info->numnote++,
1910                          "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1911#ifdef ELF_CORE_COPY_XFPREGS
1912        if (elf_core_copy_task_xfpregs(current, info->xfpu))
1913                fill_note(info->notes + info->numnote++,
1914                          "LINUX", ELF_CORE_XFPREG_TYPE,
1915                          sizeof(*info->xfpu), info->xfpu);
1916#endif
1917
1918        return 1;
1919}
1920
1921static size_t get_note_info_size(struct elf_note_info *info)
1922{
1923        int sz = 0;
1924        int i;
1925
1926        for (i = 0; i < info->numnote; i++)
1927                sz += notesize(info->notes + i);
1928
1929        sz += info->thread_status_size;
1930
1931        return sz;
1932}
1933
1934static int write_note_info(struct elf_note_info *info,
1935                           struct file *file, loff_t *foffset)
1936{
1937        int i;
1938        struct list_head *t;
1939
1940        for (i = 0; i < info->numnote; i++)
1941                if (!writenote(info->notes + i, file, foffset))
1942                        return 0;
1943
1944        /* write out the thread status notes section */
1945        list_for_each(t, &info->thread_list) {
1946                struct elf_thread_status *tmp =
1947                                list_entry(t, struct elf_thread_status, list);
1948
1949                for (i = 0; i < tmp->num_notes; i++)
1950                        if (!writenote(&tmp->notes[i], file, foffset))
1951                                return 0;
1952        }
1953
1954        return 1;
1955}
1956
1957static void free_note_info(struct elf_note_info *info)
1958{
1959        while (!list_empty(&info->thread_list)) {
1960                struct list_head *tmp = info->thread_list.next;
1961                list_del(tmp);
1962                kfree(list_entry(tmp, struct elf_thread_status, list));
1963        }
1964
1965        /* Free data allocated by fill_files_note(): */
1966        vfree(info->notes[4].data);
1967
1968        kfree(info->prstatus);
1969        kfree(info->psinfo);
1970        kfree(info->notes);
1971        kfree(info->fpu);
1972#ifdef ELF_CORE_COPY_XFPREGS
1973        kfree(info->xfpu);
1974#endif
1975}
1976
1977#endif
1978
1979static struct vm_area_struct *first_vma(struct task_struct *tsk,
1980                                        struct vm_area_struct *gate_vma)
1981{
1982        struct vm_area_struct *ret = tsk->mm->mmap;
1983
1984        if (ret)
1985                return ret;
1986        return gate_vma;
1987}
1988/*
1989 * Helper function for iterating across a vma list.  It ensures that the caller
1990 * will visit `gate_vma' prior to terminating the search.
1991 */
1992static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1993                                        struct vm_area_struct *gate_vma)
1994{
1995        struct vm_area_struct *ret;
1996
1997        ret = this_vma->vm_next;
1998        if (ret)
1999                return ret;
2000        if (this_vma == gate_vma)
2001                return NULL;
2002        return gate_vma;
2003}
2004
2005static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2006                             elf_addr_t e_shoff, int segs)
2007{
2008        elf->e_shoff = e_shoff;
2009        elf->e_shentsize = sizeof(*shdr4extnum);
2010        elf->e_shnum = 1;
2011        elf->e_shstrndx = SHN_UNDEF;
2012
2013        memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2014
2015        shdr4extnum->sh_type = SHT_NULL;
2016        shdr4extnum->sh_size = elf->e_shnum;
2017        shdr4extnum->sh_link = elf->e_shstrndx;
2018        shdr4extnum->sh_info = segs;
2019}
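
/*
 * Illustrative user-space sketch (not part of this file): recovering
 * the real segment count from a core file that used the extended
 * numbering set up by fill_extnum_info() above.
 */
#include <elf.h>

static unsigned int real_phnum(const Elf64_Ehdr *ehdr,
                               const Elf64_Shdr *shdr0)
{
        if (ehdr->e_phnum != PN_XNUM)
                return ehdr->e_phnum;
        return shdr0->sh_info;          /* stashed there by the dumper */
}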
2020
2021static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2022                                     unsigned long mm_flags)
2023{
2024        struct vm_area_struct *vma;
2025        size_t size = 0;
2026
2027        for (vma = first_vma(current, gate_vma); vma != NULL;
2028             vma = next_vma(vma, gate_vma))
2029                size += vma_dump_size(vma, mm_flags);
2030        return size;
2031}
2032
2033/*
2034 * Actual dumper
2035 *
2036 * This is a two-pass process; first we find the offsets of the bits,
2037 * and then they are actually written out.  If we run out of core limit
2038 * we just truncate.
2039 */
2040static int elf_core_dump(struct coredump_params *cprm)
2041{
2042        int has_dumped = 0;
2043        mm_segment_t fs;
2044        int segs;
2045        size_t size = 0;
2046        struct vm_area_struct *vma, *gate_vma;
2047        struct elfhdr *elf = NULL;
2048        loff_t offset = 0, dataoff, foffset;
2049        struct elf_note_info info;
2050        struct elf_phdr *phdr4note = NULL;
2051        struct elf_shdr *shdr4extnum = NULL;
2052        Elf_Half e_phnum;
2053        elf_addr_t e_shoff;
2054
2055        /*
2056         * We no longer stop all VM operations.
2057         * 
2058         * This is because the processes that could possibly change map_count
2059         * or the mmap / vma pages are now blocked in do_exit() until current
2060         * has finished this core dump.
2061         *
2062         * Only ptrace can touch these memory addresses, but it doesn't change
2063         * the map_count or the pages allocated. So no possibility of crashing
2064         * exists while dumping the mm->vm_next areas to the core file.
2065         */
2066  
2067        /* alloc memory for large data structures: too large to be on stack */
2068        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2069        if (!elf)
2070                goto out;
2071        /*
2072         * The number of segs is recorded in the ELF header as a 16-bit value.
2073         * Please check the DEFAULT_MAX_MAP_COUNT definition when you modify here.
2074         */
2075        segs = current->mm->map_count;
2076        segs += elf_core_extra_phdrs();
2077
2078        gate_vma = get_gate_vma(current->mm);
2079        if (gate_vma != NULL)
2080                segs++;
2081
2082        /* for notes section */
2083        segs++;
2084
2085        /* If segs > PN_XNUM(0xffff), then e_phnum will overflow. To avoid
2086         * this, the kernel supports extended numbering. Have a look at
2087         * include/linux/elf.h for further information. */
2088        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2089
2090        /*
2091         * Collect all the non-memory information about the process for the
2092         * notes.  This also sets up the file header.
2093         */
2094        if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2095                goto cleanup;
2096
2097        has_dumped = 1;
2098
2099        fs = get_fs();
2100        set_fs(KERNEL_DS);
2101
2102        offset += sizeof(*elf);                         /* Elf header */
2103        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2104        foffset = offset;
2105
2106        /* Write notes phdr entry */
2107        {
2108                size_t sz = get_note_info_size(&info);
2109
2110                sz += elf_coredump_extra_notes_size();
2111
2112                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2113                if (!phdr4note)
2114                        goto end_coredump;
2115
2116                fill_elf_note_phdr(phdr4note, sz, offset);
2117                offset += sz;
2118        }
2119
2120        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2121
2122        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2123        offset += elf_core_extra_data_size();
2124        e_shoff = offset;
2125
2126        if (e_phnum == PN_XNUM) {
2127                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2128                if (!shdr4extnum)
2129                        goto end_coredump;
2130                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2131        }
2132
2133        offset = dataoff;
2134
2135        size += sizeof(*elf);
2136        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2137                goto end_coredump;
2138
2139        size += sizeof(*phdr4note);
2140        if (size > cprm->limit
2141            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2142                goto end_coredump;
2143
2144        /* Write program headers for segments dump */
2145        for (vma = first_vma(current, gate_vma); vma != NULL;
2146                        vma = next_vma(vma, gate_vma)) {
2147                struct elf_phdr phdr;
2148
2149                phdr.p_type = PT_LOAD;
2150                phdr.p_offset = offset;
2151                phdr.p_vaddr = vma->vm_start;
2152                phdr.p_paddr = 0;
2153                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2154                phdr.p_memsz = vma->vm_end - vma->vm_start;
2155                offset += phdr.p_filesz;
2156                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2157                if (vma->vm_flags & VM_WRITE)
2158                        phdr.p_flags |= PF_W;
2159                if (vma->vm_flags & VM_EXEC)
2160                        phdr.p_flags |= PF_X;
2161                phdr.p_align = ELF_EXEC_PAGESIZE;
2162
2163                size += sizeof(phdr);
2164                if (size > cprm->limit
2165                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2166                        goto end_coredump;
2167        }
2168
2169        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2170                goto end_coredump;
2171
2172        /* write out the notes section */
2173        if (!write_note_info(&info, cprm->file, &foffset))
2174                goto end_coredump;
2175
2176        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2177                goto end_coredump;
2178
2179        /* Align to page */
2180        if (!dump_seek(cprm->file, dataoff - foffset))
2181                goto end_coredump;
2182
2183        for (vma = first_vma(current, gate_vma); vma != NULL;
2184                        vma = next_vma(vma, gate_vma)) {
2185                unsigned long addr;
2186                unsigned long end;
2187
2188                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2189
2190                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2191                        struct page *page;
2192                        int stop;
2193
2194                        page = get_dump_page(addr);
2195                        if (page) {
2196                                void *kaddr = kmap(page);
2197                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
2198                                        !dump_write(cprm->file, kaddr,
2199                                                    PAGE_SIZE);
2200                                kunmap(page);
2201                                page_cache_release(page);
2202                        } else
2203                                stop = !dump_seek(cprm->file, PAGE_SIZE);
2204                        if (stop)
2205                                goto end_coredump;
2206                }
2207        }
2208
2209        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2210                goto end_coredump;
2211
2212        if (e_phnum == PN_XNUM) {
2213                size += sizeof(*shdr4extnum);
2214                if (size > cprm->limit
2215                    || !dump_write(cprm->file, shdr4extnum,
2216                                   sizeof(*shdr4extnum)))
2217                        goto end_coredump;
2218        }
2219
2220end_coredump:
2221        set_fs(fs);
2222
2223cleanup:
2224        free_note_info(&info);
2225        kfree(shdr4extnum);
2226        kfree(phdr4note);
2227        kfree(elf);
2228out:
2229        return has_dumped;
2230}
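
/*
 * A minimal sketch (not part of the build) of the first-pass offset
 * arithmetic elf_core_dump() performs above, with hypothetical inputs:
 * segment data always begins at the first ELF_EXEC_PAGESIZE boundary
 * past the ELF header, the program header table and the note payload.
 */
static loff_t core_data_offset(int segs, size_t note_sz)
{
        loff_t off = sizeof(struct elfhdr);      /* ELF header */

        off += segs * sizeof(struct elf_phdr);   /* program header table */
        off += note_sz;                          /* PT_NOTE payload */
        return roundup(off, ELF_EXEC_PAGESIZE);  /* == dataoff */
}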
2231
2232#endif          /* CONFIG_ELF_CORE */
2233
2234static int __init init_elf_binfmt(void)
2235{
2236        register_binfmt(&elf_format);
2237        return 0;
2238}
2239
2240static void __exit exit_elf_binfmt(void)
2241{
2242        /* Remove the ELF loader. */
2243        unregister_binfmt(&elf_format);
2244}
2245
2246core_initcall(init_elf_binfmt);
2247module_exit(exit_elf_binfmt);
2248MODULE_LICENSE("GPL");
2249